From f499ada02ccebaeabacbf01668679b69f6044391 Mon Sep 17 00:00:00 2001 From: mxhagen Date: Tue, 16 Sep 2025 13:57:07 +0200 Subject: [PATCH] version 0.7: significant performance update, better progress --- Cargo.lock | 2 +- Cargo.toml | 2 +- src/colors.rs | 3 ++- src/kmeans.rs | 67 +++++++++++++++++++++++++++++++++++++++++++++------ 4 files changed, 63 insertions(+), 11 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d320050..0b2b003 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -765,7 +765,7 @@ dependencies = [ [[package]] name = "qtizer" -version = "0.6.1" +version = "0.7.0" dependencies = [ "clap", "image", diff --git a/Cargo.toml b/Cargo.toml index 2a3cb7a..1dfd9b5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "qtizer" -version = "0.6.1" +version = "0.7.0" edition = "2024" description = "Quantization/palette-generation tool using k-means clustering on pixel data" readme = "README.md" diff --git a/src/colors.rs b/src/colors.rs index dcb39a3..3ce43ab 100644 --- a/src/colors.rs +++ b/src/colors.rs @@ -16,13 +16,14 @@ impl Kmeansable for Color { vec![0; 4] } + /// distance function, according to which clustering is performed + /// (impl avoids sqrt for performance -- uses squared distance) fn distance(&self, other: &Self) -> f64 { self.data .iter() .zip(other.data.iter()) .map(|(a, b)| ((*a as f64) - (*b as f64)).powi(2)) .sum::() - .sqrt() } fn add(sum: &Self::Sum, other: &Self) -> Self::Sum { diff --git a/src/kmeans.rs b/src/kmeans.rs index 8d30af4..c38be43 100644 --- a/src/kmeans.rs +++ b/src/kmeans.rs @@ -44,22 +44,70 @@ impl Context { .cloned() .collect::>(); + // make cursor invisible + eprint!("\x1b[?25l"); + for i in 0..iterations { // TODO: implement static logger functionality for progress - eprintln!("processing k-means iteration {}/{}...", i + 1, iterations); + // once implemented, replace other eprint(ln)! 
calls too + eprintln!( + "processing k-means iteration: [ {:>9} / {:>9} ]...", + i + 1, + iterations + ); + + // precompute cluster distances to skip some distance calculations later + // only set for i < j -- note: dist[i][j] == dist[j][i] + let mut cluster_distances = vec![vec![0.0; k]; k]; + for i in 0..k { + for j in (i + 1)..k { + let dist = clusters[i].distance(&clusters[j]); + cluster_distances[i][j] = dist; // i < j case only + } + } // assign each point to the nearest cluster for (i, point) in data.iter().enumerate() { - let min_idx = clusters - .iter() - .enumerate() - .min_by(|(_, a), (_, b)| f64::total_cmp(&a.distance(point), &b.distance(point))) - .unwrap() - .0; + let mut closest_idx = 0; + let mut closest_dist = clusters[0].distance(point); - assignments[i] = min_idx; + // print progress every 500 points + if i % 500 == 0 { + if i > 0 { + // restore cursor position (write over previous status) + eprint!("\x1b[1F"); + } + let label_len = "processing k-means iteration".len(); + eprintln!( + "{:>label_len$}: [ {:>9} / {:>9} ]...", + "assigning point", + i, + data.len() + ); + } + + for j in 1..k { + // skip distance calculation if the cluster is too far away + let (a, b) = (closest_idx.min(j), closest_idx.max(j)); + if cluster_distances[a][b] >= 4.0 * closest_dist { + // distances are squared: d^2(c_j, c_min) >= 4 * d^2(p, c_min) + // implies d(p, c_j) >= d(p, c_min) (triangle inequality) + continue; + } + + let dist = clusters[j].distance(point); + if dist < closest_dist { + closest_dist = dist; + closest_idx = j; + } + } + + assignments[i] = closest_idx; } + // restore cursor position (write over previous status) + eprint!("\x1b[2F"); + // move cluster to mean of its assigned points let mut counts: Vec = vec![0; k]; let mut sums = vec![T::zero(); k]; @@ -77,6 +125,9 @@ impl Context { } } + // make cursor visible again + eprint!("\x1b[?25h"); + (clusters, assignments) }