version 0.7: significant performance update, better progress

This commit is contained in:
mxhagen 2025-09-16 13:57:07 +02:00
parent 28f4ff1eee
commit f499ada02c
4 changed files with 63 additions and 11 deletions

2
Cargo.lock generated
View File

@ -765,7 +765,7 @@ dependencies = [
[[package]] [[package]]
name = "qtizer" name = "qtizer"
version = "0.6.1" version = "0.7.0"
dependencies = [ dependencies = [
"clap", "clap",
"image", "image",

View File

@ -1,6 +1,6 @@
[package] [package]
name = "qtizer" name = "qtizer"
version = "0.6.1" version = "0.7.0"
edition = "2024" edition = "2024"
description = "Quantization/palette-generation tool using k-means clustering on pixel data" description = "Quantization/palette-generation tool using k-means clustering on pixel data"
readme = "README.md" readme = "README.md"

View File

@ -16,13 +16,14 @@ impl Kmeansable for Color {
vec![0; 4] vec![0; 4]
} }
/// distance function, according to which clustering is performed
/// (impl avoids sqrt for performance -- uses squared distance)
fn distance(&self, other: &Self) -> f64 { fn distance(&self, other: &Self) -> f64 {
self.data self.data
.iter() .iter()
.zip(other.data.iter()) .zip(other.data.iter())
.map(|(a, b)| ((*a as f64) - (*b as f64)).powi(2)) .map(|(a, b)| ((*a as f64) - (*b as f64)).powi(2))
.sum::<f64>() .sum::<f64>()
.sqrt()
} }
fn add(sum: &Self::Sum, other: &Self) -> Self::Sum { fn add(sum: &Self::Sum, other: &Self) -> Self::Sum {

View File

@ -44,22 +44,70 @@ impl Context<SmallRng> {
.cloned() .cloned()
.collect::<Vec<_>>(); .collect::<Vec<_>>();
// make cursor invisible
eprint!("\x1b[?25l");
for i in 0..iterations { for i in 0..iterations {
// TODO: implement static logger functionality for progress // TODO: implement static logger functionality for progress
eprintln!("processing k-means iteration {}/{}...", i + 1, iterations); // once implemented, replace other eprint(ln)! calls too
eprintln!(
"processing k-means iteration: [ {:>9} / {:>9} ]...",
i + 1,
iterations
);
// precompute cluster distances to skip some distance calculations later
// only set for i < j -- note: dist[i][j] == dist[j][i]
let mut cluster_distances = vec![vec![0.0; k]; k];
for i in 0..k {
for j in (i + 1)..k {
let dist = clusters[i].distance(&clusters[j]);
cluster_distances[i][j] = dist; // i < j case only
}
}
// assign each point to the nearest cluster // assign each point to the nearest cluster
for (i, point) in data.iter().enumerate() { for (i, point) in data.iter().enumerate() {
let min_idx = clusters let mut closest_idx = 0;
.iter() let mut closest_dist = clusters[0].distance(point);
.enumerate()
.min_by(|(_, a), (_, b)| f64::total_cmp(&a.distance(point), &b.distance(point)))
.unwrap()
.0;
assignments[i] = min_idx; // print progress every 500 points
if i % 500 == 0 {
if i > 0 {
// restore cursor position (write over previous status)
eprint!("\x1b[1F");
}
let label_len = "processing k-means iteration".len();
eprintln!(
"{:>label_len$}: [ {:>9} / {:>9} ]...",
"assigning point",
i,
data.len()
);
}
for j in 1..k {
    // Try to skip the (comparatively expensive) point-to-cluster distance
    // using the triangle inequality on the precomputed cluster distances.
    // `cluster_distances` is only filled for index pairs with a < b, so
    // order the pair before the lookup.
    let (a, b) = (closest_idx.min(j), closest_idx.max(j));

    // For a true metric d:
    //   d(c_j, c_min) >= 2 * d(p, c_min)  =>  d(p, c_j) >= d(p, c_min)
    // The `Color` impl changed alongside this loop returns the *squared*
    // distance (no sqrt), so both sides of the bound are squared and the
    // factor becomes 2^2 = 4:
    //   d^2(c_j, c_min) >= 4 * d^2(p, c_min)  =>  d^2(p, c_j) >= d^2(p, c_min)
    // Using 2.0 here would prune clusters that can still be closer.
    // A factor of 4.0 also stays correct (just more conservative) should an
    // implementation return a non-squared metric distance.
    if cluster_distances[a][b] >= 4.0 * closest_dist {
        continue;
    }

    let dist = clusters[j].distance(point);
    if dist < closest_dist {
        closest_dist = dist;
        closest_idx = j;
    }
}
assignments[i] = closest_idx;
} }
// restore cursor position (write over previous status)
eprint!("\x1b[2F");
// move cluster to mean of its assigned points // move cluster to mean of its assigned points
let mut counts: Vec<usize> = vec![0; k]; let mut counts: Vec<usize> = vec![0; k];
let mut sums = vec![T::zero(); k]; let mut sums = vec![T::zero(); k];
@ -77,6 +125,9 @@ impl Context<SmallRng> {
} }
} }
// make cursor visible again
eprint!("\x1b[?25h");
(clusters, assignments) (clusters, assignments)
} }