Initial commit
commit c84187f446

.gitignore (vendored, new file, 1 line)
@@ -0,0 +1 @@
/target

Cargo.lock (generated, new file, 75 lines)
@@ -0,0 +1,75 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3

[[package]]
name = "aicaramba"
version = "0.1.0"
dependencies = [
 "rand",
]

[[package]]
name = "cfg-if"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"

[[package]]
name = "getrandom"
version = "0.2.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "190092ea657667030ac6a35e305e62fc4dd69fd98ac98631e5d3a2b1575a12b5"
dependencies = [
 "cfg-if",
 "libc",
 "wasi",
]

[[package]]
name = "libc"
version = "0.2.153"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd"

[[package]]
name = "ppv-lite86"
version = "0.2.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de"

[[package]]
name = "rand"
version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404"
dependencies = [
 "libc",
 "rand_chacha",
 "rand_core",
]

[[package]]
name = "rand_chacha"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88"
dependencies = [
 "ppv-lite86",
 "rand_core",
]

[[package]]
name = "rand_core"
version = "0.6.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
dependencies = [
 "getrandom",
]

[[package]]
name = "wasi"
version = "0.11.0+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"

Cargo.toml (new file, 10 lines)
@@ -0,0 +1,10 @@
[package]
name = "aicaramba"
version = "0.1.0"
edition = "2021"
license = "MIT"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
rand = "0.8.5" # required unconditionally by src/matrix.rs; see the feature-gating TODO there

src/bin/xor.rs (new file, 18 lines)
@@ -0,0 +1,18 @@
use aicaramba::functions::*;
use aicaramba::neural_net::NeuralNet;

fn main() {
    let mut net = NeuralNet::new(vec![2, 3, 1], SIGMOID, MSE, 0.05);
    let epochs = 10_000;

    let inputs = vec![
        vec![0.0, 0.0],
        vec![0.0, 1.0],
        vec![1.0, 0.0],
        vec![1.0, 1.0],
    ];

    let expected = vec![vec![0.0], vec![1.0], vec![1.0], vec![0.0]];

    net.train_basic(inputs, expected, epochs);
}

src/functions.rs (new file, 29 lines)
@@ -0,0 +1,29 @@
use crate::matrix::MatElem;

#[derive(Clone, Copy, Debug)]
pub struct ActivationFn<T>
where
    T: MatElem,
{
    pub f: fn(T) -> T,
    pub f_prime: fn(T) -> T,
}

#[derive(Clone, Copy, Debug)]
pub struct LossFn<T>
where
    T: MatElem,
{
    pub f: fn(T, T) -> T,
    pub f_prime: fn(T, T) -> T,
}

pub const SIGMOID: ActivationFn<f64> = ActivationFn {
    f: |x| 1.0 / (1.0 + f64::exp(-x)),
    // receives the already-activated value a = f(z), since f'(z) = a * (1 - a)
    f_prime: |x| x * (1.0 - x),
};

pub const MSE: LossFn<f64> = LossFn {
    f: |y_hat, y| (y_hat - y).powi(2),
    // the minus sign folds the gradient-descent direction into the derivative
    f_prime: |y_hat, y| -2.0 * (y_hat - y),
};
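
Aside, not part of the commit: because `ActivationFn` is just a pair of plain `fn` pointers, further activations slot in the same way. A minimal sketch of a hypothetical `RELU` constant, following the same convention as `SIGMOID` above, namely that `f_prime` receives the already-activated value:

pub const RELU: ActivationFn<f64> = ActivationFn {
    f: |x| if x > 0.0 { x } else { 0.0 },
    // like SIGMOID, f_prime gets the activated output; a ReLU output is
    // positive exactly when its input was, so the derivative is 1 or 0
    f_prime: |x| if x > 0.0 { 1.0 } else { 0.0 },
};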

src/lib.rs (new file, 3 lines)
@@ -0,0 +1,3 @@
pub mod functions;
pub mod matrix;
pub mod neural_net;

src/matrix.rs (new file, 300 lines)
@@ -0,0 +1,300 @@
use rand::Rng;
use std::ops::{Add, Mul, Sub};

// NOTE: might want to rethink the design (2d-vec?) to enable `matrix[i][j]`
// indexing and make a nice row-iterator implementation possible
#[derive(Debug, Clone)]
pub struct Mat<T>
where
    T: MatElem,
{
    pub rows: usize,
    pub cols: usize,
    // row-major storage: element (i, j) lives at data[i * cols + j]
    data: Vec<T>,
}

/// Shorthand/alias trait for types that are valid as matrix elements.
pub trait MatElem:
    PartialEq + Clone + Default + Add<Output = Self> + Sub<Output = Self> + Mul<Output = Self>
{
}

impl<T> MatElem for T where
    T: PartialEq + Clone + Default + Add<Output = T> + Sub<Output = T> + Mul<Output = T>
{
}

impl<T> Mat<T>
where
    T: MatElem,
{
    pub fn new(rows: usize, cols: usize, data: Vec<T>) -> Mat<T> {
        assert!(data.len() == rows * cols, "Invalid size");
        Mat { rows, cols, data }
    }

    pub fn at(&self, row: usize, col: usize) -> &T {
        &self.data[row * self.cols + col]
    }

    pub fn at_mut(&mut self, row: usize, col: usize) -> &mut T {
        &mut self.data[row * self.cols + col]
    }

    pub fn default_with_size(rows: usize, cols: usize) -> Mat<T> {
        Mat {
            rows,
            cols,
            data: vec![T::default(); cols * rows],
        }
    }

    pub fn add(&self, other: &Mat<T>) -> Mat<T> {
        if self.rows != other.rows || self.cols != other.cols {
            panic!("Attempted to add matrices with differing shapes.");
        }
        self.elementwise(other, |a, b| a + b)
    }

    pub fn sub(&self, other: &Mat<T>) -> Mat<T> {
        if self.rows != other.rows || self.cols != other.cols {
            panic!("Attempted to subtract matrices with differing shapes.");
        }
        self.elementwise(other, |a, b| a - b)
    }

    pub fn elementwise_mul(&self, other: &Mat<T>) -> Mat<T> {
        if self.rows != other.rows || self.cols != other.cols {
            panic!("Attempted to elementwise-multiply matrices of differing shapes.");
        }
        self.elementwise(other, |a, b| a * b)
    }

    pub fn elementwise(&self, other: &Mat<T>, f: fn(T, T) -> T) -> Mat<T> {
        if self.rows != other.rows || self.cols != other.cols {
            panic!("Attempted to apply element-wise operation to matrices with differing shapes.");
        }

        let data = self
            .data
            .iter()
            .zip(other.data.iter())
            .map(|(a, b)| f(a.clone(), b.clone()))
            .collect::<Vec<_>>();

        Mat {
            rows: self.rows,
            cols: self.cols,
            data,
        }
    }

    pub fn dot(&self, other: &Mat<T>) -> Mat<T> {
        if self.cols != other.rows {
            panic!(
                "Attempted to take dot product of incompatible matrix shapes. (A.cols != B.rows)"
            );
        }

        let mut data = vec![T::default(); self.rows * other.cols];

        for i in 0..self.rows {
            for j in 0..other.cols {
                let mut sum = T::default();
                for k in 0..self.cols {
                    sum = sum
                        + self.data[i * self.cols + k].clone()
                            * other.data[k * other.cols + j].clone();
                }
                data[i * other.cols + j] = sum;
            }
        }

        Mat {
            rows: self.rows,
            cols: other.cols,
            data,
        }
    }

    pub fn transpose(&self) -> Mat<T> {
        let mut buffer = vec![T::default(); self.cols * self.rows];

        for i in 0..self.rows {
            for j in 0..self.cols {
                buffer[j * self.rows + i] = self.data[i * self.cols + j].clone();
            }
        }

        Mat {
            rows: self.cols,
            cols: self.rows,
            data: buffer,
        }
    }

    pub fn map<F>(&self, f: F) -> Mat<T>
    where
        F: FnMut(T) -> T,
    {
        Mat {
            rows: self.rows,
            cols: self.cols,
            data: self.data.clone().into_iter().map(f).collect(),
        }
    }
}

pub trait Collect<T>
where
    T: MatElem,
{
    fn collect_mat(self, rows: usize, cols: usize) -> Mat<T>;
}

impl<I, T> Collect<T> for I
where
    I: IntoIterator<Item = T>,
    T: MatElem,
{
    fn collect_mat(self, rows: usize, cols: usize) -> Mat<T> {
        let data = self.into_iter().collect::<Vec<T>>();
        if data.len() != rows * cols {
            panic!("Collecting iterator into matrix failed due to incompatible matrix shape.");
        }
        Mat { rows, cols, data }
    }
}

// the `random` constructor is only available if `rand` can sample the element type
impl<T> Mat<T>
where
    T: MatElem,
    rand::distributions::Standard: rand::distributions::Distribution<T>,
{
    // TODO: gate behind a randomization feature
    pub fn random(rows: usize, cols: usize) -> Mat<T> {
        let mut rng = rand::thread_rng();
        let mut data = Vec::with_capacity(rows * cols);

        for _ in 0..rows * cols {
            data.push(rng.gen());
        }

        Mat { rows, cols, data }
    }
}

// NOTE: might want to change this to separate row- and col-iterators in the
// future; a `flat_iter` could then mirror the current behavior.
impl<T> IntoIterator for Mat<T>
where
    T: MatElem,
{
    type Item = T;
    type IntoIter = std::vec::IntoIter<Self::Item>;

    fn into_iter(self) -> Self::IntoIter {
        self.data.into_iter()
    }
}

impl<T> From<Vec<T>> for Mat<T>
where
    T: MatElem,
{
    // a flat Vec becomes a column vector (n x 1)
    fn from(value: Vec<T>) -> Self {
        Mat {
            rows: value.len(),
            cols: 1,
            data: value,
        }
    }
}

impl<T> From<Vec<Vec<T>>> for Mat<T>
where
    T: MatElem,
{
    fn from(value: Vec<Vec<T>>) -> Self {
        let rows = value.len();
        let cols = value.first().map(Vec::len).unwrap_or(0);
        Mat {
            rows,
            cols,
            data: value.into_iter().flatten().collect(),
        }
    }
}

impl<T> PartialEq for Mat<T>
where
    T: MatElem,
{
    fn eq(&self, other: &Self) -> bool {
        self.rows == other.rows && self.cols == other.cols && self.data == other.data
    }
}

impl<T> std::ops::Index<usize> for Mat<T>
where
    T: MatElem,
{
    type Output = [T];

    // indexing by row yields a slice, so `mat[i][j]` already works for reads
    fn index(&self, index: usize) -> &Self::Output {
        &self.data[index * self.cols..(index + 1) * self.cols]
    }
}

impl<T> std::fmt::Display for Mat<T>
where
    T: MatElem + std::fmt::Display,
{
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        for row in 0..self.rows {
            for col in 0..self.cols {
                write!(f, "{}", self.data[row * self.cols + col])?;
                if col < self.cols - 1 {
                    write!(f, "\t")?;
                }
            }
            writeln!(f)?;
        }
        Ok(())
    }
}

#[macro_export]
macro_rules! matrix {
    ( $( $($val:expr),+ );* $(;)? ) => {
        {
            let mut data = Vec::<f64>::new();
            let mut rows = 0;
            let mut cols = 0;
            $(
                let row_data = vec![$($val),+];
                // measure before extending so each $val is evaluated only once
                let row_len = row_data.len();
                data.extend(row_data);
                rows += 1;
                if cols == 0 {
                    cols = row_len;
                } else if cols != row_len {
                    panic!("Inconsistent number of elements in the matrix rows");
                }
            )*

            // go through the constructor: `data` is private outside this module
            $crate::matrix::Mat::new(rows, cols, data)
        }
    };
}
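
Aside, not part of the commit: a minimal usage sketch of the `Mat` API and the `matrix!` macro above, assuming the crate is consumed as the `aicaramba` library; the asserted values follow directly from the `dot` and `transpose` definitions:

use aicaramba::matrix::Mat;

fn main() {
    // 2x3 matrix from the row-major macro; `;` separates rows
    let a = aicaramba::matrix![1.0, 2.0, 3.0; 4.0, 5.0, 6.0];
    assert_eq!((a.rows, a.cols), (2, 3));

    // a flat Vec converts into a column vector (3x1)
    let x = Mat::from(vec![1.0, 0.0, 1.0]);

    // (2x3) . (3x1) = (2x1)
    let y = a.dot(&x);
    assert_eq!(*y.at(0, 0), 4.0);  // 1*1 + 2*0 + 3*1
    assert_eq!(*y.at(1, 0), 10.0); // 4*1 + 5*0 + 6*1

    // transpose flips the shape to (3x2); Index<usize> yields row slices
    let t = a.transpose();
    assert_eq!(t[0], [1.0, 4.0]);
}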

src/neural_net.rs (new file, 135 lines)
@@ -0,0 +1,135 @@
use crate::functions::*;
use crate::matrix::{Mat, MatElem};

/// Contains the following values:
/// - `architecture: Vec<usize>`: The node counts for each layer (e.g. `vec![2, 3, 1]`)
/// - `weights: Vec<Mat>`: The weight matrices between consecutive layers.
/// - `biases: Vec<Mat>`: The bias matrices of the layers.
/// - `learning_rate: T`: The scalar learning rate.
/// - `activation: ActivationFn`: Struct containing the activation function and its derivative
/// - `loss: LossFn`: Struct containing the loss function and its derivative
/// - `data: Vec<Mat>`: A buffer for the activated values during the forward and backward pass.
pub struct NeuralNet<T>
where
    T: MatElem,
{
    architecture: Vec<usize>,
    weights: Vec<Mat<T>>,
    biases: Vec<Mat<T>>,

    learning_rate: T,

    activation: ActivationFn<T>,
    loss: LossFn<T>,

    data: Vec<Mat<T>>,
}

impl<T> NeuralNet<T>
where
    T: MatElem,
    rand::distributions::Standard: rand::distributions::Distribution<T>,
{
    pub fn new(
        layers: Vec<usize>,
        activation: ActivationFn<T>,
        loss: LossFn<T>,
        learning_rate: T,
    ) -> Self {
        let mut weights = vec![];
        let mut biases = vec![];

        // weights[i] maps layer i (cols) to layer i + 1 (rows)
        for i in 0..layers.len() - 1 {
            weights.push(Mat::random(layers[i + 1], layers[i]));
            biases.push(Mat::random(layers[i + 1], 1));
        }

        NeuralNet {
            architecture: layers,
            weights,
            biases,
            data: vec![],
            activation,
            loss,
            learning_rate,
        }
    }
}

impl<T> NeuralNet<T>
where
    T: MatElem,
{
    pub fn forward(&mut self, inputs: Mat<T>) -> Mat<T> {
        if self.architecture[0] != inputs.rows {
            panic!("Input vector does not have the correct number of rows.");
        }

        let mut current = inputs;
        self.data = vec![current.clone()];

        for i in 0..self.architecture.len() - 1 {
            // layer i + 1 activations: f(W_i * a_i + b_i)
            current = self.weights[i]
                .dot(&current)
                .add(&self.biases[i])
                .map(self.activation.f);

            self.data.push(current.clone());
        }

        current
    }

    pub fn backprop(&mut self, prediction: Mat<T>, truth: Mat<T>) {
        let mut losses = prediction.elementwise(&truth, self.loss.f_prime);
        let mut gradients = prediction.map(self.activation.f_prime);

        for i in (0..self.architecture.len() - 1).rev() {
            gradients = gradients
                .elementwise_mul(&losses)
                .map(|x| x * self.learning_rate.clone());

            // the descent direction is expected to be folded into the loss
            // derivative (as in MSE's -2 * (y_hat - y)), hence `add` here
            self.weights[i] = self.weights[i].add(&gradients.dot(&self.data[i].transpose()));
            self.biases[i] = self.biases[i].add(&gradients);

            losses = self.weights[i].transpose().dot(&losses);
            gradients = self.data[i].map(self.activation.f_prime);
        }
    }

    // TODO: add batch-wise training
    // TODO: refactor to use matrices instead of 2d-vecs
    pub fn train_basic(&mut self, inputs: Vec<Vec<T>>, truth: Vec<Vec<T>>, epochs: u32)
    where
        T: std::fmt::Display,
    {
        let width = epochs.ilog10() as usize + 1;

        for i in 1..=epochs {
            for j in 0..inputs.len() {
                let outputs = self.forward(Mat::from(inputs[j].clone()));
                self.backprop(outputs, Mat::from(truth[j].clone()));
            }

            // report the summed loss roughly 20 times over the training run
            if epochs < 20 || i % (epochs / 20) == 0 {
                let mut loss = T::default();
                for j in 0..inputs.len() {
                    let outputs = self.forward(Mat::from(inputs[j].clone()));
                    loss = loss
                        + outputs
                            .into_iter()
                            .zip(&truth[j])
                            .fold(T::default(), |sum, (y_hat, y)| {
                                sum + (self.loss.f)(y_hat, y.clone())
                            });
                }
                println!("epoch: {i:0>width$} / {epochs:0>width$} ;\tloss: {loss:.5}");
            }
        }
    }
}
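
Aside, not part of the commit: spelling out what `src/bin/xor.rs` drives. For `vec![2, 3, 1]` the constructor builds W1 (3x2), b1 (3x1), W2 (1x3), b2 (1x1), and `forward` computes a1 = sigmoid(W1 * x + b1) and y_hat = sigmoid(W2 * a1 + b2). A sketch of inference after training; the exact numbers depend on the random initialization, so only the printout is shown:

use aicaramba::functions::{MSE, SIGMOID};
use aicaramba::matrix::Mat;
use aicaramba::neural_net::NeuralNet;

fn main() {
    let mut net = NeuralNet::new(vec![2, 3, 1], SIGMOID, MSE, 0.05);
    net.train_basic(
        vec![vec![0.0, 0.0], vec![0.0, 1.0], vec![1.0, 0.0], vec![1.0, 1.0]],
        vec![vec![0.0], vec![1.0], vec![1.0], vec![0.0]],
        10_000,
    );

    // forward expects a column vector with architecture[0] rows
    let prediction = net.forward(Mat::from(vec![1.0, 0.0]));
    // sigmoid keeps outputs in (0, 1); after training this should sit near 1.0
    println!("XOR(1, 0) ~ {}", prediction.at(0, 0));
}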