chore: add corpus models
RealHinome authored Dec 26, 2023
1 parent 7610953 commit ea07508
Showing 3 changed files with 204 additions and 0 deletions.
84 changes: 84 additions & 0 deletions src/corpus/mod.rs
@@ -0,0 +1,84 @@
use anyhow::Result;
use tract_onnx::prelude::*;

const IMAGE_WIDTH: u32 = 224;
const IMAGE_HEIGHT: u32 = 224;
const RESULT: [&str; 8] = [
"animals",
"flower",
"human",
"landscape",
"nude",
"plant",
"sport",
"vehicle",
];
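// NOTE: the label order must match the training classes;
// `image_dataset_from_directory` sorts class directories alphabetically,
// which is the order used above.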

/// Structure wrapping the loaded Corpus model.
#[derive(Debug)]
pub struct Corpus {
pub model: super::Model,
}

/// Trait exposing the operations available on the Corpus model.
pub trait CorpusManager {
    /// Predict the label of the input image bytes.
    fn predict(&self, buffer: &[u8]) -> Result<String>;
}

impl CorpusManager for Corpus {
    /// Predicts the most likely label for the input image.
fn predict(&self, buffer: &[u8]) -> Result<String> {
let img = image::load_from_memory(buffer)?;
        // Skip resizing when the image already matches the model input size;
        // otherwise resize with nearest-neighbor filtering.
        let resized = if img.width() == IMAGE_WIDTH && img.height() == IMAGE_HEIGHT {
            img.to_rgba8()
        } else {
            image::imageops::resize(
                &img,
                IMAGE_WIDTH,
                IMAGE_HEIGHT,
                image::imageops::FilterType::Nearest,
            )
        };

        // Build an NCHW tensor from the resized pixels, then permute it to
        // NHWC (1, 224, 224, 3) to match the input fact declared in `init`.
        let img_array: Tensor =
            tract_ndarray::Array::from_shape_fn((1, 3, 224, 224), |(_, c, y, x)| {
                resized.get_pixel(x as u32, y as u32)[c] as f32
            })
            .into();

        let outputs = self
            .model
            .run(tvec!(img_array.permute_axes(&[0, 2, 3, 1])?.into()))?;

        // Arg-max over the class scores, then map the winning index to its label.
        let best = outputs[0]
            .to_array_view::<f32>()?
            .iter()
            .cloned()
            .enumerate()
            .max_by(|a, b| a.1.partial_cmp(&b.1).unwrap());

        Ok(RESULT[best.unwrap().0].to_string())
}
}
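// Minimal usage sketch (illustrative only — assumes a `photo.jpg` on disk
// and that `super::Model` is the usual tract runnable-plan alias):
//
//     let corpus = Corpus { model: init()? };
//     let bytes = std::fs::read("photo.jpg")?;
//     println!("label = {}", corpus.predict(&bytes)?);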

/// Load the Corpus ONNX model, optimize it, and return a runnable plan.
pub fn init() -> Result<super::Model> {
    let model = tract_onnx::onnx()
        .model_for_path("./src/corpus/model.onnx")?
        // NHWC input: a single 224x224 RGB image.
        .with_input_fact(0, f32::fact([1, 224, 224, 3]).into())?
        // One score per label in `RESULT`.
        .with_output_fact(0, InferenceFact::dt_shape(f32::datum_type(), tvec![1, 8]))?
        .into_optimized()?
        .into_runnable()?;

Ok(model)
}
Binary file added src/corpus/model.onnx
Binary file not shown.
120 changes: 120 additions & 0 deletions src/corpus/model.py
@@ -0,0 +1,120 @@
import tensorflow as tf
import pathlib
import onnxmltools
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential

"""Set the path of the dataset."""

data_dir = pathlib.Path(r"/content/drive/MyDrive/nude_or_not")

"""Define variables for train the model."""

# Number of samples that will be propagated through the network
batch_size = 64
# Image width and height after resizing.
img_height = 224
img_width = 224

train_ds = tf.keras.utils.image_dataset_from_directory(
data_dir,
validation_split=0.2,
subset="training",
seed=128,
image_size=(img_height, img_width),
batch_size=batch_size)

val_ds = tf.keras.utils.image_dataset_from_directory(
data_dir,
validation_split=0.2,
subset="validation",
seed=128,
image_size=(img_height, img_width),
batch_size=batch_size)
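# Using the same seed and validation_split in both calls guarantees the
# training and validation subsets come from one non-overlapping 80/20 split.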

class_names = train_ds.class_names
num_classes = len(class_names)

AUTOTUNE = tf.data.AUTOTUNE

# Cache decoded images after the first epoch, reshuffle every epoch, and
# prefetch batches so the accelerator never waits on input I/O.
train_ds = train_ds.cache().shuffle(1000).prefetch(buffer_size=AUTOTUNE)
val_ds = val_ds.cache().prefetch(buffer_size=AUTOTUNE)

# Sanity check: rescale one batch to [0, 1] and grab a single image.
# The models below embed their own Rescaling layer, so the datasets
# themselves stay un-normalized.
normalization_layer = layers.Rescaling(1./255)

normalized_ds = train_ds.map(lambda x, y: (normalization_layer(x), y))
image_batch, labels_batch = next(iter(normalized_ds))
first_image = image_batch[0]

model = keras.Sequential([
  # Per-sample shape only: leaving the batch dimension dynamic lets the
  # final, possibly smaller, batch of each epoch through.
  layers.Input(shape=(img_height, img_width, 3)),
layers.Rescaling(1./255),
layers.Conv2D(16, 3, padding='same', activation='relu'),
layers.MaxPooling2D(),
layers.Conv2D(32, 3, padding='same', activation='relu'),
layers.MaxPooling2D(),
layers.Conv2D(64, 3, padding='same', activation='relu'),
layers.MaxPooling2D(),
layers.Flatten(),
layers.Dense(128, activation='relu'),
layers.Dense(num_classes)
])

model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

model.summary()
epochs = 20
# `batch_size` and `steps_per_epoch` are omitted: the datasets are already
# batched, and a fixed `steps_per_epoch` on a finite dataset would run out
# of data after the first epoch.
history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=epochs,
)
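# The plain model above tends to overfit on a small dataset; the second
# model below adds augmentation and dropout before the final export.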

data_augmentation = keras.Sequential(
    [
        # The model below declares its own Input layer, so no
        # input_shape is needed here.
        layers.RandomFlip("horizontal"),
        layers.RandomRotation(0.1),
        layers.RandomZoom(0.1),
    ]
)

model = Sequential([
  # Same dynamic-batch Input as the first model.
  layers.Input(shape=(img_height, img_width, 3)),
data_augmentation,
layers.Rescaling(1./255),
layers.Conv2D(16, 3, padding='same', activation='relu'),
layers.MaxPooling2D(),
layers.Conv2D(32, 3, padding='same', activation='relu'),
layers.MaxPooling2D(),
layers.Conv2D(64, 3, padding='same', activation='relu'),
layers.MaxPooling2D(),
layers.Dropout(0.2),
layers.Flatten(),
layers.Dense(128, activation='relu'),
layers.Dense(num_classes)
])

model.compile(optimizer='adam',
loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
metrics=['accuracy'])

model.summary()

epochs = 13
history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=epochs,
)

# Export the final model to ONNX for the Rust side (which loads it from
# ./src/corpus/model.onnx, so the saved file must be moved/renamed to match).
onnx_model = onnxmltools.convert_keras(model, target_opset=2)
onnxmltools.utils.save_model(onnx_model, 'corpus.onnx')
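# Optional sanity check — not part of the original pipeline; assumes the
# `onnxruntime` package is installed. Runs a dummy image through the
# exported graph and confirms one logit per class comes back.
import numpy as np
import onnxruntime as ort

sess = ort.InferenceSession('corpus.onnx', providers=['CPUExecutionProvider'])
dummy = np.zeros((1, img_height, img_width, 3), dtype=np.float32)
logits = sess.run(None, {sess.get_inputs()[0].name: dummy})[0]
assert logits.shape == (1, num_classes)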
