chore: add corpus models
RealHinome authored Dec 26, 2023
1 parent 7610953 commit ea07508
Showing 3 changed files with 204 additions and 0 deletions.
84 changes: 84 additions & 0 deletions src/corpus/mod.rs
@@ -0,0 +1,84 @@
use anyhow::Result;
use tract_onnx::prelude::*;

const IMAGE_WIDTH: u32 = 224;
const IMAGE_HEIGHT: u32 = 224;
const RESULT: [&str; 8] = [
"animals",
"flower",
"human",
"landscape",
"nude",
"plant",
"sport",
"vehicle",
];
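// NOTE: the label order must match the training classes;
// `image_dataset_from_directory` sorts class directories alphabetically,
// which is the order used above.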

/// Structure wrapping the loaded Corpus model.
#[derive(Debug)]
pub struct Corpus {
pub model: super::Model,
}

/// Trait exposing the operations available on the Corpus model.
pub trait CorpusManager {
    /// Predict the label of the input image bytes.
    fn predict(&self, buffer: &[u8]) -> Result<String>;
}

impl CorpusManager for Corpus {
    /// Predicts the most likely label for the input image.
fn predict(&self, buffer: &[u8]) -> Result<String> {
let img = image::load_from_memory(buffer)?;
        // Skip resizing when the image already matches the model input size;
        // otherwise resize with nearest-neighbor filtering.
        let resized = if img.width() == IMAGE_WIDTH && img.height() == IMAGE_HEIGHT {
            img.to_rgba8()
        } else {
            image::imageops::resize(
                &img,
                IMAGE_WIDTH,
                IMAGE_HEIGHT,
                image::imageops::FilterType::Nearest,
            )
        };

        // Build an NCHW tensor from the resized pixels, then permute it to
        // NHWC (1, 224, 224, 3) to match the input fact declared in `init`.
        let img_array: Tensor =
            tract_ndarray::Array::from_shape_fn((1, 3, 224, 224), |(_, c, y, x)| {
                resized.get_pixel(x as u32, y as u32)[c] as f32
            })
            .into();

        let outputs = self
            .model
            .run(tvec!(img_array.permute_axes(&[0, 2, 3, 1])?.into()))?;

        // Arg-max over the class scores, then map the winning index to its label.
        let best = outputs[0]
            .to_array_view::<f32>()?
            .iter()
            .cloned()
            .enumerate()
            .max_by(|a, b| a.1.partial_cmp(&b.1).unwrap());

        Ok(RESULT[best.unwrap().0].to_string())
}
}
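// Minimal usage sketch (illustrative only — assumes a `photo.jpg` on disk
// and that `super::Model` is the usual tract runnable-plan alias):
//
//     let corpus = Corpus { model: init()? };
//     let bytes = std::fs::read("photo.jpg")?;
//     println!("label = {}", corpus.predict(&bytes)?);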

/// Load the Corpus ONNX model, optimize it, and return a runnable plan.
pub fn init() -> Result<super::Model> {
    let model = tract_onnx::onnx()
        .model_for_path("./src/corpus/model.onnx")?
        // NHWC input: a single 224x224 RGB image.
        .with_input_fact(0, f32::fact([1, 224, 224, 3]).into())?
        // One score per label in `RESULT`.
        .with_output_fact(0, InferenceFact::dt_shape(f32::datum_type(), tvec![1, 8]))?
        .into_optimized()?
        .into_runnable()?;

Ok(model)
}
Binary file added src/corpus/model.onnx
Binary file not shown.
120 changes: 120 additions & 0 deletions src/corpus/model.py
@@ -0,0 +1,120 @@
import tensorflow as tf
import pathlib
import onnxmltools
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential

"""Set the path of the dataset."""

data_dir = pathlib.Path(r"/content/drive/MyDrive/nude_or_not")

"""Define variables for train the model."""

# Number of samples that will be propagated through the network
batch_size = 64
# Image width and height after resizing.
img_height = 224
img_width = 224

train_ds = tf.keras.utils.image_dataset_from_directory(
data_dir,
validation_split=0.2,
subset="training",
seed=128,
image_size=(img_height, img_width),
batch_size=batch_size)

val_ds = tf.keras.utils.image_dataset_from_directory(
data_dir,
validation_split=0.2,
subset="validation",
seed=128,
image_size=(img_height, img_width),
batch_size=batch_size)
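# Using the same seed and validation_split in both calls guarantees the
# training and validation subsets come from one non-overlapping 80/20 split.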

class_names = train_ds.class_names
num_classes = len(class_names)

AUTOTUNE = tf.data.AUTOTUNE

# Cache decoded images after the first epoch, reshuffle every epoch, and
# prefetch batches so the accelerator never waits on input I/O.
train_ds = train_ds.cache().shuffle(1000).prefetch(buffer_size=AUTOTUNE)
val_ds = val_ds.cache().prefetch(buffer_size=AUTOTUNE)

# Sanity check: rescale one batch to [0, 1] and grab a single image.
# The models below embed their own Rescaling layer, so the datasets
# themselves stay un-normalized.
normalization_layer = layers.Rescaling(1./255)

normalized_ds = train_ds.map(lambda x, y: (normalization_layer(x), y))
image_batch, labels_batch = next(iter(normalized_ds))
first_image = image_batch[0]

model = keras.Sequential([
  # Per-sample shape only: leaving the batch dimension dynamic lets the
  # final, possibly smaller, batch of each epoch through.
  layers.Input(shape=(img_height, img_width, 3)),
layers.Rescaling(1./255),
layers.Conv2D(16, 3, padding='same', activation='relu'),
layers.MaxPooling2D(),
layers.Conv2D(32, 3, padding='same', activation='relu'),
layers.MaxPooling2D(),
layers.Conv2D(64, 3, padding='same', activation='relu'),
layers.MaxPooling2D(),
layers.Flatten(),
layers.Dense(128, activation='relu'),
layers.Dense(num_classes)
])

model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

model.summary()
epochs = 20
# `batch_size` and `steps_per_epoch` are omitted: the datasets are already
# batched, and a fixed `steps_per_epoch` on a finite dataset would run out
# of data after the first epoch.
history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=epochs,
)
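# The plain model above tends to overfit on a small dataset; the second
# model below adds augmentation and dropout before the final export.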

data_augmentation = keras.Sequential(
    [
        # The model below declares its own Input layer, so no
        # input_shape is needed here.
        layers.RandomFlip("horizontal"),
        layers.RandomRotation(0.1),
        layers.RandomZoom(0.1),
    ]
)

model = Sequential([
  # Same dynamic-batch Input as the first model.
  layers.Input(shape=(img_height, img_width, 3)),
data_augmentation,
layers.Rescaling(1./255),
layers.Conv2D(16, 3, padding='same', activation='relu'),
layers.MaxPooling2D(),
layers.Conv2D(32, 3, padding='same', activation='relu'),
layers.MaxPooling2D(),
layers.Conv2D(64, 3, padding='same', activation='relu'),
layers.MaxPooling2D(),
layers.Dropout(0.2),
layers.Flatten(),
layers.Dense(128, activation='relu'),
layers.Dense(num_classes)
])

model.compile(optimizer='adam',
loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
metrics=['accuracy'])

model.summary()

epochs = 13
history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=epochs,
)

# Export the final model to ONNX for the Rust side (which loads it from
# ./src/corpus/model.onnx, so the saved file must be moved/renamed to match).
onnx_model = onnxmltools.convert_keras(model, target_opset=2)
onnxmltools.utils.save_model(onnx_model, 'corpus.onnx')
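# Optional sanity check — not part of the original pipeline; assumes the
# `onnxruntime` package is installed. Runs a dummy image through the
# exported graph and confirms one logit per class comes back.
import numpy as np
import onnxruntime as ort

sess = ort.InferenceSession('corpus.onnx', providers=['CPUExecutionProvider'])
dummy = np.zeros((1, img_height, img_width, 3), dtype=np.float32)
logits = sess.run(None, {sess.get_inputs()[0].name: dummy})[0]
assert logits.shape == (1, num_classes)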
