diff --git a/models/html_verificator.py b/models/html_verificator.py
new file mode 100644
index 0000000..33e66aa
--- /dev/null
+++ b/models/html_verificator.py
@@ -0,0 +1,138 @@
+# -*- coding: utf-8 -*-
+"""HTML Verificator.ipynb
+
+Automatically generated by Colaboratory.
+
+Original file is located at
+ https://colab.research.google.com/drive/1ACw4ewe70LE2svjJT_f6nHK2yggo3Ayn
+
+# Phishing website detection based on HTML content
+> We use a basic recurrent text classifier (a bidirectional LSTM) to determine whether a website is a phishing site or a legitimate one.
+
+Import required modules
+"""
+
+import numpy as np
+
+import tensorflow_datasets as tfds
+import tensorflow as tf
+
+tf.get_logger().setLevel('ERROR')
+
+"""Set basic variables"""
+
+AUTOTUNE = tf.data.AUTOTUNE
+batch_size = 64
+seed = 42
+
+"""Set dataset"""
+
+# Labels are inferred from the sub-directory names of the dataset directory.
+train_dataset = tf.keras.utils.text_dataset_from_directory(
+ 'drive/MyDrive/is_ai',
+ batch_size=batch_size,
+ validation_split=0.2,
+ subset='training',
+ seed=seed)
+
+class_names = train_dataset.class_names
+train_dataset = train_dataset.cache().prefetch(buffer_size=AUTOTUNE)
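+
+"""`class_names` follows the alphanumeric order of the sub-directory names, so printing it
+is a quick sanity check of which label index means phishing (an illustrative check only)."""
+
+print('Classes:', class_names)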
+
+test_dataset = tf.keras.utils.text_dataset_from_directory(
+ 'drive/MyDrive/is_ai',
+ batch_size=batch_size,
+ validation_split=0.2,
+ subset='validation',
+ seed=seed)
+
+test_dataset = test_dataset.cache().prefetch(buffer_size=AUTOTUNE)
+
+for example, label in train_dataset.take(1):
+ print('text: ', example.numpy()[:2])
+ print('label: ', label.numpy()[:2])
+
+"""Create the text encoder"""
+
+VOCAB_SIZE = 1000
+encoder = tf.keras.layers.TextVectorization(
+ max_tokens=VOCAB_SIZE)
+encoder.adapt(train_dataset.map(lambda text, label: text))
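+
+"""Peek at a few of the tokens the encoder learned during `adapt` (an illustrative check;
+the exact tokens depend on the dataset)."""
+
+print(np.array(encoder.get_vocabulary())[:10])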
+
+"""Create model"""
+
+model = tf.keras.Sequential([
+ encoder,
+ tf.keras.layers.Embedding(
+ input_dim=len(encoder.get_vocabulary()),
+ output_dim=64,
+ # Use masking to handle the variable sequence lengths
+ mask_zero=True),
+ tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64)),
+ tf.keras.layers.Dense(64, activation='relu'),
+  # Single output logit; the loss below is configured with from_logits=True
+  tf.keras.layers.Dense(1)
+])
+
+"""Compile model to start training"""
+
+model.compile(loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
+ optimizer=tf.keras.optimizers.Adam(1e-4),
+ metrics=['accuracy'])
+
+"""Train model"""
+
+history = model.fit(train_dataset, epochs=10,
+                    validation_data=test_dataset,
+                    validation_steps=30)  # run validation on at most 30 batches per epoch
+
+test_loss, test_acc = model.evaluate(test_dataset)
+
+print('Test Loss:', test_loss)
+print('Test Accuracy:', test_acc)
+
+"""Stack second model"""
+
+model = tf.keras.Sequential([
+ encoder,
+ tf.keras.layers.Embedding(len(encoder.get_vocabulary()), 64, mask_zero=True),
+  # return_sequences=True passes the full output sequence to the next LSTM layer
+  tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64, return_sequences=True)),
+ tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(32)),
+ tf.keras.layers.Dense(64, activation='relu'),
+ tf.keras.layers.Dropout(0.5),
+ tf.keras.layers.Dense(1)
+])
+
+model.compile(loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
+ optimizer=tf.keras.optimizers.Adam(1e-4),
+ metrics=['accuracy'])
+
+history = model.fit(train_dataset, epochs=10,
+ validation_data=test_dataset,
+ validation_steps=30)
+
+test_loss, test_acc = model.evaluate(test_dataset)
+
+print('Test Loss:', test_loss)
+print('Test Accuracy:', test_acc)
+
+"""Use model"""
+
+text_text = ""
+
+for text, _ in train_dataset.take(1):
+ text_text = text[:1]
+
+predictions = model.predict(text_text)
+print(predictions)
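+
+"""The model emits a raw logit (the final `Dense(1)` layer has no activation and the loss
+uses `from_logits=True`). As a small illustrative sketch, a sigmoid maps that logit to the
+probability of the second class, `class_names[1]`."""
+
+print('P({}):'.format(class_names[1]), tf.sigmoid(predictions).numpy())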
+
+model.save("phishing")
+
+"""Use model"""
+
+new_model = tf.keras.models.load_model("phishing")
+
+predictions = new_model.predict(sample_text)
+print(predictions)
+
+test_loss, test_acc = new_model.evaluate(test_dataset)
+
+print('Test Loss:', test_loss)
+print('Test Accuracy:', test_acc)
\ No newline at end of file