From 651a6f73b57a2596102c801dd5597ac0c98194a3 Mon Sep 17 00:00:00 2001
From: Nodar Okroshiashvili <n.okroshiashvili@gmail.com>
Date: Thu, 2 Mar 2023 00:22:55 +0400
Subject: [PATCH 1/4] Add small utility to profile any function

---
 profiling/profiling.py | 29 +++++++++++++++++++++++++++++
 test_requirements.txt  |  1 +
 2 files changed, 30 insertions(+)
 create mode 100644 profiling/profiling.py

diff --git a/profiling/profiling.py b/profiling/profiling.py
new file mode 100644
index 000000000..78df3edd8
--- /dev/null
+++ b/profiling/profiling.py
@@ -0,0 +1,29 @@
+import functools
+
+from pyinstrument.profiler import Profiler
+
+
+def profile_function(output_file="profile.html"):
+    """
+    Decorator factory that profiles a function call with pyinstrument.
+
+    Parameters
+    ----------
+    output_file: HTML report path, rewritten per call. Defaults to "profile.html".
+    """
+
+    def decorator(function):
+        @functools.wraps(function)
+        def wrapper(*args, **kwargs):
+            profiler = Profiler()
+            profiler.start()
+            try:
+                return function(*args, **kwargs)
+            finally:  # stop and save the report even when `function` raises
+                profiler.stop()
+                with open(output_file, "w", encoding="utf-8") as f:
+                    f.write(profiler.output_html())
+
+        return wrapper
+
+    return decorator
diff --git a/test_requirements.txt b/test_requirements.txt
index 42ebfeb9b..d2c088546 100644
--- a/test_requirements.txt
+++ b/test_requirements.txt
@@ -7,3 +7,4 @@ coverage>=6.4.4
 flake8>=3.9.2
 isort>=5.8.0
 mypy>=0.740
+pyinstrument>=4.4.0

From c22a96b87e44d659f3facb848c80bcbf4d28cc2d Mon Sep 17 00:00:00 2001
From: Nodar Okroshiashvili <n.okroshiashvili@gmail.com>
Date: Tue, 14 Mar 2023 19:26:50 +0400
Subject: [PATCH 2/4] Update gitignore file to exclude profiles directory

---
 .gitignore | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/.gitignore b/.gitignore
index 3ba72acd9..724f605cf 100644
--- a/.gitignore
+++ b/.gitignore
@@ -105,6 +105,9 @@ venv.bak/
 # mypy
 .mypy_cache/
 
+# profiling
+/profiles
+
 # Miscelaneous
 .idea
 .vscode

From 43c341cf330b594fdc457710b57dd6e22d8d8fc0 Mon Sep 17 00:00:00 2001
From: Nodar Okroshiashvili <n.okroshiashvili@gmail.com>
Date: Tue, 14 Mar 2023 19:27:09 +0400
Subject: [PATCH 3/4] Add Bash/Zsh script to run time profiling for any Python
 module

---
 profiling/profiling.sh | 7 +++++++
 1 file changed, 7 insertions(+)
 create mode 100644 profiling/profiling.sh

diff --git a/profiling/profiling.sh b/profiling/profiling.sh
new file mode 100644
index 000000000..eca490474
--- /dev/null
+++ b/profiling/profiling.sh
@@ -0,0 +1,7 @@
+#!/usr/bin/env bash
+# Profile a Python script with pyinstrument: writes an HTML and a speedscope report.
+mkdir -p profiles/code_profiles
+stamp=$(date "+%Y.%m.%d-%H:%M")  # computed once so both reports share one timestamp
+# "$@" forwards the script path and its arguments without re-splitting (bash and zsh).
+pyinstrument -r html -o "profiles/code_profiles/performance_profile_${stamp}.html" "$@"
+pyinstrument -r speedscope -o "profiles/code_profiles/speedscope_${stamp}.json" "$@"

From 7731a4c2f533de1e1f82914c8ee8ef46accf96ba Mon Sep 17 00:00:00 2001
From: Nodar Okroshiashvili <n.okroshiashvili@gmail.com>
Date: Mon, 24 Apr 2023 13:16:38 +0400
Subject: [PATCH 4/4] Add a section in documentation describing code profiling

---
 docs/contribute/contribute_code.rst | 70 +++++++++++++++++++++++++++++
 1 file changed, 70 insertions(+)

diff --git a/docs/contribute/contribute_code.rst b/docs/contribute/contribute_code.rst
index 754d5c510..e8e346bfe 100644
--- a/docs/contribute/contribute_code.rst
+++ b/docs/contribute/contribute_code.rst
@@ -449,6 +449,76 @@ you want to know why we prefer tox, this
 will tell you everything ;)
 
 
+Code Profiling
+--------------
+
+If you want to profile your code, you can use the **profiling** module in the root directory. There you will find two files,
+`profiling.py` and `profiling.sh`. Both files do the same thing, but in different ways. The profiling.py file is a Python module
+containing a function that must be used as a decorator for the function/method we want to profile.
+The profiling.sh file is a bash/zsh script that you can run from the command line to profile a whole .py script file.
+Let us see how to use them. First, we start with the profiling.py file.
+
+I suspect that the `DropDuplicateFeatures` class takes more time than other classes, as it iterates over the columns and
+checks whether they are duplicated or not. So, I will profile the `DropDuplicateFeatures` class.
+
+First, I will find the file where this class resides and, at the top of it with the other imports, add the following line::
+
+    from profiling.profiling import profile_function
+
+Now, I will decorate the `DropDuplicateFeatures.fit` method with the `profile_function` function::
+    
+        @profile_function(output_file="profile.html")
+        def fit(self, X: pd.DataFrame, y: pd.Series = None):
+            ...
+
+The next step is to create a temporary .py file that will contain the code that we want to profile.
+
+For example, I will create a file named `temp.py` and copy the following code::
+
+    import pandas as pd
+    import numpy as np
+
+    from feature_engine.selection import DropDuplicateFeatures
+
+
+    if __name__ == "__main__":
+        rows = 10000
+        cols = 60000
+        col_names = [f"col_{i}" for i in range(cols)]
+        df = pd.DataFrame(np.random.randint(0, 100, size=(rows, cols)), columns=col_names)
+
+        transformer = DropDuplicateFeatures()
+        transformer.fit(df)
+
+        train_t = transformer.transform(df)
+
+
+Now, I will run the `temp.py` file from the command line::
+    
+        $ python temp.py
+
+This will create a file named `profile.html` in the root directory of the project. This file contains the profiling
+results. You can open it with your favorite browser and inspect the results.
+
+If you would rather not add extra imports and decorators to the code, you can use the `profiling.sh` file. This file is a bash/zsh
+script that you can run from the command line. Let us see how to use it.
+
+Again, I will profile the `DropDuplicateFeatures` class. I need to create a temporary .py file and put the same code as above.
+After that, open the terminal in the root directory and run the following command::
+
+    $ bash profiling/profiling.sh temp.py
+
+
+This will create a directory named `profiles` in the root directory of the project. Its `code_profiles` subdirectory contains two files:
+the first is an .html file that you can open with any browser, the second is a .json file that you can load into
+`speedscope <https://www.speedscope.app/>`_ to visualize the results.
+
+
+.. note::
+    To profile the memory usage, you can use the `memray` package. You can find more information about it
+    `here <https://bloomberg.github.io/memray/index.html>`_.
+
+
 Review Process
 --------------