comment fixes, README updates, remove extra sympy imports #24

Open · wants to merge 2 commits into base: master
15 changes: 9 additions & 6 deletions README.md
@@ -16,17 +16,17 @@ Move into a clean directory and run the following Python commands:
import aifeynman

aifeynman.get_demos("example_data") # Download examples from server
aifeynman.run_aifeynman("./example_data/", "example1.txt", 60, "14ops.txt", polyfit_deg=3, NN_epochs=500)
aifeynman.run_aifeynman("./example_data/", "example1.txt", 30, "14ops.txt", polyfit_deg=3, NN_epochs=500)

This example will get solved in about 10-30 minutes depending on what computer you have and whether you have a GPU.

Here ‘example.txt’ contains the data table to perform symbolic regression on, with columns separated by spaces, commas or tabs. The other parameters control the search: here the brute-force modules tries combinations of the 14 basic operations in ‘14ops.txt’ for up to 30 seconds, polynomial fits are tried up to degree 3, and the interpolating neural network is trained for up to 500 epochs.
Here ‘example1.txt’ contains the data table to perform symbolic regression on, with columns separated by spaces, commas or tabs. The other parameters control the search: here the brute-force module tries combinations of the 14 basic operations in ‘14ops.txt’ for up to 30 seconds, polynomial fits are tried up to degree 3, and the interpolating neural network is trained for up to 500 epochs.
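
To illustrate the expected input format, here is a minimal sketch that writes a toy data table in the layout described above (the file name and the target relation are made up for illustration):

```python
import numpy as np

# Toy table: two independent variables plus the dependent variable in the
# last column, separated by spaces as described above. The file name and
# the relation y = x1*x2 are hypothetical.
rng = np.random.default_rng(0)
x1 = rng.uniform(1.0, 5.0, size=100)
x2 = rng.uniform(1.0, 5.0, size=100)
np.savetxt("./example_data/toy.txt", np.column_stack([x1, x2, x1 * x2]))
```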

# AI-Feynman

This code is an improved implementation of AI Feynman: a Physics-Inspired Method for Symbolic Regression, Silviu-Marian Udrescu and Max Tegmark (2019) [[Science Advances](https://advances.sciencemag.org/content/6/16/eaay2631/tab-pdf)] and AI Feynman 2.0: Pareto-optimal symbolic regression exploiting graph modularity, Udrescu S.M. et al. (2020) [[arXiv](https://arxiv.org/abs/2006.10782)].

Please check [this Medium article](https://towardsdatascience.com/ai-feynman-2-0-learning-regression-equations-from-data-3232151bd929) for a more detailed eplanation of how to get the code running.
Please check [this Medium article](https://towardsdatascience.com/ai-feynman-2-0-learning-regression-equations-from-data-3232151bd929) for a more detailed explanation of how to get the code running.

In order to get started, run compile.sh to compile the fortran files used for the brute force code.

@@ -43,10 +43,11 @@ The main function of the code, called by the user, has the following parameters:
* vars_name - names of the variables appearing in the equation (including the name of the output variable). This should be passed as a list of strings, with the names of the variables appearing in the same order as they are in the file containing the data
* test_percentage - percentage of the input data to be kept aside and used as the test set
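
As a sketch of how these two parameters might be passed (the variable names are illustrative, the other arguments follow the example at the top of this README, and the keyword spellings assume the parameter names documented above):

```python
import aifeynman

# Hypothetical call: vars_name lists the variables in the same order as
# the columns of the data file, including the output variable; 20% of
# the rows are held back as a test set.
aifeynman.run_aifeynman("./example_data/", "example1.txt", 30, "14ops.txt",
                        polyfit_deg=3, NN_epochs=500,
                        vars_name=["x1", "x2", "y"],
                        test_percentage=20)
```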

The data file to be analyzed should be a text file with each column containing the numerical values of each (dependent and independent) variable. The solution file will be saved in the directory called "results" under the name solution_{filename}. The solution file will contain several rows (corresponding to each point on the Pareto frontier), each row showing:
The data file to be analyzed should be a text file with each column containing the numerical values of each (dependent and independent) variable. The solution file will be saved in the directory called "results" under the name solution_{filename}. The solution file will contain several rows (corresponding to each point on the [Pareto frontier](https://en.wikipedia.org/wiki/Pareto_efficiency#Pareto_frontier)), each row showing:

* the mean logarithm in based 2 of the error of the discovered equation applied to the input data (this can be though of as the average error in bits)
* the cummulative logarithm in based 2 of the error of the discovered equation applied to the input data (this can be though of as the cummulative error in bits)
* the mean logarithm in base 2 of the error of the discovered equation applied to the input data (this can be thought of as the average error in bits)
* the cumulative logarithm in base 2 of the error of the discovered equation applied to the input data (this can be thought of as the cumulative error in bits)
* the complexity of the discovered equation (in bits)
* the error of the discovered equation applied to the input data
* the symbolic expression of the discovered equation
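
A minimal sketch of reading such a solution file back, assuming the five fields above appear whitespace-separated in each row with the symbolic expression last (this layout is an assumption, not something documented here):

```python
def read_solution(path):
    # Each row: mean log2 error, cumulative log2 error, complexity (bits),
    # error on the input data, and the symbolic expression itself.
    rows = []
    with open(path) as f:
        for line in f:
            parts = line.split(maxsplit=4)  # keep spaces inside the expression
            if len(parts) == 5:
                mean_bits, cum_bits, complexity, error, expr = parts
                rows.append((float(mean_bits), float(cum_bits),
                             float(complexity), float(error), expr.strip()))
    return rows

# e.g. read_solution("results/solution_example1.txt")
```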
@@ -72,3 +73,5 @@ If you compare with, build on, or use aspects of the AI Feynman work, please cite
publisher={American Association for the Advancement of Science}
}
```
---
[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
10 changes: 4 additions & 6 deletions aifeynman/RPN_to_pytorch.py
@@ -21,15 +21,13 @@
from .S_get_number_DL_snapped import get_number_DL_snapped
from .S_get_symbolic_expr_error import get_symbolic_expr_error

# parameters: path to data, RPN expression (obtained from bf)
# Parameters: path to data, RPN expression (obtained from bf)
def RPN_to_pytorch(data, math_expr, lr = 1e-2, N_epochs = 500):
param_dict = {}
unsnapped_param_dict = {'p':1}

def unsnap_recur(expr, param_dict, unsnapped_param_dict):
"""Recursively transform each numerical value into a learnable parameter."""
import sympy
from sympy import Symbol
# Recursively transform each numerical value into a learnable parameter.
if isinstance(expr, sympy.numbers.Float) or isinstance(expr, sympy.numbers.Integer) or isinstance(expr, sympy.numbers.Rational) or isinstance(expr, sympy.numbers.Pi):
used_param_names = list(param_dict.keys()) + list(unsnapped_param_dict)
unsnapped_param_name = get_next_available_key(used_param_names, "p", is_underscore=False)
@@ -47,7 +45,7 @@ def unsnap_recur(expr, param_dict, unsnapped_param_dict):


def get_next_available_key(iterable, key, midfix="", suffix="", is_underscore=True):
"""Get the next available key that does not collide with the keys in the dictionary."""
# Get the next available key that does not collide with the keys in the dictionary.
if key + suffix not in iterable:
return key + suffix
else:
@@ -95,7 +93,7 @@ def get_next_available_key(iterable, key, midfix="", suffix="", is_underscore=True):
y = torch.from_numpy(data[:,-1]).float()

for i in range(N_epochs):
# this order is fixed i.e. first parameters
# This order is fixed, i.e., parameters first
yy = f(*input)
loss = torch.mean((yy-y)**2)
loss.backward()
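
For context on this change: unsnap_recur walks a sympy expression tree and replaces each numeric leaf with a named parameter so it can be trained by gradient descent. A simplified, self-contained sketch of the same idea (not the repository's exact implementation; for instance, the Pi case handled above is ignored here):

```python
import sympy

def unsnap(expr, params=None):
    # Replace each numeric constant in a sympy expression with a fresh
    # symbol p0, p1, ..., returning the new expression and the mapping.
    if params is None:
        params = {}
    if isinstance(expr, sympy.Number):
        name = f"p{len(params)}"
        params[name] = float(expr)
        return sympy.Symbol(name), params
    if not expr.args:  # symbols and other leaves pass through unchanged
        return expr, params
    new_args = []
    for arg in expr.args:
        new_arg, params = unsnap(arg, params)
        new_args.append(new_arg)
    return expr.func(*new_args), params

expr, params = unsnap(sympy.sympify("2.5*x + 3.1"))
# e.g. p0 + p1*x with {'p0': 3.1, 'p1': 2.5}; the exact numbering
# depends on sympy's internal argument ordering.
```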
2 changes: 1 addition & 1 deletion aifeynman/S_NN_get_gradients.py
@@ -1,4 +1,4 @@
# SAve a file with 2*(n-1) columns contaning the (n-1) independent variables and the (n-1) gradients of the trained NN with respect these variables
# Save a file with 2*(n-1) columns containing the (n-1) independent variables and the (n-1) gradients of the trained NN with respect to these variables

import matplotlib.pyplot as plt
import numpy as np
4 changes: 2 additions & 2 deletions aifeynman/S_NN_train.py
@@ -100,8 +100,8 @@ def forward(self, x):
x = self.linear5(x)
return x

my_dataset = utils.TensorDataset(factors,product) # create your datset
my_dataloader = utils.DataLoader(my_dataset, batch_size=bs, shuffle=True) # create your dataloader
my_dataset = utils.TensorDataset(factors,product) # Create your dataset
my_dataloader = utils.DataLoader(my_dataset, batch_size=bs, shuffle=True) # Create your dataloader

if is_cuda:
model_feynman = SimpleNet(n_variables).cuda()
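
For context, the TensorDataset/DataLoader pattern used above in a minimal, self-contained form (the tensor shapes and batch size are illustrative):

```python
import torch
import torch.utils.data as utils

# Illustrative shapes: 256 samples, 3 input variables, 1 target each.
factors = torch.randn(256, 3)
product = torch.randn(256)

my_dataset = utils.TensorDataset(factors, product)
my_dataloader = utils.DataLoader(my_dataset, batch_size=64, shuffle=True)

for batch_x, batch_y in my_dataloader:
    pass  # each iteration yields one shuffled mini-batch of (inputs, targets)
```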
12 changes: 5 additions & 7 deletions aifeynman/S_add_bf_on_numbers_on_pareto.py
@@ -1,4 +1,4 @@
# Adds on the pareto all the snapped versions of a given expression (all paramters are snapped in the end)
# Adds to the Pareto frontier all the snapped versions of a given expression (all parameters are snapped in the end)

import numpy as np
import matplotlib.pyplot as plt
@@ -28,13 +28,11 @@

from .S_get_number_DL_snapped import get_number_DL_snapped

# parameters: path to data, math (not RPN) expression
# Parameters: path to data, math (not RPN) expression
def add_bf_on_numbers_on_pareto(pathdir, filename, PA, math_expr):
input_data = np.loadtxt(pathdir+filename)
def unsnap_recur(expr, param_dict, unsnapped_param_dict):
"""Recursively transform each numerical value into a learnable parameter."""
import sympy
from sympy import Symbol
# Recursively transform each numerical value into a learnable parameter.
if isinstance(expr, sympy.numbers.Float) or isinstance(expr, sympy.numbers.Integer) or isinstance(expr, sympy.numbers.Rational) or isinstance(expr, sympy.numbers.Pi):
used_param_names = list(param_dict.keys()) + list(unsnapped_param_dict)
unsnapped_param_name = get_next_available_key(used_param_names, "p", is_underscore=False)
@@ -52,7 +50,7 @@ def unsnap_recur(expr, param_dict, unsnapped_param_dict):


def get_next_available_key(iterable, key, midfix="", suffix="", is_underscore=True):
"""Get the next available key that does not collide with the keys in the dictionary."""
# Get the next available key that does not collide with the keys in the dictionary.
if key + suffix not in iterable:
return key + suffix
else:
@@ -83,7 +81,7 @@ def get_next_available_key(iterable, key, midfix="", suffix="", is_underscore=True):
bf_numbers = np.loadtxt("results.dat",usecols=(1,),dtype="str")
new_numbers = copy.deepcopy(eq_numbers)

# replace the number under consideration by all the proposed bf numbers
# Replace the number under consideration by all the proposed bf numbers
for kk in range(len(bf_numbers)):
eq = eq_
new_numbers[w] = parse_expr(RPN_to_eq(bf_numbers[kk]))
24 changes: 11 additions & 13 deletions aifeynman/S_add_snap_expr_on_pareto.py
@@ -1,4 +1,4 @@
# Adds on the pareto all the snapped versions of a given expression (all paramters are snapped in the end)
# Adds to the Pareto frontier all the snapped versions of a given expression (all parameters are snapped in the end)

import numpy as np
import matplotlib.pyplot as plt
@@ -32,13 +32,11 @@ def intify(expr):
ints = [i for i in floats if int(i) == i]
return expr.xreplace(dict(zip(ints, [int(i) for i in ints])))

# parameters: path to data, math (not RPN) expression
# Parameters: path to data, math (not RPN) expression
def add_snap_expr_on_pareto(pathdir, filename, math_expr, PA, DR_file=""):
input_data = np.loadtxt(pathdir+filename)
def unsnap_recur(expr, param_dict, unsnapped_param_dict):
"""Recursively transform each numerical value into a learnable parameter."""
import sympy
from sympy import Symbol
# Recursively transform each numerical value into a learnable parameter.
if isinstance(expr, sympy.numbers.Float) or isinstance(expr, sympy.numbers.Integer) or isinstance(expr, sympy.numbers.Rational) or isinstance(expr, sympy.numbers.Pi):
used_param_names = list(param_dict.keys()) + list(unsnapped_param_dict)
unsnapped_param_name = get_next_available_key(used_param_names, "pp", is_underscore=False)
@@ -56,7 +54,7 @@ def unsnap_recur(expr, param_dict, unsnapped_param_dict):


def get_next_available_key(iterable, key, midfix="", suffix="", is_underscore=True):
"""Get the next available key that does not collide with the keys in the dictionary."""
# Get the next available key that does not collide with the keys in the dictionary.
if key + suffix not in iterable:
return key + suffix
else:
@@ -85,8 +83,8 @@ def get_next_available_key(iterable, key, midfix="", suffix="", is_underscore=True):
new_numbers = integerSnap(eq_numbers,w+1)
new_numbers = {"pp"+str(k): v for k, v in new_numbers.items()}
temp_unsnapped_param_dict.update(new_numbers)
#for kk in range(len(new_numbers)):
# eq_numbers[new_numbers[kk][0]] = new_numbers[kk][1]
# for kk in range(len(new_numbers)):
# eq_numbers[new_numbers[kk][0]] = new_numbers[kk][1]
new_eq = re.sub(r"(pp\d*)",r"{\1}",str(eq))
new_eq = new_eq.format_map(temp_unsnapped_param_dict)
integer_snapped_expr = integer_snapped_expr + [parse_expr(new_eq)]
@@ -110,8 +108,8 @@ def get_next_available_key(iterable, key, midfix="", suffix="", is_underscore=True):
new_numbers = rationalSnap(eq_numbers,w+1)
new_numbers = {"pp"+str(k): v for k, v in new_numbers.items()}
temp_unsnapped_param_dict.update(new_numbers)
#for kk in range(len(new_numbers)):
# eq_numbers_snap[new_numbers[kk][0]] = new_numbers[kk][1][1:3]
# for kk in range(len(new_numbers)):
# eq_numbers_snap[new_numbers[kk][0]] = new_numbers[kk][1][1:3]
new_eq = re.sub(r"(pp\d*)",r"{\1}",str(eq))
new_eq = new_eq.format_map(temp_unsnapped_param_dict)
rational_snapped_expr = rational_snapped_expr + [parse_expr(new_eq)]
@@ -125,7 +123,7 @@ def get_next_available_key(iterable, key, midfix="", suffix="", is_underscore=True):
# Calculate the error of the new, snapped expression
snapped_error = get_symbolic_expr_error(input_data,str(snapped_expr[i]))
# Calculate the complexity of the new, snapped expression
#expr = simplify(powsimp(snapped_expr[i]))
# expr = simplify(powsimp(snapped_expr[i]))
expr = snapped_expr[i]
for s in (expr.free_symbols):
s = symbols(str(s), real = True)
@@ -144,7 +142,7 @@ def get_next_available_key(iterable, key, midfix="", suffix="", is_underscore=True):
if n_operations!=0 or n_variables!=0:
snapped_complexity = snapped_complexity + (n_variables+n_operations)*np.log2((n_variables+n_operations))

# If a da file is provided, replace the variables with the actual ones before calculating the complexity
# If a .dat file is provided, replace the variables with the actual ones before calculating the complexity
else:
dr_data = np.loadtxt(DR_file,dtype="str",delimiter=",")

@@ -157,7 +155,7 @@ def get_next_available_key(iterable, key, midfix="", suffix="", is_underscore=True):
expr = parse_expr(expr)
for s in (expr.free_symbols):
s = symbols(str(s), real = True)
#expr = simplify(parse_expr(str(expr),locals()))
# expr = simplify(parse_expr(str(expr),locals()))
expr = parse_expr(str(expr),locals())
snapped_complexity = 0
for j in numbers_expr:
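
The integerSnap and rationalSnap helpers used above round fitted constants to nearby integers or small rationals before the snapped expression is re-scored. A hedged sketch of the idea (these are not the repository's routines, whose signatures may differ):

```python
from fractions import Fraction

def integer_snap(values, n):
    # Snap the n values that are closest to an integer; return {index: int}.
    order = sorted(range(len(values)),
                   key=lambda i: abs(values[i] - round(values[i])))
    return {i: int(round(values[i])) for i in order[:n]}

def rational_snap(values, n, max_den=10):
    # Snap the n values best approximated by a small-denominator rational.
    approx = [Fraction(v).limit_denominator(max_den) for v in values]
    order = sorted(range(len(values)),
                   key=lambda i: abs(values[i] - float(approx[i])))
    return {i: approx[i] for i in order[:n]}

print(integer_snap([2.98, 1.5, 0.04], 1))  # {0: 3}
print(rational_snap([0.3333, 1.41], 1))    # {0: Fraction(1, 3)}
```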
2 changes: 1 addition & 1 deletion aifeynman/S_add_sym_on_pareto.py
@@ -1,4 +1,4 @@
# Combines 2 pareto fromtier obtained from the separability test into a new one.
# Combines 2 Pareto frontiers obtained from the separability test into a new one.

from .get_pareto import Point, ParetoSet
from sympy.parsing.sympy_parser import parse_expr
6 changes: 3 additions & 3 deletions aifeynman/S_brute_force.py
@@ -1,6 +1,6 @@
# runs BF on data and saves the best RPN expressions in results.dat
# all the .dat files are created after I run this script
# the .scr are needed to run the fortran code
# Runs BF on data and saves the best RPN expressions in results.dat
# All the .dat files are created after this script is run
# The .scr files are needed to run the Fortran code

import csv
import os
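
Since the brute-force results in results.dat are stored as RPN strings, here is a generic stack-based sketch of turning RPN tokens into an infix expression (the token names are illustrative and need not match the codes in 14ops.txt or the repository's RPN_to_eq):

```python
def rpn_to_infix(tokens):
    # Generic stack-based RPN reader; operands push, operators pop.
    binary = {"+", "-", "*", "/"}
    unary = {"sin", "cos", "exp", "log"}
    stack = []
    for tok in tokens:
        if tok in binary:
            b, a = stack.pop(), stack.pop()
            stack.append(f"({a} {tok} {b})")
        elif tok in unary:
            stack.append(f"{tok}({stack.pop()})")
        else:
            stack.append(tok)  # variable or numeric literal
    assert len(stack) == 1, "malformed RPN"
    return stack[0]

print(rpn_to_infix(["x", "y", "+", "sin"]))  # sin((x + y))
```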
10 changes: 6 additions & 4 deletions aifeynman/S_brute_force_comp.py
@@ -1,7 +1,9 @@
# runs BF on data and saves the best RPN expressions in results.dat. This is used to find function proportional to the desired one using gradients
# all the .dat files are created after I run this script
# the .scr are needed to run the fortran code

"""
Runs BF on data and saves the best RPN expressions in results.dat
All the .dat files are created after this script is run
The .scr files are needed to run the Fortran code
This is used to find a function proportional to the desired one using gradients
"""
import csv
import os
import shutil
8 changes: 5 additions & 3 deletions aifeynman/S_brute_force_gen_sym.py
@@ -1,6 +1,8 @@
# runs BF on data and saves the best RPN expressions in results.dat
# all the .dat files are created after I run this script
# the .scr are needed to run the fortran code
"""
Runs BF on data and saves the best RPN expressions in results.dat
All the .dat files are created after this script is run
The .scr files are needed to run the Fortran code
"""

import csv
import os
6 changes: 3 additions & 3 deletions aifeynman/S_brute_force_number.py
@@ -1,6 +1,6 @@
# runs BF on data and saves the best RPN expressions in results.dat
# all the .dat files are created after I run this script
# the .scr are needed to run the fortran code
# Runs BF on data and saves the best RPN expressions in results.dat
# All the .dat files are created after this script is run
# The .scr files are needed to run the Fortran code

import csv
import os
4 changes: 2 additions & 2 deletions aifeynman/S_combine_pareto.py
@@ -1,4 +1,4 @@
# Combines 2 pareto fromtier obtained from the separability test into a new one.
# Combines 2 Pareto frontiers obtained from the separability test into a new one.

from .get_pareto import Point, ParetoSet
from .S_get_symbolic_expr_error import get_symbolic_expr_error
@@ -18,7 +18,7 @@ def combine_pareto(input_data,PA1,PA2,idx_list_1,idx_list_2,PA,sep_type = "+"):
for i in range(len(PA1)):
for j in range(len(PA2)):
try:
# replace the variables from the separated parts with the variables reflecting the new combined equation
# Replace the variables from the separated parts with the variables reflecting the new combined equation
exp1 = PA1[i][2]
exp2 = PA2[j][2]
for k in range(len(idx_list_1)-1,-1,-1):
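
The loop above rewrites the variables of each separated sub-expression back to their global indices before the two parts are combined. A simplified sketch using a simultaneous substitution (the idx_list semantics are assumed from the code above):

```python
from sympy import sympify, symbols

def reindex(expr_str, idx_list):
    # Map the local variables x0..x(k-1) of a separated sub-expression
    # back to their global column indices: idx_list=[0, 3] sends x1 to x3.
    # simultaneous=True avoids collisions when indices swap places.
    mapping = {symbols(f"x{k}"): symbols(f"x{i}")
               for k, i in enumerate(idx_list)}
    return sympify(expr_str).subs(mapping, simultaneous=True)

print(reindex("x0 + sin(x1)", [0, 3]))  # x0 + sin(x3)
```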
12 changes: 5 additions & 7 deletions aifeynman/S_final_gd.py
@@ -21,15 +21,13 @@
from .S_get_number_DL_snapped import get_number_DL_snapped
from .S_get_symbolic_expr_error import get_symbolic_expr_error

# parameters: path to data, RPN expression (obtained from bf)
# Parameters: path to data, RPN expression (obtained from bf)
def final_gd(data, math_expr, lr = 1e-2, N_epochs = 5000):
param_dict = {}
unsnapped_param_dict = {'p':1}

def unsnap_recur(expr, param_dict, unsnapped_param_dict):
"""Recursively transform each numerical value into a learnable parameter."""
import sympy
from sympy import Symbol
# Recursively transform each numerical value into a learnable parameter.
if isinstance(expr, sympy.numbers.Float) or isinstance(expr, sympy.numbers.Integer) or isinstance(expr, sympy.numbers.Rational) or isinstance(expr, sympy.numbers.Pi):
used_param_names = list(param_dict.keys()) + list(unsnapped_param_dict)
unsnapped_param_name = get_next_available_key(used_param_names, "p", is_underscore=False)
@@ -93,7 +91,7 @@ def get_next_available_key(iterable, key, midfix="", suffix="", is_underscore=True):


for i in range(N_epochs):
# this order is fixed i.e. first parameters
# This order is fixed, i.e., parameters first
yy = f(*input)
loss = torch.mean((yy-y)**2)
loss.backward()
@@ -105,7 +103,7 @@ def get_next_available_key(iterable, key, midfix="", suffix="", is_underscore=True):
break

for i in range(N_epochs):
# this order is fixed i.e. first parameters
# This order is fixed, i.e., parameters first
yy = f(*input)
loss = torch.mean((yy-y)**2)
loss.backward()
@@ -120,7 +118,7 @@ def get_next_available_key(iterable, key, midfix="", suffix="", is_underscore=True):
if torch.isnan(trainable_parameters[nan_i])==True or abs(trainable_parameters[nan_i])>1e7:
return 1000000, 10000000, "1"

# get the updated symbolic regression
# Get the updated symbolic regression
ii = -1
for parm in unsnapped_param_dict:
if ii == -1:
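
The loops above follow the standard compute-loss/backward/step pattern on the unsnapped constants. A toy, self-contained version of the same pattern (the data, model, and learning rate are made up):

```python
import torch

# Fit y = a*x + b by minimizing the mean squared error, mirroring the
# loss/backward/step pattern in final_gd above.
x = torch.linspace(0, 1, 50)
y = 3.0 * x + 0.5
a = torch.tensor(1.0, requires_grad=True)
b = torch.tensor(0.0, requires_grad=True)
opt = torch.optim.Adam([a, b], lr=1e-2)

for _ in range(500):
    opt.zero_grad()
    loss = torch.mean((a * x + b - y) ** 2)
    loss.backward()
    opt.step()

print(a.item(), b.item())  # approaches 3.0 and 0.5
```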