Merge pull request #1101 from janmayer/fix_invalid_escape_sequence
Fix invalid escape sequence
MartinBubel authored Oct 27, 2024
2 parents aa49220 + c04c9b3 commit 1fcb408
Showing 52 changed files with 394 additions and 393 deletions.
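A quick note on what is actually being fixed: in Python, a backslash in a regular (non-raw) string literal that does not begin a recognised escape sequence (such as `\n` or `\t`) is an "invalid escape sequence". CPython has emitted a DeprecationWarning for these since 3.6 (a SyntaxWarning since 3.12) and documents them as eventually becoming errors, and LaTeX-heavy docstrings like GPy's trigger the warning constantly. The diff below therefore doubles the backslashes (or, as in `GPy/core/symbolic.py`, switches to a raw string). A minimal illustration — not GPy code — of the warning and the two fixes:

```python
# Not GPy code: a minimal illustration of the warning this PR silences.
# "\m" is not a recognised escape, so Python keeps the backslash but warns
# (DeprecationWarning on 3.6-3.11, SyntaxWarning on 3.12+).
bad = "p(\mathbf{y})"            # warns: invalid escape sequence '\m'

# Two equivalent fixes, both of which appear in this diff:
good_raw = r"p(\mathbf{y})"      # raw string: backslashes are literal
good_escaped = "p(\\mathbf{y})"  # doubled backslash

assert bad == good_raw == good_escaped  # all three hold the same text
```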
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -1,6 +1,7 @@
# Changelog

## Unreleased
* fix invalid escape sequence #1011 [janmayer]

## v1.13.2 (2024-07-21)
* update string checks in initialization method for latent variable and put `empirical_samples` init-method on a deprecation path
32 changes: 16 additions & 16 deletions GPy/core/gp.py
@@ -194,7 +194,7 @@ def save_model(self, output_filename, compress=True, save_data=True):
# Make sure to name this variable and the predict functions will "just work"
# In maths the predictive variable is:
# K_{xx} - K_{xp}W_{pp}^{-1}K_{px}
# W_{pp} := \texttt{Woodbury inv}
# W_{pp} := \\texttt{Woodbury inv}
# p := _predictive_variable

@property
@@ -283,7 +283,7 @@ def parameters_changed(self):

def log_likelihood(self):
"""
The log marginal likelihood of the model, :math:`p(\mathbf{y})`, this is the objective function of the model being optimised
The log marginal likelihood of the model, :math:`p(\\mathbf{y})`, this is the objective function of the model being optimised
"""
return self._log_marginal_likelihood

@@ -296,9 +296,9 @@ def _raw_predict(self, Xnew, full_cov=False, kern=None):
diagonal of the covariance is returned.
.. math::
p(f*|X*, X, Y) = \int^{\inf}_{\inf} p(f*|f,X*)p(f|X,Y) df
= N(f*| K_{x*x}(K_{xx} + \Sigma)^{-1}Y, K_{x*x*} - K_{xx*}(K_{xx} + \Sigma)^{-1}K_{xx*}
\Sigma := \texttt{Likelihood.variance / Approximate likelihood covariance}
p(f*|X*, X, Y) = \\int^{\\inf}_{\\inf} p(f*|f,X*)p(f|X,Y) df
= N(f*| K_{x*x}(K_{xx} + \\Sigma)^{-1}Y, K_{x*x*} - K_{xx*}(K_{xx} + \\Sigma)^{-1}K_{xx*}
\\Sigma := \\texttt{Likelihood.variance / Approximate likelihood covariance}
"""
mu, var = self.posterior._raw_predict(kern=self.kern if kern is None else kern, Xnew=Xnew, pred_var=self._predictive_variable, full_cov=full_cov)
if self.mean_function is not None:
@@ -702,7 +702,7 @@ def log_predictive_density(self, x_test, y_test, Y_metadata=None):
Calculation of the log predictive density
.. math:
p(y_{*}|D) = p(y_{*}|f_{*})p(f_{*}|\mu_{*}\\sigma^{2}_{*})
p(y_{*}|D) = p(y_{*}|f_{*})p(f_{*}|\\mu_{*}\\sigma^{2}_{*})
:param x_test: test locations (x_{*})
:type x_test: (Nx1) array
@@ -718,7 +718,7 @@ def log_predictive_density_sampling(self, x_test, y_test, Y_metadata=None, num_s
Calculation of the log predictive density by sampling
.. math:
p(y_{*}|D) = p(y_{*}|f_{*})p(f_{*}|\mu_{*}\\sigma^{2}_{*})
p(y_{*}|D) = p(y_{*}|f_{*})p(f_{*}|\\mu_{*}\\sigma^{2}_{*})
:param x_test: test locations (x_{*})
:type x_test: (Nx1) array
@@ -734,24 +734,24 @@ def log_predictive_density_sampling(self, x_test, y_test, Y_metadata=None, num_s

def _raw_posterior_covariance_between_points(self, X1, X2):
"""
Computes the posterior covariance between points. Does not account for
Computes the posterior covariance between points. Does not account for
normalization or likelihood
:param X1: some input observations
:param X2: other input observations
:returns:
:returns:
cov: raw posterior covariance: k(X1,X2) - k(X1,X) G^{-1} K(X,X2)
"""
return self.posterior.covariance_between_points(self.kern, self.X, X1, X2)


def posterior_covariance_between_points(self, X1, X2, Y_metadata=None,
likelihood=None,
def posterior_covariance_between_points(self, X1, X2, Y_metadata=None,
likelihood=None,
include_likelihood=True):
"""
Computes the posterior covariance between points. Includes likelihood
variance as well as normalization so that evaluation at (x,x) is consistent
Computes the posterior covariance between points. Includes likelihood
variance as well as normalization so that evaluation at (x,x) is consistent
with model.predict
:param X1: some input observations
@@ -762,8 +762,8 @@ def posterior_covariance_between_points(self, X1, X2, Y_metadata=None,
the predicted underlying latent function f.
:type include_likelihood: bool
:returns:
cov: posterior covariance, a Numpy array, Nnew x Nnew if
:returns:
cov: posterior covariance, a Numpy array, Nnew x Nnew if
self.output_dim == 1, and Nnew x Nnew x self.output_dim otherwise.
"""

@@ -774,7 +774,7 @@ def posterior_covariance_between_points(self, X1, X2, Y_metadata=None,
mean, _ = self._raw_predict(X1, full_cov=True)
if likelihood is None:
likelihood = self.likelihood
_, cov = likelihood.predictive_values(mean, cov, full_cov=True,
_, cov = likelihood.predictive_values(mean, cov, full_cov=True,
Y_metadata=Y_metadata)

if self.normalizer is not None:
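Several of the `gp.py` docstrings touched above spell out the same predictive distribution with the newly escaped backslashes. For reference, a cleaned-up rendering of what the `_raw_predict` docstring states (same notation; the index asymmetry of the original is made consistent, nothing new is derived):

```latex
p(f_* \mid X_*, X, Y)
  = \int p(f_* \mid f, X_*)\, p(f \mid X, Y)\, \mathrm{d}f
  = \mathcal{N}\!\bigl(f_* \mid K_{x_* x}(K_{xx} + \Sigma)^{-1} Y,\;
      K_{x_* x_*} - K_{x_* x}(K_{xx} + \Sigma)^{-1} K_{x x_*}\bigr),
\qquad
\Sigma := \text{likelihood variance / approximate likelihood covariance}
```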
38 changes: 19 additions & 19 deletions GPy/core/symbolic.py
@@ -44,7 +44,7 @@ def __init__(self, expressions, cacheable, derivatives=None, parameters=None, fu
self._set_derivatives(derivatives)
self._set_parameters(parameters)
# Convert the expressions to a list for common sub expression elimination
# We should find the following type of expressions: 'function', 'derivative', 'second_derivative', 'third_derivative'.
# We should find the following type of expressions: 'function', 'derivative', 'second_derivative', 'third_derivative'.
self.update_expression_list()

# Apply any global stabilisation operations to expressions.
@@ -86,7 +86,7 @@ def extract_vars(expr):
# object except as cached. For covariance functions this is X
# and Z, for likelihoods F and for mapping functions X.
self.cacheable_vars = [] # list of everything that's cacheable
for var in cacheable:
for var in cacheable:
self.variables[var] = [e for e in vars if e.name.split('_')[0]==var.lower()]
self.cacheable_vars += self.variables[var]
for var in cacheable:
@@ -105,7 +105,7 @@ def extract_derivative(function, derivative_arguments):
for derivative in derivatives:
derivative_arguments += self.variables[derivative]

# Do symbolic work to compute derivatives.
# Do symbolic work to compute derivatives.
for key, func in self.expressions.items():
# if func['function'].is_Matrix:
# rows = func['function'].shape[0]
@@ -126,7 +126,7 @@ def _set_parameters(self, parameters):
if theta.name in parameters:
val = parameters[theta.name]
# Add parameter.

self.link_parameters(Param(theta.name, val, None))
#self._set_attribute(theta.name, )

@@ -174,7 +174,7 @@ def eval_update_gradients(self, function, partial, **kwargs):
code = self.code[function]['derivative'][theta.name]
gradient[theta.name] = (partial*eval(code, self.namespace)).sum()
return gradient

def eval_gradients_X(self, function, partial, **kwargs):
if 'X' in kwargs:
gradients_X = np.zeros_like(kwargs['X'])
@@ -194,7 +194,7 @@ def code_parameters_changed(self):
for variable, code in self.variable_sort(self.code['parameters_changed']):
lcode += self._print_code(variable) + ' = ' + self._print_code(code) + '\n'
return lcode

def code_update_cache(self):
lcode = ''
for var in self.cacheable:
@@ -208,7 +208,7 @@ def code_update_cache(self):
for i, theta in enumerate(self.variables[var]):
lcode+= "\t" + var + '= np.atleast_2d(' + var + ')\n'
lcode+= "\t" + self._print_code(theta.name) + ' = ' + var + '[:, ' + str(i) + "]" + reorder + "\n"

for variable, code in self.variable_sort(self.code['update_cache']):
lcode+= self._print_code(variable) + ' = ' + self._print_code(code) + "\n"

@@ -250,7 +250,7 @@ def _set_attribute(self, name, value):
"""Make sure namespace gets updated when setting attributes."""
setattr(self, name, value)
self.namespace.update({name: getattr(self, name)})


def update_expression_list(self):
"""Extract a list of expressions from the dictionary of expressions."""
@@ -260,9 +260,9 @@ def update_expression_list(self):
for fname, fexpressions in self.expressions.items():
for type, texpressions in fexpressions.items():
if type == 'function':
self.expression_list.append(texpressions)
self.expression_list.append(texpressions)
self.expression_keys.append([fname, type])
self.expression_order.append(1)
self.expression_order.append(1)
elif type[-10:] == 'derivative':
for dtype, expression in texpressions.items():
self.expression_list.append(expression)
@@ -274,9 +274,9 @@ def update_expression_list(self):
elif type[:-10] == 'third_':
self.expression_order.append(5) #sym.count_ops(self.expressions[type][dtype]))
else:
self.expression_list.append(fexpressions[type])
self.expression_list.append(fexpressions[type])
self.expression_keys.append([fname, type])
self.expression_order.append(2)
self.expression_order.append(2)

# This step may be unecessary.
# Not 100% sure if the sub expression elimination is order sensitive. This step orders the list with the 'function' code first and derivatives after.
@@ -313,7 +313,7 @@ def extract_sub_expressions(self, cache_prefix='cache', sub_prefix='sub', prefix
sym_var = sym.var(cache_prefix + str(i))
self.variables[cache_prefix].append(sym_var)
replace_dict[expr.name] = sym_var

for i, expr in enumerate(params_change_list):
sym_var = sym.var(sub_prefix + str(i))
self.variables[sub_prefix].append(sym_var)
@@ -329,7 +329,7 @@ def extract_sub_expressions(self, cache_prefix='cache', sub_prefix='sub', prefix
for keys in self.expression_keys:
for replace, void in common_sub_expressions:
setInDict(self.expressions, keys, getFromDict(self.expressions, keys).subs(replace, replace_dict[replace.name]))

self.expressions['parameters_changed'] = {}
self.expressions['update_cache'] = {}
for var, expr in common_sub_expressions:
@@ -339,7 +339,7 @@ def extract_sub_expressions(self, cache_prefix='cache', sub_prefix='sub', prefix
self.expressions['update_cache'][replace_dict[var.name].name] = expr
else:
self.expressions['parameters_changed'][replace_dict[var.name].name] = expr


def _gen_code(self):
"""Generate code for the list of expressions provided using the common sub-expression eliminator to separate out portions that are computed multiple times."""
@@ -357,8 +357,8 @@ def match_key(expr):
return code

self.code = match_key(self.expressions)


def _expr2code(self, arg_list, expr):
"""Convert the given symbolic expression into code."""
code = lambdastr(arg_list, expr)
@@ -379,7 +379,7 @@ def _print_code(self, code):
def _display_expression(self, keys, user_substitutes={}):
"""Helper function for human friendly display of the symbolic components."""
# Create some pretty maths symbols for the display.
sigma, alpha, nu, omega, l, variance = sym.var('\sigma, \alpha, \nu, \omega, \ell, \sigma^2')
sigma, alpha, nu, omega, l, variance = sym.var(r'\sigma, \alpha, \nu, \omega, \ell, \sigma^2')
substitutes = {'scale': sigma, 'shape': alpha, 'lengthscale': l, 'variance': variance}
substitutes.update(user_substitutes)

@@ -416,5 +416,5 @@ def sort_key(x):
return int(digits[0])
else:
return x[0]

return sorted(var_dict.items(), key=sort_key, reverse=reverse)
@@ -134,10 +134,10 @@ def _recompute(mean_prior, K, ga_approx):
B = np.eye(num_data) + Sroot_tilde_K * tau_tilde_root[None,:]
L = jitchol(B)
V, _ = dtrtrs(L, Sroot_tilde_K, lower=1)
Sigma = K - np.dot(V.T,V) #K - KS^(1/2)BS^(1/2)K = (K^(-1) + \Sigma^(-1))^(-1)
Sigma = K - np.dot(V.T,V) #K - KS^(1/2)BS^(1/2)K = (K^(-1) + \\Sigma^(-1))^(-1)

aux_alpha , _ = dpotrs(L, tau_tilde_root * (np.dot(K, ga_approx.v) + mean_prior), lower=1)
alpha = ga_approx.v - tau_tilde_root * aux_alpha #(K + Sigma^(\tilde))^(-1) (/mu^(/tilde) - /mu_p)
alpha = ga_approx.v - tau_tilde_root * aux_alpha #(K + Sigma^(\\tilde))^(-1) (/mu^(/tilde) - /mu_p)
mu = np.dot(K, alpha) + mean_prior

return posteriorParams(mu=mu, Sigma=Sigma, L=L)
@@ -151,8 +151,8 @@ def _update_rank1(self, LLT, Kmn, delta_v, delta_tau, i):
DSYR(LLT,Kmn[:,i].copy(),delta_tau)
L = jitchol(LLT)
V,info = dtrtrs(L,Kmn,lower=1)
self.Sigma_diag = np.maximum(np.sum(V*V,-2), np.finfo(float).eps) #diag(K_nm (L L^\top)^(-1)) K_mn
si = np.sum(V.T*V[:,i],-1) #(V V^\top)[:,i]
self.Sigma_diag = np.maximum(np.sum(V*V,-2), np.finfo(float).eps) #diag(K_nm (L L^\\top)^(-1)) K_mn
si = np.sum(V.T*V[:,i],-1) #(V V^\\top)[:,i]
self.mu += (delta_v-delta_tau*self.mu[i])*si
#mu = np.dot(Sigma, v_tilde)

@@ -391,11 +391,11 @@ def _inference(self, Y, mean_prior, K, ga_approx, cav_params, likelihood, Z_tild


aux_alpha , _ = dpotrs(post_params.L, tau_tilde_root * (np.dot(K, ga_approx.v) + mean_prior), lower=1)
alpha = (ga_approx.v - tau_tilde_root * aux_alpha)[:,None] #(K + Sigma^(\tilde))^(-1) (/mu^(/tilde) - /mu_p)
alpha = (ga_approx.v - tau_tilde_root * aux_alpha)[:,None] #(K + Sigma^(\\tilde))^(-1) (/mu^(/tilde) - /mu_p)

LWi, _ = dtrtrs(post_params.L, np.diag(tau_tilde_root), lower=1)
Wi = np.dot(LWi.T,LWi)
symmetrify(Wi) #(K + Sigma^(\tilde))^(-1)
symmetrify(Wi) #(K + Sigma^(\\tilde))^(-1)

dL_dK = 0.5 * (tdot(alpha) - Wi)
dL_dthetaL = likelihood.ep_gradients(Y, cav_params.tau, cav_params.v, np.diag(dL_dK), Y_metadata=Y_metadata, quad_mode='gh')
@@ -530,7 +530,7 @@ def _init_approximations(self, Kmm, Kmn, num_data):
#initial values - Gaussian factors
#Initial values - Posterior distribution parameters: q(f|X,Y) = N(f|mu,Sigma)
LLT0 = Kmm.copy()
Lm = jitchol(LLT0) #K_m = L_m L_m^\top
Lm = jitchol(LLT0) #K_m = L_m L_m^\\top
Vm,info = dtrtrs(Lm, Kmn,lower=1)
# Lmi = dtrtri(Lm)
# Kmmi = np.dot(Lmi.T,Lmi)
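The inline comments edited in this expectation-propagation hunk all assert the same identities. Writing \(\tilde{\Sigma}\) for the diagonal site covariance, \(S = \tilde{\Sigma}^{-1}\), \(\mu_p\) for the prior mean, and \(B = I + S^{1/2} K S^{1/2}\) (the matrix whose Cholesky factor is `L` in the code), they read as follows — a restatement of the comments, with the \(B^{-1}\) that their shorthand elides:

```latex
\Sigma = K - K S^{1/2} B^{-1} S^{1/2} K = \bigl(K^{-1} + \tilde{\Sigma}^{-1}\bigr)^{-1},
\qquad
\alpha = (K + \tilde{\Sigma})^{-1} (\tilde{\mu} - \mu_p),
\qquad
\mu = K\alpha + \mu_p
```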
2 changes: 1 addition & 1 deletion GPy/inference/latent_function_inference/laplace.py
@@ -27,7 +27,7 @@ def __init__(self):
"""
Laplace Approximation
Find the moments \hat{f} and the hessian at this point
Find the moments \\hat{f} and the hessian at this point
(using Newton-Raphson) of the unnormalised posterior
"""
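The Laplace docstring above only names the procedure. A generic, self-contained sketch of the Newton-Raphson mode search it refers to — the standard GP-classification update of Rasmussen & Williams, Algorithm 3.1 — is given below. This is illustrative only, not GPy's implementation; the kernel matrix `K`, labels `y` in {-1, +1}, and the logistic likelihood are assumptions.

```python
import numpy as np

def laplace_mode(K, y, max_iter=50, tol=1e-8):
    """Newton-Raphson search for the mode f_hat of p(f | y) under a logistic
    likelihood with y in {-1, +1}; returns f_hat and the diagonal of
    W = -Hessian of log p(y | f) at the mode. Illustrative sketch only."""
    n = y.size
    f = np.zeros(n)
    for _ in range(max_iter):
        pi = 1.0 / (1.0 + np.exp(-f))        # sigmoid(f)
        grad = (y + 1) / 2.0 - pi            # d log p(y|f) / df
        W = pi * (1.0 - pi)                  # -d^2 log p(y|f) / df^2 (diagonal)
        sqrt_W = np.sqrt(W)
        B = np.eye(n) + sqrt_W[:, None] * K * sqrt_W[None, :]
        L = np.linalg.cholesky(B)
        b = W * f + grad
        # Newton step: f_new = K @ (b - sqrt_W * B^{-1} (sqrt_W * (K @ b)))
        v = np.linalg.solve(L, sqrt_W * (K @ b))
        f_new = K @ (b - sqrt_W * np.linalg.solve(L.T, v))
        if np.max(np.abs(f_new - f)) < tol:
            f = f_new
            break
        f = f_new
    pi = 1.0 / (1.0 + np.exp(-f))
    return f, pi * (1.0 - pi)
```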
12 changes: 6 additions & 6 deletions GPy/inference/latent_function_inference/pep.py
@@ -8,14 +8,14 @@
class PEP(LatentFunctionInference):
'''
Sparse Gaussian processes using Power-Expectation Propagation
for regression: alpha \approx 0 gives VarDTC and alpha = 1 gives FITC
Reference: A Unifying Framework for Sparse Gaussian Process Approximation using
for regression: alpha \\approx 0 gives VarDTC and alpha = 1 gives FITC
Reference: A Unifying Framework for Sparse Gaussian Process Approximation using
Power Expectation Propagation, https://arxiv.org/abs/1605.07066
'''
const_jitter = 1e-6

def __init__(self, alpha):
super(PEP, self).__init__()
self.alpha = alpha
@@ -69,7 +69,7 @@ def inference(self, kern, X, Z, likelihood, Y, mean_function=None, Y_metadata=No
#compute dL_dR
Uv = np.dot(U, v)
dL_dR = 0.5*(np.sum(U*np.dot(U,P), 1) - (1.0+alpha_const_term)/beta_star + np.sum(np.square(Y), 1) - 2.*np.sum(Uv*Y, 1) \
+ np.sum(np.square(Uv), 1))*beta_star**2
+ np.sum(np.square(Uv), 1))*beta_star**2

# Compute dL_dKmm
vvT_P = tdot(v.reshape(-1,1)) + P
16 changes: 8 additions & 8 deletions GPy/inference/latent_function_inference/posterior.py
@@ -82,7 +82,7 @@ def mean(self):
Posterior mean
$$
K_{xx}v
v := \texttt{Woodbury vector}
v := \\texttt{Woodbury vector}
$$
"""
if self._mean is None:
@@ -95,7 +95,7 @@ def covariance(self):
Posterior covariance
$$
K_{xx} - K_{xx}W_{xx}^{-1}K_{xx}
W_{xx} := \texttt{Woodbury inv}
W_{xx} := \\texttt{Woodbury inv}
$$
"""
if self._covariance is None:
@@ -146,8 +146,8 @@ def woodbury_chol(self):
"""
return $L_{W}$ where L is the lower triangular Cholesky decomposition of the Woodbury matrix
$$
L_{W}L_{W}^{\top} = W^{-1}
W^{-1} := \texttt{Woodbury inv}
L_{W}L_{W}^{\\top} = W^{-1}
W^{-1} := \\texttt{Woodbury inv}
$$
"""
if self._woodbury_chol is None:
@@ -178,8 +178,8 @@ def woodbury_inv(self):
"""
The inverse of the woodbury matrix, in the gaussian likelihood case it is defined as
$$
(K_{xx} + \Sigma_{xx})^{-1}
\Sigma_{xx} := \texttt{Likelihood.variance / Approximate likelihood covariance}
(K_{xx} + \\Sigma_{xx})^{-1}
\\Sigma_{xx} := \\texttt{Likelihood.variance / Approximate likelihood covariance}
$$
"""
if self._woodbury_inv is None:
@@ -200,8 +200,8 @@ def woodbury_vector(self):
"""
Woodbury vector in the gaussian likelihood case only is defined as
$$
(K_{xx} + \Sigma)^{-1}Y
\Sigma := \texttt{Likelihood.variance / Approximate likelihood covariance}
(K_{xx} + \\Sigma)^{-1}Y
\\Sigma := \\texttt{Likelihood.variance / Approximate likelihood covariance}
$$
"""
if self._woodbury_vector is None:
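The `posterior.py` docstrings above all refer to the same two cached quantities. A minimal numpy sketch of how they yield the posterior mean and covariance at the training inputs in the exact Gaussian-likelihood case; the function name and arguments are illustrative, not GPy's API:

```python
import numpy as np

def woodbury_posterior(K, Y, noise_variance):
    """GP regression posterior at the training inputs, written in terms of
    the 'Woodbury inverse' and 'Woodbury vector' named in the docstrings
    above. Illustrative sketch only, not GPy's implementation."""
    n = K.shape[0]
    woodbury_inv = np.linalg.inv(K + noise_variance * np.eye(n))  # (K_xx + Sigma)^{-1}
    woodbury_vector = woodbury_inv @ Y                            # (K_xx + Sigma)^{-1} Y
    mean = K @ woodbury_vector                                    # K_xx v
    cov = K - K @ woodbury_inv @ K                                # K_xx - K_xx (K_xx + Sigma)^{-1} K_xx
    return mean, cov
```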
