neuropsychology · DominiqueMakowski · Oct 2, 2023 · Sep 16, 2023 · Sep 16, 2023 · Sep 21, 2023
diff --git a/neurokit2/complexity/complexity_decorrelation.py b/neurokit2/complexity/complexity_decorrelation.py
@@ -1,10 +1,11 @@
+import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd
 
 from ..signal import signal_autocor
 
 
-def complexity_decorrelation(signal):
+def complexity_decorrelation(signal, show=False):
     """**Decorrelation Time (DT)**
 
     The decorrelation time (DT) is defined as the time (in samples) of the first zero crossing of
@@ -17,6 +18,8 @@ def complexity_decorrelation(signal):
     ----------
     signal : Union[list, np.array, pd.Series]
         The signal (i.e., a time series) in the form of a vector of values.
+    show : bool
+        If True, will plot the autocorrelation with the decorrelation time marked.
 
     Returns
     -------
@@ -36,11 +39,15 @@ def complexity_decorrelation(signal):
 
       import neurokit2 as nk
 
-      # Simulate a signal with duration os 2s
-      signal = nk.signal_simulate(duration=2, frequency=[5, 9, 12])
+      # Simulate a signal
+      signal = nk.signal_simulate(duration=5, sampling_rate=100, frequency=[5, 6], noise=0.5)
 
       # Compute DT
-      dt, _ = nk.complexity_decorrelation(signal)
+      @savefig p_complexity_decorrelation1.png scale=100%
+      dt, _ = nk.complexity_decorrelation(signal, show=True)
+      @suppress
+      plt.close()
+
       dt
 
     References
@@ -60,12 +67,28 @@ def complexity_decorrelation(signal):
         )
 
     # Unbiased autocor (see https://github.com/mne-tools/mne-features/)
-    autocor, _ = signal_autocor(signal, method="unbiased")
+    autocor, _ = signal_autocor(signal, unbiased=True)
 
     # Get zero-crossings
     zc = np.diff(np.sign(autocor)) != 0
     if np.any(zc):
         dt = np.argmax(zc) + 1
     else:
         dt = -1
+
+    if show is True:
+        # Max length of autocorrelation to plot
+        max_len = int(dt * 4)
+        if max_len > len(autocor):
+            max_len = len(autocor)
+
+        plt.plot(autocor[0:max_len])
+        plt.xlabel("Lag")
+        plt.ylabel("Autocorrelation")
+        plt.xticks(np.arange(0, max_len, step=dt).astype(int))
+        plt.axvline(dt, color="red", linestyle="--", label=f"DT = {dt}")
+        plt.axhline(0, color="black", linestyle="--")
+        plt.title("Decorrelation Time (DT)")
+        plt.legend()
+
     return dt, {}
diff --git a/neurokit2/complexity/complexity_lyapunov.py b/neurokit2/complexity/complexity_lyapunov.py
@@ -5,8 +5,9 @@
 import numpy as np
 import pandas as pd
 import sklearn.metrics.pairwise
+import sklearn.neighbors
 
-from ..misc import NeuroKitWarning
+from ..misc import NeuroKitWarning, find_knee
 from ..signal.signal_psd import signal_psd
 from .utils_complexity_embedding import complexity_embedding
 
@@ -16,17 +17,15 @@ def complexity_lyapunov(
     delay=1,
     dimension=2,
     method="rosenstein1993",
-    len_trajectory=20,
-    matrix_dim=4,
-    min_neighbors="default",
+    separation="auto",
     **kwargs,
 ):
     """**(Largest) Lyapunov Exponent (LLE)**
 
     Lyapunov exponents (LE) describe the rate of exponential separation (convergence or divergence)
     of nearby trajectories of a dynamical system. It is a measure of sensitive dependence on
     initial conditions, i.e. how quickly two nearby states diverge. A system can have multiple LEs,
-    equal to thenumber of the dimensionality of the phase space, and the largest LE value, "LLE" is
+    equal to the number of the dimensionality of the phase space, and the largest LE value, "LLE" is
     often used to determine the overall predictability of the dynamical system.
 
     Different algorithms exist to estimate these indices:
@@ -37,13 +36,17 @@ def complexity_lyapunov(
       neighbouring points are then tracked along their distance trajectories for a number of data
       points. The slope of the line using a least-squares fit of the mean log trajectory of the
       distances gives the final LLE.
-    * **Eckmann et al. (1996)** computes LEs by first reconstructing the time series using a
+    * **Makowski** is a custom modification of Rosenstein's algorithm, using KDTree for more
+      efficient nearest neighbors computation. Additionally, the LLE is computed as the slope up to
+      the changepoint of divergence rate (the point where it flattens out), making it more robust
+      to the trajectory length parameter.
+    * **Eckmann et al. (1986)** computes LEs by first reconstructing the time series using a
       delay-embedding method, and obtains the tangent that maps to the reconstructed dynamics using
       a least-squares fit, where the LEs are deduced from the tangent maps.
 
     .. warning::
 
-      The **Eckman (1996)** method currently does not work. Please help us fixing it by double
+      The **Eckmann (1986)** method currently does not work. Please help us fix it by double
       checking the code, the paper and helping us figuring out what's wrong. Overall, we would like
       to improve this function to return for instance all the exponents (Lyapunov spectrum),
       implement newer and faster methods (e.g., Balcerzak, 2018, 2020), etc. If you're interested
@@ -59,17 +62,17 @@ def complexity_lyapunov(
     dimension : int
         Embedding Dimension (*m*, sometimes referred to as *d* or *order*). See
         :func:`complexity_dimension` to estimate the optimal value for this parameter. If method
-        is ``"eckmann1996"``, larger values for dimension are recommended.
+        is ``"eckmann1986"``, larger values for dimension are recommended.
     method : str
-        The method that defines the algorithm for computing LE. Can be one of ``"rosenstein1993"``
-        or ``"eckmann1996"``.
+        The method that defines the algorithm for computing LE. Can be one of ``"rosenstein1993"``,
+        ``"makowski"``, or ``"eckmann1986"``.
     len_trajectory : int
         Applies when method is ``"rosenstein1993"``. The number of data points in which
         neighboring trajectories are followed.
     matrix_dim : int
-        Applies when method is ``"eckmann1996"``. Corresponds to the number of LEs to return.
+        Applies when method is ``"eckmann1986"``. Corresponds to the number of LEs to return.
     min_neighbors : int, str
-        Applies when method is ``"eckmann1996"``. Minimum number of neighbors. If ``"default"``,
+        Applies when method is ``"eckmann1986"``. Minimum number of neighbors. If ``"default"``,
         ``min(2 * matrix_dim, matrix_dim + 4)`` is used.
     **kwargs : optional
         Other arguments to be passed to ``signal_psd()`` for calculating the minimum temporal
@@ -79,7 +82,7 @@ def complexity_lyapunov(
     --------
     lle : float
         An estimate of the largest Lyapunov exponent (LLE) if method is ``"rosenstein1993"``, and
-        an array of LEs if ``"eckmann1996"``.
+        an array of LEs if ``"eckmann1986"``.
     info : dict
         A dictionary containing additional information regarding the parameters used
         to compute LLE.
@@ -90,13 +93,24 @@ def complexity_lyapunov(
 
       import neurokit2 as nk
 
-      signal = nk.signal_simulate(duration=3, sampling_rate=100, frequency=[5, 8], noise=0.5)
+      signal = nk.signal_simulate(duration=5, sampling_rate=100, frequency=[5, 8], noise=0.1)
+
+      # Rosenstein's method
+      @savefig p_complexity_lyapunov1.png scale=100%
+      lle, info = nk.complexity_lyapunov(signal, method="rosenstein", show=True)
+      @suppress
+      plt.close()
 
-      lle, info = nk.complexity_lyapunov(signal, method="rosenstein1993", show=True)
       lle
 
+      # Makowski's change-point method
+      @savefig p_complexity_lyapunov2.png scale=100%
+      lle, info = nk.complexity_lyapunov(signal, method="makowski", show=True)
+      @suppress
+      plt.close()
+
       # Eckman's method is broken. Please help us fix-it!
-      # lle, info = nk.complexity_lyapunov(signal, dimension=2, method="eckmann1996")
+      # lle, info = nk.complexity_lyapunov(signal, dimension=2, method="eckmann1986")
 
     References
     ----------
@@ -120,34 +134,45 @@ def complexity_lyapunov(
 
     # "We impose the additional constraint that nearest neighbors have a temporal separation
     # greater than the mean period of the time series: This allows us to consider each pair of
-    # neighbors as nearby initial conditions for different trajectories.""
+    # neighbors as nearby initial conditions for different trajectories."
 
     # "We estimated the mean period as the reciprocal of the mean frequency of the power spectrum,
     # although we expect any comparable estimate, e.g., using the median frequency of the magnitude
     # spectrum, to yield equivalent results."
+    if separation == "auto":
+        # Actual sampling rate does not matter
+        psd = signal_psd(
+            signal, sampling_rate=1000, method="fft", normalize=False, show=False
+        )
+        mean_freq = np.sum(psd["Power"] * psd["Frequency"]) / np.sum(psd["Power"])
 
-    # Actual sampling rate does not matter
-    psd = signal_psd(signal, sampling_rate=1000, method="fft", normalize=False, show=False)
-    mean_freq = np.sum(psd["Power"] * psd["Frequency"]) / np.sum(psd["Power"])
-
-    # 1 / mean_freq = seconds per cycle
-    separation = int(np.ceil(1 / mean_freq * 1000))
+        # 1 / mean_freq = seconds per cycle
+        separation = int(np.ceil(1 / mean_freq * 1000))
+    else:
+        assert isinstance(separation, int), "'separation' should be an integer."
 
     # Run algorithm
     # ----------------
     # Method
     method = method.lower()
     if method in ["rosenstein", "rosenstein1993"]:
         le, parameters = _complexity_lyapunov_rosenstein(
-            signal, delay, dimension, separation, len_trajectory, **kwargs
+            signal, delay, dimension, separation, **kwargs
+        )
+    elif method in ["makowski"]:
+        le, parameters = _complexity_lyapunov_makowski(
+            signal, delay, dimension, separation, **kwargs
         )
-    elif method in ["eckmann", "eckmann1996"]:
+    elif method in ["eckmann", "eckmann1986", "eckmann1986"]:
         le, parameters = _complexity_lyapunov_eckmann(
             signal,
             dimension=dimension,
             separation=separation,
-            matrix_dim=matrix_dim,
-            min_neighbors=min_neighbors,
+        )
+    else:
+        raise ValueError(
+            "NeuroKit error: complexity_lyapunov(): 'method' should be one of "
+            " 'rosenstein1993', 'makowski', 'eckmann1986'."
         )
 
     # Store params
@@ -167,10 +192,83 @@ def complexity_lyapunov(
 # =============================================================================
 
 
+def _complexity_lyapunov_makowski(
+    signal,
+    delay=1,
+    dimension=2,
+    separation=1,
+    max_length="auto",
+    show=False,
+):
+    # Estimate LLE as the slope of the mean log-divergence curve up to its knee; returns (slope, info)
+    info = {
+        "Dimension": dimension,
+        "Delay": delay,
+    }
+
+    # Time-delay embedding; n is the number of embedded points (rows)
+    embedded = complexity_embedding(signal, delay=delay, dimension=dimension)
+    n = len(embedded)
+
+    # Default the maximum trajectory length to 10 times the delay, capped at half the embedded length
+    if max_length == "auto":
+        max_length = int(delay * 10)
+    if max_length >= n / 2:
+        max_length = n // 2
+
+    # Build a KD-tree on the embedded points (Euclidean metric) for nearest-neighbour queries
+    tree = sklearn.neighbors.KDTree(embedded, metric="euclidean")
+
+    # Ask for 2 + separation neighbours per point (column 0 is presumably the point itself).
+    # NOTE(review): ranked by distance only; no temporal-gap check on the indices here - confirm intent.
+    idx = tree.query(embedded, k=2 + separation, return_distance=False)
+
+    # Keep only the last column of the query result as the paired neighbour of each point.
+    idx = idx[:, -1]
+
+    # For each step k, average the log distance between each point and its neighbour k steps ahead
+    trajectories = np.zeros(max_length)
+    for k in range(1, max_length + 1):
+        valid = np.where((np.arange(n - k) + k < n) & (idx[: n - k] + k < n))[0]
+
+        if valid.size == 0:
+            trajectories[k - 1] = -np.inf  # NOTE(review): the other empty case below stores nan - confirm
+            continue
+
+        divergences = np.linalg.norm(
+            embedded[valid + k] - embedded[idx[valid] + k],
+            axis=1,
+        )
+        divergences = divergences[divergences > 0]  # drop zero distances before taking the log
+        if len(divergences) == 0:
+            trajectories[k - 1] = np.nan
+        else:
+            trajectories[k - 1] = np.mean(np.log(divergences))
+
+    # Change point (knee) of the divergence curve, located with find_knee and stored in info
+    x_axis = range(1, len(trajectories) + 1)
+    knee = find_knee(y=trajectories, x=x_axis, show=False, verbose=False)
+    info["Divergence_Rate"] = trajectories
+    info["Changepoint"] = knee
+
+    # Least-squares line over the points before the knee (knee is used as a slice bound); slope = LLE
+    slope, intercept = np.polyfit(x_axis[0:knee], trajectories[0:knee], 1)
+    if show is True:
+        plt.plot(np.arange(1, len(trajectories) + 1), trajectories)
+        plt.axvline(knee, color="red", label="Changepoint", linestyle="--")
+        plt.axline(
+            (0, intercept), slope=slope, color="orange", label="Least-squares Fit"
+        )
+        plt.ylim(bottom=np.min(trajectories))
+        plt.ylabel("Divergence Rate")
+        plt.title(f"Largest Lyapunov Exponent (slope of the line) = {slope:.3f}")
+        plt.legend()
+    return slope, info
+
+
 def _complexity_lyapunov_rosenstein(
     signal, delay=1, dimension=2, separation=1, len_trajectory=20, show=False, **kwargs
 ):
-
     # 1. Check that sufficient data points are available
     # Minimum length required to find single orbit vector
     min_len = (dimension - 1) * delay + 1
@@ -200,7 +298,9 @@ def _complexity_lyapunov_rosenstein(
 
     # Find indices of nearest neighbours
     ntraj = m - len_trajectory + 1
-    min_dist_indices = np.argmin(dists[:ntraj, :ntraj], axis=1)  # exclude last few indices
+    min_dist_indices = np.argmin(
+        dists[:ntraj, :ntraj], axis=1
+    )  # exclude last few indices
     min_dist_indices = min_dist_indices.astype(int)
 
     # Follow trajectories of neighbour pairs for len_trajectory data points
@@ -217,16 +317,25 @@ def _complexity_lyapunov_rosenstein(
     divergence_rate = trajectories[np.isfinite(trajectories)]
 
     # LLE obtained by least-squares fit to average line
-    slope, intercept = np.polyfit(np.arange(1, len(divergence_rate) + 1), divergence_rate, 1)
+    slope, intercept = np.polyfit(
+        np.arange(1, len(divergence_rate) + 1), divergence_rate, 1
+    )
+
+    # Store info
+    parameters = {
+        "Trajectory_Length": len_trajectory,
+        "Divergence_Rate": divergence_rate,
+    }
 
     if show is True:
         plt.plot(np.arange(1, len(divergence_rate) + 1), divergence_rate)
-        plt.axline((0, intercept), slope=slope, color="orange", label="Least-squares Fit")
+        plt.axline(
+            (0, intercept), slope=slope, color="orange", label="Least-squares Fit"
+        )
         plt.ylabel("Divergence Rate")
+        plt.title(f"Largest Lyapunov Exponent (slope of the line) = {slope:.3f}")
         plt.legend()
 
-    parameters = {"Trajectory_Length": len_trajectory}
-
     return slope, parameters
 
 
@@ -279,7 +388,9 @@ def _complexity_lyapunov_eckmann(
 
         # get neighbors within the radius
         r = distances[i][neighbour_furthest]
-        neighbors = np.where(distances[i] <= r)[0]  # should have length = min_neighbours
+        neighbors = np.where(distances[i] <= r)[
+            0
+        ]  # should have length = min_neighbours
 
         # Find matrix T_i (matrix_dim * matrix_dim) that sends points from neighbourhood of x(i) to x(i+1)
         vec_beta = signal[neighbors + matrix_dim * m] - signal[i + matrix_dim * m]
@@ -289,7 +400,9 @@ def _complexity_lyapunov_eckmann(
         # form matrix T_i
         t_i = np.zeros((matrix_dim, matrix_dim))
         t_i[:-1, 1:] = np.identity(matrix_dim - 1)
-        t_i[-1] = np.linalg.lstsq(matrix, vec_beta, rcond=-1)[0]  # least squares solution
+        t_i[-1] = np.linalg.lstsq(matrix, vec_beta, rcond=-1)[
+            0
+        ]  # least squares solution
 
         # QR-decomposition of T * old_Q
         mat_Q, mat_R = np.linalg.qr(np.dot(t_i, old_Q))