diff --git a/CMakeLists.txt b/CMakeLists.txt
index 509e07b..57fd908 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,6 +1,6 @@
 ################################################################################
 # Check required CMake version
-set(REQUIRED_CMAKE_VERSION "3.14.0")
+set(REQUIRED_CMAKE_VERSION "3.25.0")
 cmake_minimum_required(VERSION ${REQUIRED_CMAKE_VERSION})
 
 if(INPUT_POLYFEMPY_DATA_ROOT)
@@ -13,11 +13,18 @@ if(NOT EXISTS ${POLYFEMPY_DATA_ROOT})
 endif()
 
 project(polyfempy)
+
+set(CMAKE_CXX_STANDARD 17)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+set(CMAKE_CXX_EXTENSIONS OFF)
+
 ################################################################################
 
 list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
 list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/recipes/")
 
+include(polyfem_cpm_cache)
+
 # Color output
 include(UseColors)
 
@@ -51,11 +58,9 @@ include(polyfem_data)
 ################################################################################
 # Subdirectories
 ################################################################################
-add_library(polyfempy MODULE src/binding.cpp src/raster.cpp)
+pybind11_add_module(polyfempy)
+add_subdirectory(src)
+target_include_directories(polyfempy PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/src")
 target_link_libraries(polyfempy PRIVATE polyfem::polyfem pybind11::module pybind11::json)
 
 set_target_properties(polyfempy PROPERTIES PREFIX "${PYTHON_MODULE_PREFIX}" SUFFIX "${PYTHON_MODULE_EXTENSION}")
-
-
-
-
diff --git a/README.md b/README.md
index ef00ea4..250ec6b 100644
--- a/README.md
+++ b/README.md
@@ -8,29 +8,12 @@
 [![Anaconda-Server Badge](https://anaconda.org/conda-forge/polyfempy/badges/installer/conda.svg)](https://conda.anaconda.org/conda-forge)
 
 
-
-
-The python bindings are in alpha. Expect a lot of API changes and possible bugs. Use at your own peril!
+The Python bindings are in alpha. Expect a lot of API changes and possible bugs. Use at your own peril!
 
 <br/>
-I am making efforts to provide a simple python interface to Polyfem.
-
-For doing so I am maintaining  a *conda* package which can be easily installed [https://anaconda.org/conda-forge/polyfempy](https://anaconda.org/conda-forge/polyfempy).
-
-Note that the conda deployment is slow and this tutorial will follow the deployment version.
-
-If you hare in a hurry for the juicy latest feature you can clone the repository [Polyfem-python](https://github.com/polyfem/polyfem-python) and use `pip` to install:
+To use the Python bindings, clone the current repository and use `pip` to install:
 ```
-python setup.py install
+pip install .
 ```
-and
-```
-python setup.py test
-```
-for testing.
-
-For python documentation [https://polyfem.github.io/python/](https://polyfem.github.io/python/).
-
-For python jupyter notebook [https://polyfem.github.io/python_examples/](https://polyfem.github.io/python_examples/).
 
 For full documentation see [https://polyfem.github.io/](https://polyfem.github.io/).
diff --git a/cmake/recipes/CPM.cmake b/cmake/recipes/CPM.cmake
new file mode 100644
index 0000000..70987dc
--- /dev/null
+++ b/cmake/recipes/CPM.cmake
@@ -0,0 +1,33 @@
+set(CPM_DOWNLOAD_VERSION 0.39.0)
+
+if(CPM_SOURCE_CACHE)
+  set(CPM_DOWNLOAD_LOCATION "${CPM_SOURCE_CACHE}/cpm/CPM_${CPM_DOWNLOAD_VERSION}.cmake")
+elseif(DEFINED ENV{CPM_SOURCE_CACHE})
+  set(CPM_DOWNLOAD_LOCATION "$ENV{CPM_SOURCE_CACHE}/cpm/CPM_${CPM_DOWNLOAD_VERSION}.cmake")
+else()
+  set(CPM_DOWNLOAD_LOCATION "${CMAKE_BINARY_DIR}/cmake/CPM_${CPM_DOWNLOAD_VERSION}.cmake")
+endif()
+
+# Expand relative path. This is important if the provided path contains a tilde (~)
+get_filename_component(CPM_DOWNLOAD_LOCATION ${CPM_DOWNLOAD_LOCATION} ABSOLUTE)
+
+function(download_cpm)
+  message(STATUS "Downloading CPM.cmake to ${CPM_DOWNLOAD_LOCATION}")
+  file(DOWNLOAD
+       https://github.com/cpm-cmake/CPM.cmake/releases/download/v${CPM_DOWNLOAD_VERSION}/CPM.cmake
+       ${CPM_DOWNLOAD_LOCATION}
+  )
+endfunction()
+
+if(NOT (EXISTS ${CPM_DOWNLOAD_LOCATION}))
+  download_cpm()
+else()
+  # resume download if it previously failed
+  file(READ ${CPM_DOWNLOAD_LOCATION} check)
+  if("${check}" STREQUAL "")
+    download_cpm()
+  endif()
+  unset(check)
+endif()
+
+include(${CPM_DOWNLOAD_LOCATION})
diff --git a/cmake/recipes/polyfem.cmake b/cmake/recipes/polyfem.cmake
index a147183..203873f 100644
--- a/cmake/recipes/polyfem.cmake
+++ b/cmake/recipes/polyfem.cmake
@@ -7,13 +7,14 @@ endif()
 
 message(STATUS "Third-party: creating target 'polyfem::polyfem'")
 
-include(FetchContent)
-FetchContent_Declare(
-    polyfem
-    GIT_REPOSITORY https://github.com/polyfem/polyfem.git
-    GIT_TAG 12ac634833f91a3946cff26db01972fdb2ec3214
-    GIT_SHALLOW FALSE
-)
-FetchContent_MakeAvailable(polyfem)
-
+# include(FetchContent)
+# FetchContent_Declare(
+#     polyfem
+#     GIT_REPOSITORY https://github.com/polyfem/polyfem.git
+#     GIT_TAG 07ee824f836c445699bbc47ee6f19afbfe39bad4
+#     GIT_SHALLOW FALSE
+# )
+# FetchContent_MakeAvailable(polyfem)
 
+include(CPM)
+CPMAddPackage("gh:polyfem/polyfem#71b67e6416c59f498589ddc1633c11e6c246b392")
diff --git a/cmake/recipes/polyfem_cpm_cache.cmake b/cmake/recipes/polyfem_cpm_cache.cmake
new file mode 100644
index 0000000..bd57d22
--- /dev/null
+++ b/cmake/recipes/polyfem_cpm_cache.cmake
@@ -0,0 +1,24 @@
+#
+# Copyright 2021 Adobe. All rights reserved.
+# This file is licensed to you under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License. You may obtain a copy
+# of the License at http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software distributed under
+# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
+# OF ANY KIND, either express or implied. See the License for the specific language
+# governing permissions and limitations under the License.
+#
+
+if(DEFINED ENV{CPM_SOURCE_CACHE})
+    set(CPM_SOURCE_CACHE_DEFAULT $ENV{CPM_SOURCE_CACHE})
+else()
+    # Set CPM cache folder if unset
+    file(REAL_PATH "~/.cache/CPM" CPM_SOURCE_CACHE_DEFAULT EXPAND_TILDE)
+endif()
+
+set(CPM_SOURCE_CACHE
+    ${CPM_SOURCE_CACHE_DEFAULT}
+    CACHE PATH "Directory to download CPM dependencies"
+)
+message(STATUS "Using CPM cache folder: ${CPM_SOURCE_CACHE}")
\ No newline at end of file
diff --git a/cmake/recipes/polyfem_data.cmake b/cmake/recipes/polyfem_data.cmake
index 95aa184..41ba67c 100644
--- a/cmake/recipes/polyfem_data.cmake
+++ b/cmake/recipes/polyfem_data.cmake
@@ -7,7 +7,7 @@ include(FetchContent)
 FetchContent_Declare(
     polyfem_data
     GIT_REPOSITORY https://github.com/polyfem/polyfem-data
-    GIT_TAG 29a46df1fd90c237a82c219f346a956e72bd17d3
+    GIT_TAG f2089eb6eaa22071f7490e0f144e10afe85d4eba
     GIT_SHALLOW FALSE
     SOURCE_DIR ${POLYFEMPY_DATA_ROOT}
 )
diff --git a/cmake/recipes/pybind11.cmake b/cmake/recipes/pybind11.cmake
index a170372..a40457d 100644
--- a/cmake/recipes/pybind11.cmake
+++ b/cmake/recipes/pybind11.cmake
@@ -1,28 +1,11 @@
-#
-# Copyright 2020 Adobe. All rights reserved.
-# This file is licensed to you under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License. You may obtain a copy
-# of the License at http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software distributed under
-# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
-# OF ANY KIND, either express or implied. See the License for the specific language
-# governing permissions and limitations under the License.
-#
+# pybind11 (https://github.com/pybind/pybind11)
+# License: BSD-style
 if(TARGET pybind11::pybind11)
     return()
 endif()
 
 message(STATUS "Third-party: creating target 'pybind11::pybind11'")
 
-include(FetchContent)
-FetchContent_Declare(
-    pybind11
-    GIT_REPOSITORY https://github.com/pybind/pybind11.git
-    GIT_TAG v2.9.2
-    GIT_SHALLOW FALSE
-)
-
 if (POLICY CMP0094)  # https://cmake.org/cmake/help/latest/policy/CMP0094.html
     cmake_policy(SET CMP0094 NEW)  # FindPython should return the first matching Python
 endif ()
@@ -39,7 +22,8 @@ endif ()
 # Pybind11 still uses the deprecated FindPythonInterp. So let's call CMake's
 # new FindPython module and set PYTHON_EXECUTABLE for Pybind11 to pick up.
 # This works well with conda environments.
-find_package(Python REQUIRED COMPONENTS Interpreter Development)
+find_package(Python COMPONENTS Interpreter Development.Module REQUIRED)
 set(PYTHON_EXECUTABLE ${Python_EXECUTABLE})
 
-FetchContent_MakeAvailable(pybind11)
\ No newline at end of file
+include(CPM)
+CPMAddPackage("gh:pybind/pybind11@2.9.2")
diff --git a/polyfempy/Settings.py b/polyfempy/Settings.py
index 745a30a..655bcea 100644
--- a/polyfempy/Settings.py
+++ b/polyfempy/Settings.py
@@ -14,8 +14,7 @@ def __init__(self, discr_order=1, pressure_discr_order=1, pde=polyfempy.PDEs.Lap
 
         self.BDF_order = 1
 
-        self.scalar_formulation = "Laplacian"
-        self.tensor_formulation = "LinearElasticity"
+        self.formulation = "Laplacian"
         self.has_collision = contact_problem
         self.BDF_order = BDF_order
 
@@ -39,7 +38,6 @@ def __init__(self, discr_order=1, pressure_discr_order=1, pde=polyfempy.PDEs.Lap
         self.pde = pde
 
         self.selection = None
-        raise RuntimeError("Old Version Deprecated. Use version <0.5.2 on conda for the old interface")
 
     def get_problem(self):
         """Get the problem"""
@@ -62,23 +60,13 @@ def set_problem(self, problem):
 
     def get_pde(self, pde):
         """Get the PDE"""
-        if self._is_scalar:
-            return self.scalar_formulation
-        else:
-            self.tensor_formulation
+        return self.formulation
 
     def set_pde(self, pde):
         """Sets the PDE to solve, use any of the polyfempy.PDEs"""
 
-        if pde == "NonLinearElasticity":
-            pde = "NeoHookean"
-
         self._is_scalar = not polyfempy.polyfempy.is_tensor(pde)
-
-        if self._is_scalar:
-            self.scalar_formulation = pde
-        else:
-            self.tensor_formulation = pde
+        self.formulation = pde
 
     def set_material_params(self, name, value):
         """set the material parameters, for instance set_material_params("E", 200) sets the Young's modulus E to 200. See https://polyfem.github.io/documentation/#formulations for full list"""
diff --git a/polyfempy/__init__.py b/polyfempy/__init__.py
index 0c57571..a2e3e76 100644
--- a/polyfempy/__init__.py
+++ b/polyfempy/__init__.py
@@ -1,9 +1,4 @@
-from .polyfempy import Solver
-from .polyfempy import PDEs
-from .polyfempy import ScalarFormulations
-from .polyfempy import TensorFormulations
-from .polyfempy import solve_febio
-from .polyfempy import solve
+from .polyfempy import *
 
 from .Settings import Settings
 from .Selection import Selection
diff --git a/polyfempy/command.py b/polyfempy/command.py
index fed18e1..a3498a2 100644
--- a/polyfempy/command.py
+++ b/polyfempy/command.py
@@ -6,80 +6,30 @@ def polyfem():
     parser = argparse.ArgumentParser()
 
     parser.add_argument("-j", "--json", type=str,
-                        default="", help="Simulation json file")
-    parser.add_argument("-m", "--mesh", type=str, default="", help="Mesh path")
-    parser.add_argument("-b", "--febio", type=str,
-                        default="", help="FEBio file path")
+                        default="", help="Simulation JSON file")
 
-    parser.add_argument("--n_refs", type=int, default=0,
-                        help="Number of refinements")
-    parser.add_argument("--not_norm", type=bool, default=True,
-                        help="Skips mesh normalization")
+    parser.add_argument("-y", "--yaml", type=str,
+                        default="", help="Simulation YAML file")
 
-    parser.add_argument("--problem", type=str,
-                        default="", help="Problem name")
-    parser.add_argument("--sform", type=str,
-                        default="", help="Scalar formulation")
-    parser.add_argument("--tform", type=str,
-                        default="", help="Tensor formulation")
+    parser.add_argument("--max_threads", type=int, default=1,
+                        help="Maximum number of threads")
 
-    parser.add_argument("--solver", type=str, default="", help="Solver to use")
+    parser.add_argument("-s", "--strict_validation", action='store_true',
+                        help="Enables strict validation of input JSON")
 
-    parser.add_argument("-q", "-p", type=int, default=1,
-                        help="Discretization order")
-    parser.add_argument("--p_ref", type=bool,
-                        default=False, help="Use p refimenet")
-    # parser.add_argument("--spline", use_splines, "Use spline for quad/hex meshes");
-    parser.add_argument("--count_flipped_els", type=bool,
-                        default=False, help="Count flippsed elements")
-    parser.add_argument("--lin_geom", type=bool, default=False,
-                        help="Force use linear geometric mapping")
-    # parser.add_argument("--isoparametric", isoparametric, "Force use isoparametric basis");
-    # parser.add_argument("--serendipity", serendipity, "Use of serendipity elements, only for Q2");
-    # parser.add_argument("--stop_after_build_basis", stop_after_build_basis, "Stop after build bases");
-    parser.add_argument("--vis_mesh_res", type=float,
-                        default=-1.0, help="Vis mesh resolution")
-    parser.add_argument("--project_to_psd", type=bool,
-                        default=False, help="Project local matrices to psd")
-    parser.add_argument("--n_incr_load", type=int, default=-
-                        1, help="Number of incremeltal load")
-
-    parser.add_argument("--output", type=str, default="",
-                        help="Output json file")
-    parser.add_argument("--vtu", type=str, default="", help="Vtu output file")
-
-    parser.add_argument("--quiet", type=bool, default=False,
-                        help="Disable cout for logging")
-    parser.add_argument("--log_file", type=str,
-                        default="", help="Log to a file")
     parser.add_argument("--log_level", type=int, default=1,
                         help="Log level 1 debug 2 info")
 
-    parser.add_argument("--export_material_params", type=bool,
-                        default=False, help="Export material parameters")
+    parser.add_argument("-o", "--output_dir", type=str,
+                        default="", help="Directory for output files")
 
     args = parser.parse_args()
 
     polyfem_command(
-        args.json,
-        args.febio,
-        args.mesh,
-        args.problem,
-        args.sform,
-        args.tform,
-        args.n_refs,
-        args.not_norm,
-        args.solver,
-        args.q,
-        args.p_ref,
-        args.count_flipped_els,
-        args.lin_geom,
-        args.vis_mesh_res,
-        args.project_to_psd,
-        args.n_incr_load,
-        args.output,
-        args.vtu,
-        args.log_level,
-        args.log_file,
-        args.quiet,
-        args.export_material_params)
+        json=args.json,
+        yaml=args.yaml,
+        log_level=args.log_level,
+        strict_validation=args.strict_validation,
+        max_threads=args.max_threads,
+        output_dir=args.output_dir
+        )
diff --git a/setup.py b/setup.py
index 5982caf..f44003f 100644
--- a/setup.py
+++ b/setup.py
@@ -52,10 +52,7 @@ def build_extension(self, ext):
         cmake_args = ['-DCMAKE_LIBRARY_OUTPUT_DIRECTORY=' + extdir,
                       '-DPYTHON_EXECUTABLE=' + sys.executable,
                       '-DPYTHON_INCLUDE_DIR=' + python_include_directory,
-                      '-DPOLYSOLVE_WITH_PARDISO=OFF',
                       cholmod_str,
-                      #   '-DPOLYFEM_THREADING=NONE',
-                      '-DPOLYFEM_NO_UI=ON',
                       '-DPOLYSOLVE_WITH_AMGCL=OFF',
                       '-DPOLYSOLVE_WITH_MKL=OFF',
                       '-DPOLYSOLVE_WITH_SPECTRA=OFF']
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
new file mode 100644
index 0000000..52cfc3d
--- /dev/null
+++ b/src/CMakeLists.txt
@@ -0,0 +1,16 @@
+set(SOURCES
+  binding.cpp
+  raster.cpp
+)
+
+source_group(TREE "${CMAKE_CURRENT_SOURCE_DIR}" PREFIX "Source Files" FILES ${SOURCES})
+target_sources(polyfempy PRIVATE ${SOURCES})
+
+################################################################################
+# Subfolders
+################################################################################
+
+add_subdirectory(differentiable)
+add_subdirectory(mesh)
+add_subdirectory(state)
+add_subdirectory(solver)
diff --git a/src/binding.cpp b/src/binding.cpp
index 892143e..2eedced 100644
--- a/src/binding.cpp
+++ b/src/binding.cpp
@@ -1,28 +1,8 @@
-#include <polyfem/assembler/AssemblerUtils.hpp>
-#include <polyfem/utils/Logger.hpp>
-#include <polyfem/mesh/MeshUtils.hpp>
-#include <polyfem/mesh/mesh3D/CMesh3D.hpp>
-#include <polyfem/assembler/GenericProblem.hpp>
-#include <polyfem/utils/StringUtils.hpp>
-
-#include "polyfem/State.hpp"
-#include "polyfem/solver/ALNLProblem.hpp"
-#include "polyfem/solver/NLProblem.hpp"
-#include "raster.hpp"
-
 #include <geogram/basic/command_line.h>
 #include <geogram/basic/command_line_args.h>
 
-#include <igl/remove_duplicate_vertices.h>
-#include <igl/boundary_facets.h>
-#include <igl/remove_unreferenced.h>
 #include <stdexcept>
 
-#ifdef USE_TBB
-#include <tbb/task_scheduler_init.h>
-#include <thread>
-#endif
-
 #include <pybind11_json/pybind11_json.hpp>
 
 #include <pybind11/pybind11.h>
@@ -31,1043 +11,26 @@
 #include <pybind11/stl.h>
 #include <pybind11/iostream.h>
 
-namespace py = pybind11;
-
-typedef std::function<Eigen::MatrixXd(double x, double y, double z)> BCFuncV;
-typedef std::function<double(double x, double y, double z)> BCFuncS;
-
-class ScalarAssemblers
-{
-};
-class TensorAssemblers
-{
-};
-
-class PDEs
-{
-};
-
-// TODO add save_time_sequence
-
-namespace
-{
-  void init_globals(polyfem::State &state)
-  {
-    static bool initialized = false;
-
-    if (!initialized)
-    {
-#ifndef WIN32
-      setenv("GEO_NO_SIGNAL_HANDLER", "1", 1);
-#endif
-
-      GEO::initialize();
-
-#ifdef USE_TBB
-      const size_t MB = 1024 * 1024;
-      const size_t stack_size = 64 * MB;
-      unsigned int num_threads =
-          std::max(1u, std::thread::hardware_concurrency());
-      tbb::task_scheduler_init scheduler(num_threads, stack_size);
-#endif
-
-      // Import standard command line arguments, and custom ones
-      GEO::CmdLine::import_arg_group("standard");
-      GEO::CmdLine::import_arg_group("pre");
-      GEO::CmdLine::import_arg_group("algo");
-
-      state.init_logger(std::cout, 2);
-
-      initialized = true;
-    }
-  }
-
-} // namespace
-
-const auto rendering_lambda =
-    [](const Eigen::MatrixXd &vertices, const Eigen::MatrixXi &faces, int width,
-       int height, const Eigen::MatrixXd &camera_positionm,
-       const double camera_fov, const double camera_near,
-       const double camera_far, const bool is_perspective,
-       const Eigen::MatrixXd &lookatm, const Eigen::MatrixXd &upm,
-       const Eigen::MatrixXd &ambient_lightm,
-       const std::vector<std::pair<Eigen::MatrixXd, Eigen::MatrixXd>> &lights,
-       const Eigen::MatrixXd &diffuse_colorm,
-       const Eigen::MatrixXd &specular_colorm, const double specular_exponent) {
-      using namespace renderer;
-      using namespace Eigen;
-
-      Eigen::Vector3d camera_position(0, 0, 1);
-      Eigen::Vector3d lookat(0, 0, 0);
-      Eigen::Vector3d up(0, 0, 1);
-      Eigen::Vector3d ambient_light(0.1, 0.1, 0.1);
-      Eigen::Vector3d diffuse_color(1, 0, 0);
-      Eigen::Vector3d specular_color(1, 0, 0);
+#include "differentiable/binding.hpp"
+#include "mesh/binding.hpp"
+#include "state/binding.hpp"
+#include "solver/binding.hpp"
 
-      if (camera_positionm.size() > 0 && camera_positionm.size() != 3)
-        throw pybind11::value_error("camera_position have size 3");
-      if (camera_positionm.size() == 3)
-        camera_position << camera_positionm(0), camera_positionm(1),
-            camera_positionm(2);
-      if (lookatm.size() > 0 && lookatm.size() != 3)
-        throw pybind11::value_error("lookat have size 3");
-      if (lookatm.size() == 3)
-        lookat << lookatm(0), lookatm(1), lookatm(2);
-      if (upm.size() > 0 && upm.size() != 3)
-        throw pybind11::value_error("up have size 3");
-      if (upm.size() == 3)
-        up << upm(0), upm(1), upm(2);
-      if (ambient_lightm.size() > 0 && ambient_lightm.size() != 3)
-        throw pybind11::value_error("ambient_light have size 3");
-      if (ambient_lightm.size() == 3)
-        ambient_light << ambient_lightm(0), ambient_lightm(1),
-            ambient_lightm(2);
-      if (diffuse_colorm.size() > 0 && diffuse_colorm.size() != 3)
-        throw pybind11::value_error("diffuse_color have size 3");
-      if (diffuse_colorm.size() == 3)
-        diffuse_color << diffuse_colorm(0), diffuse_colorm(1),
-            diffuse_colorm(2);
-      if (specular_colorm.size() > 0 && specular_colorm.size() != 3)
-        throw pybind11::value_error("specular_color have size 3");
-      if (specular_colorm.size() == 3)
-        specular_color << specular_colorm(0), specular_colorm(1),
-            specular_colorm(2);
-
-      Material material;
-      material.diffuse_color = diffuse_color;
-      material.specular_color = specular_color;
-      material.specular_exponent = specular_exponent;
-
-      Eigen::Matrix<FrameBufferAttributes, Eigen::Dynamic, Eigen::Dynamic>
-          frameBuffer(width, height);
-      UniformAttributes uniform;
-
-      const Vector3d gaze = lookat - camera_position;
-      const Vector3d w = -gaze.normalized();
-      const Vector3d u = up.cross(w).normalized();
-      const Vector3d v = w.cross(u);
-
-      Matrix4d M_cam_inv;
-      M_cam_inv << u(0), v(0), w(0), camera_position(0), u(1), v(1), w(1),
-          camera_position(1), u(2), v(2), w(2), camera_position(2), 0, 0, 0, 1;
-
-      uniform.M_cam = M_cam_inv.inverse();
-
-      {
-        const double camera_ar = double(width) / height;
-        const double tan_angle = tan(camera_fov / 2);
-        const double n = -camera_near;
-        const double f = -camera_far;
-        const double t = tan_angle * n;
-        const double b = -t;
-        const double r = t * camera_ar;
-        const double l = -r;
-
-        uniform.M_orth << 2 / (r - l), 0, 0, -(r + l) / (r - l), 0, 2 / (t - b),
-            0, -(t + b) / (t - b), 0, 0, 2 / (n - f), -(n + f) / (n - f), 0, 0,
-            0, 1;
-        Matrix4d P;
-        if (is_perspective)
-        {
-          P << n, 0, 0, 0, 0, n, 0, 0, 0, 0, n + f, -f * n, 0, 0, 1, 0;
-        }
-        else
-        {
-          P << 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1;
-        }
-
-        uniform.M = uniform.M_orth * P * uniform.M_cam;
-      }
-
-      Program program;
-      program.VertexShader = [&](const VertexAttributes &va,
-                                 const UniformAttributes &uniform) {
-        VertexAttributes out;
-        out.position = uniform.M * va.position;
-        Vector3d color = ambient_light;
-
-        Vector3d hit(va.position(0), va.position(1), va.position(2));
-        for (const auto &l : lights)
-        {
-          Vector3d Li = (l.first - hit).normalized();
-          Vector3d N = va.normal;
-          Vector3d diffuse =
-              va.material.diffuse_color * std::max(Li.dot(N), 0.0);
-          Vector3d H;
-          if (is_perspective)
-          {
-            H = (Li + (camera_position - hit).normalized()).normalized();
-          }
-          else
-          {
-            H = (Li - gaze.normalized()).normalized();
-          }
-          const Vector3d specular = va.material.specular_color
-                                    * std::pow(std::max(N.dot(H), 0.0),
-                                               va.material.specular_exponent);
-          const Vector3d D = l.first - hit;
-          color +=
-              (diffuse + specular).cwiseProduct(l.second) / D.squaredNorm();
-        }
-        out.color = color;
-        return out;
-      };
-
-      program.FragmentShader = [](const VertexAttributes &va,
-                                  const UniformAttributes &uniform) {
-        FragmentAttributes out(va.color(0), va.color(1), va.color(2));
-        out.depth = -va.position(2);
-        return out;
-      };
-
-      program.BlendingShader = [](const FragmentAttributes &fa,
-                                  const FrameBufferAttributes &previous) {
-        if (fa.depth < previous.depth)
-        {
-          FrameBufferAttributes out(fa.color[0] * 255, fa.color[1] * 255,
-                                    fa.color[2] * 255, fa.color[3] * 255);
-          out.depth = fa.depth;
-          return out;
-        }
-        else
-        {
-          return previous;
-        }
-      };
-
-      Eigen::MatrixXd vnormals(vertices.rows(), 3);
-      //    Eigen::MatrixXd areas(tmp.rows(), 1);
-      vnormals.setZero();
-      //    areas.setZero();
-      Eigen::MatrixXd fnormals(faces.rows(), 3);
-
-      for (int i = 0; i < faces.rows(); ++i)
-      {
-        const Vector3d l1 =
-            vertices.row(faces(i, 1)) - vertices.row(faces(i, 0));
-        const Vector3d l2 =
-            vertices.row(faces(i, 2)) - vertices.row(faces(i, 0));
-        const auto nn = l1.cross(l2);
-        const double area = nn.norm();
-        fnormals.row(i) = nn / area;
-
-        for (int j = 0; j < 3; j++)
-        {
-          int vid = faces(i, j);
-          vnormals.row(vid) += nn;
-          //    areas(vid) += area;
-        }
-      }
-
-      std::vector<VertexAttributes> vertex_attributes;
-      for (int i = 0; i < faces.rows(); ++i)
-      {
-        for (int j = 0; j < 3; j++)
-        {
-          int vid = faces(i, j);
-          VertexAttributes va(vertices(vid, 0), vertices(vid, 1),
-                              vertices(vid, 2));
-          va.material = material;
-          va.normal = vnormals.row(vid).normalized();
-          vertex_attributes.push_back(va);
-        }
-      }
-
-      rasterize_triangles(program, uniform, vertex_attributes, frameBuffer);
-
-      std::vector<uint8_t> image;
-      framebuffer_to_uint8(frameBuffer, image);
-
-      return image;
-    };
+namespace py = pybind11;
 
 PYBIND11_MODULE(polyfempy, m)
 {
-  const auto &pdes = py::class_<PDEs>(m, "PDEs");
-
-  const auto &sa = py::class_<ScalarAssemblers>(m, "ScalarFormulations");
-  for (auto &a : polyfem::assembler::AssemblerUtils::scalar_assemblers())
-  {
-    sa.attr(a.c_str()) = a;
-    pdes.attr(a.c_str()) = a;
-  }
-
-  const auto &ta = py::class_<TensorAssemblers>(m, "TensorFormulations");
-  for (auto &a : polyfem::assembler::AssemblerUtils::tensor_assemblers())
-  {
-    ta.attr(a.c_str()) = a;
-    pdes.attr(a.c_str()) = a;
-  }
-
-  ta.attr("NonLinearElasticity") = "NonLinearElasticity";
-  pdes.attr("NonLinearElasticity") = "NonLinearElasticity";
-
-  pdes.doc() = "List of supported partial differential equations";
-
-  m.def(
-      "is_tensor",
-      [](const std::string &pde) {
-        return polyfem::assembler::AssemblerUtils::is_tensor(pde);
-      },
-      "returns true if the pde is tensorial", py::arg("pde"));
-
-  const auto setting_lambda = [](polyfem::State &self,
-                                 const py::object &settings) {
-    using namespace polyfem;
-
-    init_globals(self);
-    // py::scoped_ostream_redirect output;
-    const std::string json_string = py::str(settings);
-    self.init(json::parse(json_string));
-  };
-
-  auto &solver = py::class_<polyfem::State>(m, "Solver")
-					   .def(py::init<>())
-
-					   .def("settings", setting_lambda,
-							"load PDE and problem parameters from the settings", py::arg("json"))
-
-					   .def("set_settings", setting_lambda,
-							"load PDE and problem parameters from the settings", py::arg("json"))
-
-					   .def(
-						   "set_log_level", [](polyfem::State &s, int log_level) {
-							   init_globals(s);
-							   //    py::scoped_ostream_redirect output;
-							   log_level = std::max(0, std::min(6, log_level));
-							   spdlog::set_level(static_cast<spdlog::level::level_enum>(log_level)); },
-						   "sets polyfem log level, valid value between 0 (all logs) and 6 (no logs)", py::arg("log_level"))
-
-					   .def(
-						   "load_mesh_from_settings", [](polyfem::State &s, const bool normalize_mesh, const double vismesh_rel_area, const int n_refs, const double boundary_id_threshold) {
-							   init_globals(s);
-							   //    py::scoped_ostream_redirect output;
-							   s.args["geometry"][0]["advanced"]["normalize_mesh"] = normalize_mesh;
-							   s.args["output"]["paraview"]["vismesh_rel_area"] = vismesh_rel_area;
-							   s.load_mesh(); },
-						   "Loads a mesh from the 'mesh' field of the json and 'bc_tag' if any bc tags", py::arg("normalize_mesh") = bool(false), py::arg("vismesh_rel_area") = double(0.00001), py::arg("n_refs") = int(0), py::arg("boundary_id_threshold") = double(-1))
-
-					   .def(
-						   "load_mesh_from_path", [](polyfem::State &s, const std::string &path, const bool normalize_mesh, const double vismesh_rel_area, const int n_refs, const double boundary_id_threshold) {
-							   init_globals(s);
-							   //    py::scoped_ostream_redirect output;
-							   s.args["geometry"][0]["mesh"] = path;
-							   s.args["geometry"][0]["advanced"]["normalize_mesh"] = normalize_mesh;
-							   s.args["output"]["paraview"]["vismesh_rel_area"] = vismesh_rel_area;
-							   s.load_mesh(); },
-						   "Loads a mesh from the path and 'bc_tag' from the json if any bc tags", py::arg("path"), py::arg("normalize_mesh") = bool(false), py::arg("vismesh_rel_area") = double(0.00001), py::arg("n_refs") = int(0), py::arg("boundary_id_threshold") = double(-1))
-
-					   .def(
-						   "load_mesh_from_path_and_tags", [](polyfem::State &s, const std::string &path, const std::string &bc_tag, const bool normalize_mesh, const double vismesh_rel_area, const int n_refs, const double boundary_id_threshold) {
-							   init_globals(s);
-							   //    py::scoped_ostream_redirect output;
-							   s.args["geometry"][0]["mesh"] = path;
-							   s.args["bc_tag"] = bc_tag;
-							   s.args["geometry"][0]["advanced"]["normalize_mesh"] = normalize_mesh;
-							   s.args["n_refs"] = n_refs;
-							   s.args["output"]["paraview"]["vismesh_rel_area"] = vismesh_rel_area;
-							   s.load_mesh(); },
-						   "Loads a mesh and bc_tags from path", py::arg("path"), py::arg("bc_tag_path"), py::arg("normalize_mesh") = bool(false), py::arg("vismesh_rel_area") = double(0.00001), py::arg("n_refs") = int(0), py::arg("boundary_id_threshold") = double(-1))
-
-					   .def(
-						   "set_mesh", [](polyfem::State &s, const Eigen::MatrixXd &V, const Eigen::MatrixXi &F, const bool normalize_mesh, const double vismesh_rel_area, const int n_refs, const double boundary_id_threshold) {
-							   init_globals(s);
-							   //    py::scoped_ostream_redirect output;
-
-							   if (V.cols() == 2)
-								   s.mesh = std::make_unique<polyfem::mesh::CMesh2D>();
-							   else
-								   s.mesh = std::make_unique<polyfem::mesh::CMesh3D>();
-							   s.mesh->build_from_matrices(V, F);
-
-							   s.args["geometry"][0]["advanced"]["normalize_mesh"] = normalize_mesh;
-							   s.args["geometry"][0]["n_refs"] = n_refs;
-							   s.args["boundary_id_threshold"] = boundary_id_threshold;
-							   s.args["output"]["paraview"]["vismesh_rel_area"] = vismesh_rel_area;
-
-							   s.load_mesh(); },
-						   "Loads a mesh from vertices and connectivity", py::arg("vertices"), py::arg("connectivity"), py::arg("normalize_mesh") = bool(false), py::arg("vismesh_rel_area") = double(0.00001), py::arg("n_refs") = int(0), py::arg("boundary_id_threshold") = double(-1))
-
-					   .def(
-						   "set_high_order_mesh", [](polyfem::State &s, const Eigen::MatrixXd &V, const Eigen::MatrixXi &F, const Eigen::MatrixXd &nodes_pos, const std::vector<std::vector<int>> &nodes_indices, const bool normalize_mesh, const double vismesh_rel_area, const int n_refs, const double boundary_id_threshold) {
-							   init_globals(s);
-							   //    py::scoped_ostream_redirect output;
-
-							   if (V.cols() == 2)
-								   s.mesh = std::make_unique<polyfem::mesh::CMesh2D>();
-							   else
-								   s.mesh = std::make_unique<polyfem::mesh::CMesh3D>();
-							   s.mesh->build_from_matrices(V, F);
-							   s.mesh->attach_higher_order_nodes(nodes_pos, nodes_indices);
-
-							   s.args["geometry"][0]["advanced"]["normalize_mesh"] = normalize_mesh;
-							   s.args["geometry"][0]["n_refs"] = n_refs;
-							   s.args["boundary_id_threshold"] = boundary_id_threshold;
-							   s.args["output"]["paraview"]["vismesh_rel_area"] = vismesh_rel_area;
-
-							   s.load_mesh(); },
-						   "Loads an high order mesh from vertices, connectivity, nodes, and node indices mapping element to nodes", py::arg("vertices"), py::arg("connectivity"), py::arg("nodes_pos"), py::arg("nodes_indices"), py::arg("normalize_mesh") = bool(false), py::arg("vismesh_rel_area") = double(0.00001), py::arg("n_refs") = int(0), py::arg("boundary_id_threshold") = double(-1))
-
-					   .def(
-						   "set_boundary_side_set_from_bary", [](polyfem::State &s, const std::function<int(const polyfem::RowVectorNd &)> &boundary_marker) {
-							   init_globals(s);
-							   //    py::scoped_ostream_redirect output;
-							   s.mesh->compute_boundary_ids(boundary_marker); },
-						   "Sets the side set for the boundary conditions, the functions takes the barycenter of the boundary (edge or face)", py::arg("boundary_marker"))
-					   .def(
-						   "set_boundary_side_set_from_bary_and_boundary", [](polyfem::State &s, const std::function<int(const polyfem::RowVectorNd &, bool)> &boundary_marker) {
-							   init_globals(s);
-							   //    py::scoped_ostream_redirect output;
-							   s.mesh->compute_boundary_ids(boundary_marker); },
-						   "Sets the side set for the boundary conditions, the functions takes the barycenter of the boundary (edge or face) and a flag that says if the element is boundary", py::arg("boundary_marker"))
-					   .def(
-						   "set_boundary_side_set_from_v_ids", [](polyfem::State &s, const std::function<int(const std::vector<int> &, bool)> &boundary_marker) {
-							   init_globals(s);
-							   //    py::scoped_ostream_redirect output;
-							   s.mesh->compute_boundary_ids(boundary_marker); },
-						   "Sets the side set for the boundary conditions, the functions takes the sorted list of vertex id and a flag that says if the element is boundary", py::arg("boundary_marker"))
-
-					   .def(
-						   "set_rhs_from_path", [](polyfem::State &s, std::string &path) {
-							   init_globals(s);
-							   //    py::scoped_ostream_redirect output;
-								 throw std::runtime_error("No RHS path"); },
-						   "Loads the rhs from a file", py::arg("path"))
-					   .def(
-						   "set_rhs", [](polyfem::State &s, const Eigen::MatrixXd &rhs) {
-							   init_globals(s);
-							   //    py::scoped_ostream_redirect output;
-							   s.rhs_in = rhs; },
-						   "Sets the rhs", py::arg("matrix"))
-
-					   .def(
-						   "solve", [](polyfem::State &s) {
-							   init_globals(s);
-							   //    py::scoped_ostream_redirect output;
-							   s.compute_mesh_stats();
-
-							   s.build_basis();
-
-							   s.assemble_rhs();
-							   s.assemble_stiffness_mat();
-
-							   s.solve_export_to_file = false;
-							   s.solution_frames.clear();
-							   s.solve_problem();
-							   s.solve_export_to_file = true; },
-						   "solve the pde")
-					   .def(
-						   "init_timestepping", [](polyfem::State &s, const double t0, const double dt) {
-							   init_globals(s);
-							   s.compute_mesh_stats();
-
-							   s.build_basis();
-
-							   s.assemble_rhs();
-							   s.assemble_stiffness_mat();
-
-							   s.solution_frames.clear();
-							   Eigen::VectorXd _;
-							   s.init_transient(_);
-							   const polyfem::assembler::RhsAssembler &rhs_assembler = *s.step_data.rhs_assembler;
-							   s.solve_transient_tensor_non_linear_init(t0, dt, rhs_assembler); },
-						   "initialize timestepping", py::arg("t0"), py::arg("dt"))
-					   .def(
-						   "step_in_time", [](polyfem::State &s, const double t0, const double dt, const int t) {
-							   json solver_info;
-							   s.step_data.nl_problem->substepping(t0 + t * dt);
-							   s.step_data.alnl_problem->update_target(t0 + t * dt);
-							   s.solve_transient_tensor_non_linear_step(t0, dt, t, solver_info);
-							   return solver_info; },
-						   "step in time", py::arg("t0"), py::arg("dt"), py::arg("t"))
-
-					   .def(
-						   "compute_errors", [](polyfem::State &s) {
-							   init_globals(s);
-							   //    py::scoped_ostream_redirect output;
-
-							   s.compute_errors(); },
-						   "compute the error")
-
-					   .def(
-						   "get_log", [](polyfem::State &s) {
-							   //    py::scoped_ostream_redirect output;
-							   json out;
-							   s.save_json(out);
-							   return out; },
-						   "gets the log")
-
-					   //    .def("export_data", [](polyfem::State &s) { py::scoped_ostream_redirect output; s.export_data(); }, "exports all data specified in the settings")
-					   .def(
-						   "export_vtu", [](polyfem::State &s, std::string &path, bool boundary_only) {
-							   //    py::scoped_ostream_redirect output;
-								auto& vis_bnd = s.args["output"]["advanced"]["vis_boundary_only"];
-							  const bool tmp = vis_bnd; 
-							  vis_bnd = boundary_only;
-							   s.save_vtu(path, 0);
-								 vis_bnd = tmp;},
-						   "exports the solution as vtu", py::arg("path"), py::arg("boundary_only") = bool(false))
-					   //    .def("export_wire", [](polyfem::State &s, std::string &path, bool isolines) { py::scoped_ostream_redirect output; s.save_wire(path, isolines); }, "exports wireframe of the mesh", py::arg("path"), py::arg("isolines") = false)
-
-					   .def(
-						   "get_solution", [](const polyfem::State &s) { return s.sol; },
-						   "returns the solution")
-					   .def(
-						   "get_pressure", [](const polyfem::State &s) { return s.pressure; },
-						   "returns the pressure")
-					   .def(
-						   "get_sampled_solution", [](polyfem::State &s, bool boundary_only) {
-							   //    py::scoped_ostream_redirect output;
-							   Eigen::MatrixXd points;
-							   Eigen::MatrixXi tets;
-							   Eigen::MatrixXi el_id;
-							   Eigen::MatrixXd discr;
-							   Eigen::MatrixXd fun;
-
-								auto& vis_bnd = s.args["output"]["advanced"]["vis_boundary_only"];
-							  const bool tmp = vis_bnd; 
-							  vis_bnd = boundary_only;
-
-							   s.build_vis_mesh(points, tets, el_id, discr);
-
-							   Eigen::MatrixXd ids(points.rows(), 1);
-							   for (int i = 0; i < points.rows(); ++i)
-							   {
-								   ids(i) = s.mesh->get_body_id(el_id(i));
-							   }
-								const bool use_sampler = true;
-							   s.interpolate_function(points.rows(), s.sol, fun, use_sampler, boundary_only);
-
-							  vis_bnd = tmp;
-
-							   return py::make_tuple(points, tets, el_id, ids, fun); },
-						   "returns the solution on a densly sampled mesh, use 'vismesh_rel_area' to control density", py::arg("boundary_only") = bool(false))
-
-					   .def(
-						   "get_stresses", [](polyfem::State &s, bool boundary_only) {
-							   //    py::scoped_ostream_redirect output;
-							   Eigen::MatrixXd points;
-							   Eigen::MatrixXi tets;
-							   Eigen::MatrixXi el_id;
-							   Eigen::MatrixXd discr;
-							   Eigen::MatrixXd fun;
-								auto& vis_bnd = s.args["output"]["advanced"]["vis_boundary_only"];
-							  const bool tmp = vis_bnd; 
-							  vis_bnd = boundary_only;
-
-							   s.build_vis_mesh(points, tets, el_id, discr);
-							   const bool use_sampler = true;
-							   s.compute_tensor_value(points.rows(), s.sol, fun, use_sampler, boundary_only);
-
-								vis_bnd = tmp;
-
-							   return fun; },
-						   "returns the stress tensor on a densly sampled mesh, use 'vismesh_rel_area' to control density", py::arg("boundary_only") = bool(false))
-
-					   .def(
-						   "get_sampled_mises", [](polyfem::State &s, bool boundary_only) {
-							   //    py::scoped_ostream_redirect output;
-							   Eigen::MatrixXd points;
-							   Eigen::MatrixXi tets;
-							   Eigen::MatrixXi el_id;
-							   Eigen::MatrixXd discr;
-							   Eigen::MatrixXd fun;
-
-								auto& vis_bnd = s.args["output"]["advanced"]["vis_boundary_only"];
-							  const bool tmp = vis_bnd; 
-							  vis_bnd = boundary_only;
-
-							   s.build_vis_mesh(points, tets, el_id, discr);
-							   	const bool use_sampler = true;
-							   s.compute_scalar_value(points.rows(), s.sol, fun, use_sampler, boundary_only);
-
-								vis_bnd = tmp;
-
-							   return fun; },
-						   "returns the von mises stresses on a densly sampled mesh, use 'vismesh_rel_area' to control density", py::arg("boundary_only") = bool(false))
-
-					   .def(
-						   "get_sampled_mises_avg", [](polyfem::State &s, bool boundary_only) {
-							   //    py::scoped_ostream_redirect output;
-							   Eigen::MatrixXd points;
-							   Eigen::MatrixXi tets;
-							   Eigen::MatrixXi el_id;
-							   Eigen::MatrixXd discr;
-							   Eigen::MatrixXd fun, tfun;
-
-								auto& vis_bnd = s.args["output"]["advanced"]["vis_boundary_only"];
-							  const bool tmp = vis_bnd; 
-							  vis_bnd = boundary_only;
-
-							   s.build_vis_mesh(points, tets, el_id, discr);
-							   const bool use_sampler = true;
-							   s.average_grad_based_function(points.rows(), s.sol, fun, tfun, use_sampler, boundary_only);
-
-								vis_bnd = tmp;
-
-							   return py::make_tuple(fun, tfun); },
-						   "returns the von mises stresses and stress tensor averaged around a vertex on a densly sampled mesh, use 'vismesh_rel_area' to control density", py::arg("boundary_only") = bool(false))
-
-					   .def(
-						   "get_sampled_traction_forces", [](polyfem::State &s, const bool apply_displacement, const bool compute_avg) {
-							   //    py::scoped_ostream_redirect output;
-
-							   if (!s.mesh)
-								   throw pybind11::value_error("Load the mesh first!");
-							   if (!s.mesh->is_volume())
-								   throw pybind11::value_error("This function works only on volumetric meshes!");
-							   if (s.problem->is_scalar())
-								   throw pybind11::value_error("Define a tensor problem!");
-
-							   Eigen::MatrixXd result, stresses, mises;
-
-							   Eigen::MatrixXd v_surf;
-							   Eigen::MatrixXi f_surf;
-							   const double epsilon = 1e-10;
-
-							   {
-								   const polyfem::mesh::CMesh3D &mesh3d = *dynamic_cast<polyfem::mesh::CMesh3D *>(s.mesh.get());
-								   Eigen::MatrixXd points(mesh3d.n_vertices(), 3);
-								   Eigen::MatrixXi tets(mesh3d.n_cells(), 4);
-
-								   for (int t = 0; t < mesh3d.n_cells(); ++t)
-								   {
-									   if (mesh3d.n_cell_vertices(t) != 4)
-										   throw pybind11::value_error("Works only with tet meshes!");
-
-									   for (int i = 0; i < 4; ++i)
-										   tets(t, i) = mesh3d.cell_vertex(t, i);
-								   }
-
-								   for (int p = 0; p < mesh3d.n_vertices(); ++p)
-									   points.row(p) << mesh3d.point(p);
-
-								   Eigen::MatrixXi f_surf_tmp, _;
-								   igl::boundary_facets(tets, f_surf_tmp);
-								   igl::remove_unreferenced(points, f_surf_tmp, v_surf, f_surf, _);
-							   }
-
-							   if (apply_displacement)
-								   s.interpolate_boundary_tensor_function(v_surf, f_surf, s.sol, s.sol, compute_avg, result, stresses, mises, true);
-							   else
-								   s.interpolate_boundary_tensor_function(v_surf, f_surf, s.sol, compute_avg, result, stresses, mises, true);
-
-							   return py::make_tuple(v_surf, f_surf, result, stresses, mises); },
-						   "returns the traction forces, stresses, and von mises computed on the surface", py::arg("apply_displacement") = bool(false), py::arg("compute_avg") = bool(true))
-
-					   ////////////////////////////////////////////////////////////////////////////////////////////
-					   .def(
-						   "get_sampled_points_frames", [](polyfem::State &s) {
-							   //    py::scoped_ostream_redirect output;
-							   assert(!s.solution_frames.empty());
-
-							   std::vector<Eigen::MatrixXd> pts;
-
-							   for (const auto &sol : s.solution_frames)
-							   {
-								   pts.push_back(sol.points);
-							   }
-
-							   return pts; },
-						   "returns the points frames for a time dependent problem on a densly sampled mesh, use 'vismesh_rel_area' to control density")
-
-					   .def(
-						   "get_sampled_connectivity_frames", [](polyfem::State &s) {
-							   //    py::scoped_ostream_redirect output;
-							   assert(!s.solution_frames.empty());
-
-							   std::vector<Eigen::MatrixXi> tets;
-
-							   for (const auto &sol : s.solution_frames)
-								   tets.push_back(sol.connectivity);
-
-							   return tets; },
-						   "returns the connectivity frames for a time dependent problem on a densly sampled mesh, use 'vismesh_rel_area' to control density")
-
-					   .def(
-						   "get_sampled_solution_frames", [](polyfem::State &s) {
-							   //    py::scoped_ostream_redirect output;
-							   assert(!s.solution_frames.empty());
-
-							   std::vector<Eigen::MatrixXd> fun;
-
-							   for (const auto &sol : s.solution_frames)
-							   {
-								   fun.push_back(sol.solution);
-							   }
-
-							   return fun; },
-						   "returns the solution frames for a time dependent problem on a densly sampled mesh, use 'vismesh_rel_area' to control density")
-
-					   .def(
-						   "get_sampled_mises_frames", [](polyfem::State &s) {
-							   //    py::scoped_ostream_redirect output;
-							   assert(!s.solution_frames.empty());
-
-							   std::vector<Eigen::MatrixXd> mises;
-
-							   for (const auto &sol : s.solution_frames)
-								   mises.push_back(sol.scalar_value);
-
-							   return mises; },
-						   "returns the von mises stresses frames on a densly sampled mesh, use 'vismesh_rel_area' to control density")
-
-					   .def(
-						   "get_sampled_mises_avg_frames", [](polyfem::State &s) {
-							   //    py::scoped_ostream_redirect output;
-							   assert(!s.solution_frames.empty());
-
-							   std::vector<Eigen::MatrixXd> mises;
-
-							   for (const auto &sol : s.solution_frames)
-								   mises.push_back(sol.scalar_value_avg);
-
-							   return mises; },
-						   "returns the von mises stresses per frame averaged around a vertex on a densly sampled mesh, use 'vismesh_rel_area' to control density")
-
-					   .def(
-						   "get_boundary_sidesets", [](polyfem::State &s) {
-							   //    py::scoped_ostream_redirect output;
-							   Eigen::MatrixXd points;
-							   Eigen::MatrixXi faces;
-							   Eigen::MatrixXd sidesets;
-
-							   s.get_sidesets(points, faces, sidesets);
-
-							   return py::make_tuple(points, faces, sidesets); },
-						   "exports get the boundary sideset, edges in 2d or trangles in 3d")
-					   .def(
-						   "get_body_ids", [](polyfem::State &s) {
-							   //    py::scoped_ostream_redirect output;
-							   Eigen::MatrixXd points;
-							   Eigen::MatrixXi tets;
-							   Eigen::MatrixXi el_id;
-							   Eigen::MatrixXd discr;
-
-							   s.build_vis_mesh(points, tets, el_id, discr);
-
-							   Eigen::MatrixXd ids(points.rows(), 1);
-
-							   for (int i = 0; i < points.rows(); ++i)
-							   {
-								   ids(i) = s.mesh->get_body_id(el_id(i));
-							   }
-
-							   return py::make_tuple(points, tets, el_id, ids); },
-						   "exports get the body ids")
-					   .def(
-						   "update_dirichlet_boundary", [](polyfem::State &self, const int id, const py::object &val, const bool isx, const bool isy, const bool isz, const std::string &interp) {
-							   using namespace polyfem;
-							   // py::scoped_ostream_redirect output;
-							   const json json_val = val;
-							   if (polyfem::assembler::GenericTensorProblem *prob = dynamic_cast<polyfem::assembler::GenericTensorProblem *>(self.problem.get()))
-							   {
-								   prob->update_dirichlet_boundary(id, json_val, isx, isy, isz, interp);
-							   }
-							   else if (polyfem::assembler::GenericScalarProblem *prob = dynamic_cast<polyfem::assembler::GenericScalarProblem *>(self.problem.get()))
-							   {
-								   prob->update_dirichlet_boundary(id, json_val, interp);
-							   }
-							   else
-							   {
-								   throw "Updating BC works only for GenericProblems";
-							   } },
-						   "updates dirichlet boundary", py::arg("id"), py::arg("val"), py::arg("isx") = bool(true), py::arg("isy") = bool(true), py::arg("isz") = bool(true), py::arg("interp") = std::string(""))
-					   .def(
-						   "update_neumann_boundary", [](polyfem::State &self, const int id, const py::object &val, const std::string &interp) {
-							   using namespace polyfem;
-							   // py::scoped_ostream_redirect output;
-							   const json json_val = val;
-
-							   if (auto prob = dynamic_cast<polyfem::assembler::GenericTensorProblem*>(self.problem.get()))
-							   {
-								   prob->update_neumann_boundary(id, json_val, interp);
-							   }
-							   else if (auto prob = dynamic_cast<polyfem::assembler::GenericScalarProblem *>(self.problem.get()))
-							   {
-								   prob->update_neumann_boundary(id, json_val, interp);
-							   }
-							   else
-							   {
-								   throw "Updating BC works only for GenericProblems";
-							   } },
-						   "updates neumann boundary", py::arg("id"), py::arg("val"), py::arg("interp") = std::string(""))
-					   .def(
-						   "update_pressure_boundary", [](polyfem::State &self, const int id, const py::object &val, const std::string &interp) {
-							   using namespace polyfem;
-							   // py::scoped_ostream_redirect output;
-							   const json json_val = val;
-
-							   if (auto prob = dynamic_cast<polyfem::assembler::GenericTensorProblem *>(self.problem.get()))
-							   {
-								   prob->update_pressure_boundary(id, json_val, interp);
-							   }
-							   else
-							   {
-								   throw "Updating BC works only for Tensor GenericProblems";
-							   } },
-						   "updates pressure boundary", py::arg("id"), py::arg("val"), py::arg("interp") = std::string(""))
-					   .def(
-						   "update_obstacle_displacement", [](polyfem::State &self, const int oid, const py::object &val, const std::string &interp) {
-							   using namespace polyfem;
-							   // py::scoped_ostream_redirect output;
-							   const json json_val = val;
-							   self.obstacle.change_displacement(oid, json_val, interp); },
-						   "updates obstacle displacement", py::arg("oid"), py::arg("val"), py::arg("interp") = std::string(""))
-					   .def(
-						   "render", [](polyfem::State &self, int width, int height, const Eigen::MatrixXd &camera_positionm, const double camera_fov, const double camera_near, const double camera_far, const bool is_perspective, const Eigen::MatrixXd &lookatm, const Eigen::MatrixXd &upm, const Eigen::MatrixXd &ambient_lightm, const std::vector<std::pair<Eigen::MatrixXd, Eigen::MatrixXd>> &lights, const Eigen::MatrixXd &diffuse_colorm, const Eigen::MatrixXd &specular_colorm, const double specular_exponent) {
-							   using namespace Eigen;
-
-							   const int problem_dim = self.problem->is_scalar() ? 1 : self.mesh->dimension();
-
-							   Eigen::MatrixXd tmp = self.boundary_nodes_pos;
-							   assert(tmp.rows() * problem_dim == self.sol.size());
-							   for (int i = 0; i < self.sol.size(); i += problem_dim)
-							   {
-								   for (int d = 0; d < problem_dim; ++d)
-								   {
-									   tmp(i / problem_dim, d) += self.sol(i + d);
-								   }
-							   }
-
-							   return rendering_lambda(tmp, self.boundary_triangles, width, height, camera_positionm, camera_fov, camera_near, camera_far, is_perspective, lookatm, upm, ambient_lightm, lights, diffuse_colorm, specular_colorm, specular_exponent); },
-						   "renders the scene", py::kw_only(), py::arg("width"), py::arg("height"), py::arg("camera_position") = Eigen::MatrixXd(), py::arg("camera_fov") = double(0.8), py::arg("camera_near") = double(1), py::arg("camera_far") = double(10), py::arg("is_perspective") = bool(true), py::arg("lookat") = Eigen::MatrixXd(), py::arg("up") = Eigen::MatrixXd(), py::arg("ambient_light") = Eigen::MatrixXd(), py::arg("lights") = std::vector<std::pair<Eigen::MatrixXd, Eigen::MatrixXd>>(), py::arg("diffuse_color") = Eigen::MatrixXd(), py::arg("specular_color") = Eigen::MatrixXd(), py::arg("specular_exponent") = double(1))
-					   .def(
-						   "render_extrinsic", [](polyfem::State &self, const std::vector<std::pair<Eigen::MatrixXd, Eigen::MatrixXi>> &vertex_face, int width, int height, const Eigen::MatrixXd &camera_positionm, const double camera_fov, const double camera_near, const double camera_far, const bool is_perspective, const Eigen::MatrixXd &lookatm, const Eigen::MatrixXd &upm, const Eigen::MatrixXd &ambient_lightm, const std::vector<std::pair<Eigen::MatrixXd, Eigen::MatrixXd>> &lights, const Eigen::MatrixXd &diffuse_colorm, const Eigen::MatrixXd &specular_colorm, const double specular_exponent) {
-							   int v_count = 0;
-							   int f_count = 0;
-							   for (const auto& vf_pair : vertex_face) {
-								   v_count += vf_pair.first.rows();
-								   f_count += vf_pair.second.rows();
-							   }
-							   Eigen::MatrixXd vertices(v_count, 3);
-							   Eigen::MatrixXi faces(f_count, 3);
-							   v_count = 0;
-							   f_count = 0;
-							   for (const auto& vf_pair : vertex_face) {
-								   vertices.block(v_count, 0, vf_pair.first.rows(), 3) = vf_pair.first;
-								   faces.block(f_count, 0, vf_pair.second.rows(), 3) = (vf_pair.second.array() + v_count).matrix();
-								   v_count += vf_pair.first.rows();
-								   f_count += vf_pair.second.rows();
-							   }
-							   return rendering_lambda(vertices, faces, width, height, camera_positionm, camera_fov, camera_near, camera_far, is_perspective, lookatm, upm, ambient_lightm, lights, diffuse_colorm, specular_colorm, specular_exponent); },
-						   "renders the extrinsic scene", py::kw_only(), py::arg("vertex_face") = std::vector<std::pair<Eigen::MatrixXd, Eigen::MatrixXi>>(), py::arg("width"), py::arg("height"), py::arg("camera_position") = Eigen::MatrixXd(), py::arg("camera_fov") = double(0.8), py::arg("camera_near") = double(1), py::arg("camera_far") = double(10), py::arg("is_perspective") = bool(true), py::arg("lookat") = Eigen::MatrixXd(), py::arg("up") = Eigen::MatrixXd(), py::arg("ambient_light") = Eigen::MatrixXd(), py::arg("lights") = std::vector<std::pair<Eigen::MatrixXd, Eigen::MatrixXd>>(), py::arg("diffuse_color") = Eigen::MatrixXd(), py::arg("specular_color") = Eigen::MatrixXd(), py::arg("specular_exponent") = double(1));
-
-  solver.doc() = "Polyfem solver";
-
-  m.def(
-      "polyfem_command",
-      [](const std::string &json_file, const std::string &febio_file,
-         const std::string &mesh, const std::string &problem_name,
-         const std::string &scalar_formulation,
-         const std::string &tensor_formulation, const int n_refs,
-         const bool skip_normalization, const std::string &solver,
-         const int discr_order, const bool p_ref, const bool count_flipped_els,
-         const bool force_linear_geom, const double vis_mesh_res,
-         const bool project_to_psd, const int nl_solver_rhs_steps,
-         const std::string &output, const std::string &vtu, const int log_level,
-         const std::string &log_file, const bool is_quiet,
-         const bool export_material_params) {
-        json in_args = json({});
-
-        if (!json_file.empty())
-        {
-          std::ifstream file(json_file);
-
-          if (file.is_open())
-            file >> in_args;
-          else
-            throw "unable to open " + json_file + " file";
-          file.close();
-        }
-        else
-        {
-          in_args["geometry"][0]["mesh"] = mesh;
-          in_args["force_linear_geometry"] = force_linear_geom;
-          in_args["n_refs"] = n_refs;
-          if (!problem_name.empty())
-            in_args["problem"] = problem_name;
-          in_args["geometry"][0]["advanced"]["normalize_mesh"] =
-              !skip_normalization;
-          in_args["project_to_psd"] = project_to_psd;
-
-          if (!scalar_formulation.empty())
-            in_args["scalar_formulation"] = scalar_formulation;
-          if (!tensor_formulation.empty())
-            in_args["tensor_formulation"] = tensor_formulation;
-          // in_args["mixed_formulation"] = mixed_formulation;
-
-          in_args["discr_order"] = discr_order;
-          // in_args["use_spline"] = use_splines;
-          in_args["count_flipped_els"] = count_flipped_els;
-          in_args["output"] = output;
-          in_args["use_p_ref"] = p_ref;
-          // in_args["iso_parametric"] = isoparametric;
-          // in_args["serendipity"] = serendipity;
-
-          in_args["nl_solver_rhs_steps"] = nl_solver_rhs_steps;
-
-          if (!vtu.empty())
-          {
-            in_args["export"]["vis_mesh"] = vtu;
-            in_args["export"]["wire_mesh"] =
-                polyfem::utils::StringUtils::replace_ext(vtu, "obj");
-          }
-          if (!solver.empty())
-            in_args["solver_type"] = solver;
-
-          if (vis_mesh_res > 0)
-            in_args["output"]["paraview"]["vismesh_rel_area"] = vis_mesh_res;
-
-          if (export_material_params)
-            in_args["export"]["material_params"] = true;
-        }
-
-        polyfem::State state;
-        state.init_logger(log_file, log_level, is_quiet);
-        state.init(in_args);
-
-        if (!febio_file.empty())
-          state.load_febio(febio_file, in_args);
-        else
-          state.load_mesh();
-        state.compute_mesh_stats();
-
-        state.build_basis();
-
-        state.assemble_rhs();
-        state.assemble_stiffness_mat();
-
-        state.solve_problem();
-
-        state.compute_errors();
-
-        state.save_json();
-        state.export_data();
-      },
-      "runs the polyfem command, internal usage");
-
-  m.def(
-      "solve_febio",
-      [](const std::string &febio_file, const std::string &output_path,
-         const int log_level, const py::kwargs &kwargs) {
-        if (febio_file.empty())
-          throw pybind11::value_error("Specify a febio file!");
-
-        // json in_args = opts.is_none() ? json({}) : json(opts);
-        json in_args = json(static_cast<py::dict>(kwargs));
-
-        if (!output_path.empty())
-        {
-          in_args["export"]["paraview"] = output_path;
-          in_args["export"]["wire_mesh"] =
-              polyfem::utils::StringUtils::replace_ext(output_path, "obj");
-          in_args["export"]["material_params"] = true;
-          in_args["export"]["body_ids"] = true;
-          in_args["export"]["contact_forces"] = true;
-          in_args["export"]["surface"] = true;
-        }
-
-        const int discr_order =
-            in_args.contains("discr_order") ? int(in_args["discr_order"]) : 1;
-
-        if (discr_order == 1 && !in_args.contains("vismesh_rel_area"))
-          in_args["output"]["paraview"]["vismesh_rel_area"] = 1e10;
-
-        polyfem::State state;
-        state.init_logger("", log_level, false);
-        state.init(in_args);
-        state.load_febio(febio_file, in_args);
-        state.compute_mesh_stats();
-
-        state.build_basis();
-
-        state.assemble_rhs();
-        state.assemble_stiffness_mat();
-
-        state.solve_problem();
-
-        // state.compute_errors();
-
-        state.save_json();
-        state.export_data();
-      },
-      "runs FEBio", py::arg("febio_file"),
-      py::arg("output_path") = std::string(""), py::arg("log_level") = 2);
-
-  m.def(
-      "solve",
-      [](const Eigen::MatrixXd &vertices, const Eigen::MatrixXi &cells,
-         const py::object &sidesets_func, const int log_level,
-         const py::kwargs &kwargs) {
-        std::string log_file = "";
-
-        std::unique_ptr<polyfem::State> res =
-            std::make_unique<polyfem::State>();
-        polyfem::State &state = *res;
-        state.init_logger(log_file, log_level, false);
-
-        json in_args = json(static_cast<py::dict>(kwargs));
-
-        state.init(in_args);
-
-        state.load_mesh(vertices, cells);
-
-        [&]() {
-          if (!sidesets_func.is_none())
-          {
-            try
-            {
-              const auto fun =
-                  sidesets_func
-                      .cast<std::function<int(const polyfem::RowVectorNd &)>>();
-              state.mesh->compute_boundary_ids(fun);
-              return true;
-            }
-            catch (...)
-            {
-              {
-              }
-            }
-            try
-            {
-              const auto fun = sidesets_func.cast<
-                  std::function<int(const polyfem::RowVectorNd &, bool)>>();
-              state.mesh->compute_boundary_ids(fun);
-              return true;
-            }
-            catch (...)
-            {
-            }
-
-            try
-            {
-              const auto fun = sidesets_func.cast<
-                  std::function<int(const std::vector<int> &, bool)>>();
-              state.mesh->compute_boundary_ids(fun);
-              return true;
-            }
-            catch (...)
-            {
-            }
-
-            throw pybind11::value_error(
-                "sidesets_func has invalid type, should be a function (p)->int, (p, bool)->int, ([], bool)->int");
-          }
-        }();
+  define_pde_types(m);
 
-        state.compute_mesh_stats();
+  define_solver(m);
+  define_solve(m);
 
-        state.build_basis();
+  define_mesh(m);
 
-        state.assemble_rhs();
-        state.assemble_stiffness_mat();
-        state.solve_problem();
+  define_nonlinear_problem(m);
 
-        return res;
-      },
-      "single solve function", py::kw_only(),
-      py::arg("vertices") = Eigen::MatrixXd(),
-      py::arg("cells") = Eigen::MatrixXi(),
-      py::arg("sidesets_func") = py::none(), py::arg("log_level") = 2);
+  define_differentiable_cache(m);
+  define_adjoint(m);
+  define_objective(m);
+  define_opt_utils(m);
 }
diff --git a/src/differentiable/CMakeLists.txt b/src/differentiable/CMakeLists.txt
new file mode 100644
index 0000000..67ffa37
--- /dev/null
+++ b/src/differentiable/CMakeLists.txt
@@ -0,0 +1,9 @@
+set(SOURCES
+  adjoint.cpp
+  diff_cache.cpp
+  objective.cpp
+  utils.cpp
+)
+
+source_group(TREE "${CMAKE_CURRENT_SOURCE_DIR}" PREFIX "Source Files" FILES ${SOURCES})
+target_sources(polyfempy PRIVATE ${SOURCES})
diff --git a/src/differentiable/adjoint.cpp b/src/differentiable/adjoint.cpp
new file mode 100644
index 0000000..7d46f1a
--- /dev/null
+++ b/src/differentiable/adjoint.cpp
@@ -0,0 +1,23 @@
+#include <polyfem/solver/AdjointTools.hpp>
+#include <polyfem/utils/MatrixUtils.hpp>
+#include <polyfem/State.hpp>
+#include "binding.hpp"
+#include <pybind11/eigen.h>
+
+namespace py = pybind11;
+using namespace polyfem;
+using namespace polyfem::solver;
+
+void define_adjoint(py::module_ &m)
+{
+  m.def("shape_derivative", [](State &state) {
+    Eigen::VectorXd term;
+    if (state.problem->is_time_dependent())
+      AdjointTools::dJ_shape_transient_adjoint_term(
+          state, state.get_adjoint_mat(1), state.get_adjoint_mat(0), term);
+    else
+      AdjointTools::dJ_shape_static_adjoint_term(
+          state, state.diff_cached.u(0), state.get_adjoint_mat(0), term);
+    return utils::unflatten(term, state.mesh->dimension());
+  }, py::arg("solver"));
+}
diff --git a/src/differentiable/binding.hpp b/src/differentiable/binding.hpp
new file mode 100644
index 0000000..63a26e3
--- /dev/null
+++ b/src/differentiable/binding.hpp
@@ -0,0 +1,10 @@
+#pragma once
+
+#include <pybind11/pybind11.h>
+
+namespace py = pybind11;
+
+void define_differentiable_cache(py::module_ &m);
+void define_adjoint(py::module_ &m);
+void define_objective(py::module_ &m);
+void define_opt_utils(py::module_ &m);
diff --git a/src/differentiable/diff_cache.cpp b/src/differentiable/diff_cache.cpp
new file mode 100644
index 0000000..f492987
--- /dev/null
+++ b/src/differentiable/diff_cache.cpp
@@ -0,0 +1,37 @@
+#include <polyfem/solver/DiffCache.hpp>
+#include <polyfem/State.hpp>
+#include "binding.hpp"
+#include <pybind11/eigen.h>
+
+namespace py = pybind11;
+using namespace polyfem::solver;
+
+void define_differentiable_cache(py::module_ &m)
+{
+  py::enum_<CacheLevel>(m, "CacheLevel", "Caching level of the simulator.")
+      .value("None", CacheLevel::None, "No cache at all")
+      .value("Solution", CacheLevel::Solution, "Cache solutions")
+      .value("Derivatives", CacheLevel::Derivatives,
+             "Cache solutions and quantities for gradient computation")
+      .export_values();
+
+  py::class_<DiffCache>(m, "DiffCache", "Cache of the simulator")
+
+      .def("size", &DiffCache::size,
+           "Get current cache size (number of time steps)")
+
+      .def("solution", &DiffCache::u, "Get solution",
+           py::arg("time_step") = int(0))
+
+      .def("displacement", &DiffCache::u, "Get displacement",
+           py::arg("time_step") = int(0))
+
+      .def("velocity", &DiffCache::v, "Get velocity",
+           py::arg("time_step") = int(0))
+
+      .def("acceleration", &DiffCache::acc, "Get acceleration",
+           py::arg("time_step") = int(0))
+
+      .def("hessian", &DiffCache::gradu_h, "Get energy hessian at solution",
+           py::arg("time_step") = int(0));
+}
diff --git a/src/differentiable/objective.cpp b/src/differentiable/objective.cpp
new file mode 100644
index 0000000..52f85e5
--- /dev/null
+++ b/src/differentiable/objective.cpp
@@ -0,0 +1,42 @@
+// #include <polyfem/solver/AdjointTools.hpp>
+#include <polyfem/solver/forms/adjoint_forms/AdjointForm.hpp>
+#include <polyfem/solver/forms/adjoint_forms/VariableToSimulation.hpp>
+#include <polyfem/State.hpp>
+#include <polyfem/solver/Optimizations.hpp>
+#include <polyfem/utils/MatrixUtils.hpp>
+#include <polyfem/State.hpp>
+#include "binding.hpp"
+#include <pybind11/eigen.h>
+#include <pybind11_json/pybind11_json.hpp>
+
+namespace py = pybind11;
+using namespace polyfem;
+using namespace polyfem::solver;
+
+void define_objective(py::module_ &m)
+{
+    py::class_<AdjointForm, std::shared_ptr<AdjointForm>>(m, "Objective")
+        .def("name", &AdjointForm::name)
+
+        .def("value", &AdjointForm::value, py::arg("x"))
+
+        .def("solution_changed", &AdjointForm::solution_changed, py::arg("x"))
+
+        .def("derivative", [](AdjointForm &obj, State &solver, const Eigen::VectorXd &x, const std::string &wrt) -> Eigen::VectorXd {
+            if (wrt == "solution")
+                return obj.compute_adjoint_rhs(x, solver);
+            else if (wrt == obj.get_variable_to_simulations()[0]->name())
+            {
+                Eigen::VectorXd grad;
+                obj.compute_partial_gradient(x, grad);
+                return grad;
+            }
+            else
+                throw std::runtime_error("Input type does not match objective derivative type!");
+        }, py::arg("solver"), py::arg("x"), py::arg("wrt"));
+
+    m.def("create_objective", &AdjointOptUtils::create_simple_form,
+        py::arg("obj_type"), py::arg("param_type"), py::arg("solver"), py::arg("parameters"));
+
+    // obj = std::make_shared<StressNormForm>(var2sim, *(states[args["state"]]), args);
+}
diff --git a/src/differentiable/utils.cpp b/src/differentiable/utils.cpp
new file mode 100644
index 0000000..c431998
--- /dev/null
+++ b/src/differentiable/utils.cpp
@@ -0,0 +1,25 @@
+#include <polyfem/mesh/SlimSmooth.hpp>
+#include <polyfem/utils/MatrixUtils.hpp>
+#include <polyfem/State.hpp>
+#include "binding.hpp"
+#include <pybind11/eigen.h>
+#include <pybind11_json/pybind11_json.hpp>
+
+namespace py = pybind11;
+using namespace polyfem;
+using namespace polyfem::mesh;
+
+void define_opt_utils(py::module_ &m)
+{
+  m.def(
+      "apply_slim",
+      [](const Eigen::MatrixXd &V, const Eigen::MatrixXi &F,
+         const Eigen::MatrixXd &Vnew) {
+        Eigen::MatrixXd Vsmooth;
+        bool succeed = apply_slim(V, F, Vnew, Vsmooth, 1000);
+        if (!succeed)
+          throw std::runtime_error("SLIM failed to converge!");
+        return Vsmooth;
+      },
+      py::arg("Vold"), py::arg("faces"), py::arg("Vnew"));
+}
diff --git a/src/mesh/CMakeLists.txt b/src/mesh/CMakeLists.txt
new file mode 100644
index 0000000..0c8de90
--- /dev/null
+++ b/src/mesh/CMakeLists.txt
@@ -0,0 +1,6 @@
+set(SOURCES
+  mesh.cpp
+)
+
+source_group(TREE "${CMAKE_CURRENT_SOURCE_DIR}" PREFIX "Source Files" FILES ${SOURCES})
+target_sources(polyfempy PRIVATE ${SOURCES})
diff --git a/src/mesh/binding.hpp b/src/mesh/binding.hpp
new file mode 100644
index 0000000..ceae463
--- /dev/null
+++ b/src/mesh/binding.hpp
@@ -0,0 +1,7 @@
+#pragma once
+
+#include <pybind11/pybind11.h>
+
+namespace py = pybind11;
+
+void define_mesh(py::module_ &m);
diff --git a/src/mesh/mesh.cpp b/src/mesh/mesh.cpp
new file mode 100644
index 0000000..9c26575
--- /dev/null
+++ b/src/mesh/mesh.cpp
@@ -0,0 +1,119 @@
+#include <polyfem/mesh/MeshUtils.hpp>
+#include <polyfem/mesh/mesh3D/CMesh3D.hpp>
+#include <polyfem/mesh/mesh2D/CMesh2D.hpp>
+#include "binding.hpp"
+#include <pybind11/eigen.h>
+
+namespace py = pybind11;
+using namespace polyfem;
+using namespace polyfem::mesh;
+
+void define_mesh(py::module_ &m)
+{
+  py::class_<Mesh>(m, "Mesh", "Mesh")
+
+      .def("n_elements", &Mesh::n_elements, "Number of elements")
+
+      .def("n_boundary_elements", &Mesh::n_boundary_elements,
+           "Number of boundary elements (faces in 3D, edges in 2D)")
+
+      .def("n_vertices", &Mesh::n_vertices, "Number of vertices")
+
+      .def("n_cell_vertices", &Mesh::n_cell_vertices,
+           "Number of vertices in one cell", py::arg("cell_id"))
+
+      .def("element_vertex", &Mesh::element_vertex,
+           "Global vertex ID of a local vertex in an element",
+           py::arg("cell_id"), py::arg("local_vertex_id"))
+
+      .def("boundary_element_vertex", &Mesh::boundary_element_vertex,
+           "Global vertex ID of a local vertex in a boundary element",
+           py::arg("boundary_cell_id"), py::arg("local_vertex_id"))
+
+      .def("is_boundary_vertex", &Mesh::is_boundary_vertex,
+           "Check if a vertex is on boundary", py::arg("vertex_id"))
+
+      .def(
+          "bounding_box",
+          [](const Mesh &mesh) {
+            RowVectorNd min, max;
+            mesh.bounding_box(min, max);
+            return py::make_tuple(min, max);
+          },
+          "Get bounding box")
+
+      .def("set_boundary_ids", &Mesh::set_boundary_ids,
+           "Set boundary IDs with an array", py::arg("ids"))
+
+      .def("get_boundary_id", &Mesh::get_boundary_id,
+           "Get boundary ID of one boundary primitive", py::arg("primitive"))
+
+      .def(
+          "set_boundary_side_set_from_bary",
+          [](Mesh &mesh,
+             const std::function<int(const RowVectorNd &)> &boundary_marker) {
+            mesh.compute_boundary_ids(boundary_marker);
+          },
+          "Sets the side set for the boundary conditions, the functions takes the barycenter of the boundary (edge or face)",
+          py::arg("boundary_marker"))
+      .def(
+          "set_boundary_side_set_from_bary_and_boundary",
+          [](Mesh &mesh, const std::function<int(const RowVectorNd &, bool)>
+                             &boundary_marker) {
+            mesh.compute_boundary_ids(boundary_marker);
+          },
+          "Sets the side set for the boundary conditions, the functions takes the barycenter of the boundary (edge or face) and a flag that says if the element is boundary",
+          py::arg("boundary_marker"))
+      .def(
+          "set_boundary_side_set_from_v_ids",
+          [](Mesh &mesh,
+             const std::function<int(const std::vector<int> &, bool)>
+                 &boundary_marker) {
+            mesh.compute_boundary_ids(boundary_marker);
+          },
+          "Sets the side set for the boundary conditions, the functions takes the sorted list of vertex id and a flag that says if the element is boundary",
+          py::arg("boundary_marker"))
+
+      .def("set_body_ids", &Mesh::set_body_ids, "Set body IDs with an array",
+           py::arg("ids"))
+
+      .def("point", &Mesh::point, "Get vertex position", py::arg("vertex_id"))
+
+      .def("set_point", &Mesh::set_point, "Set vertex position",
+           py::arg("vertex_id"), py::arg("position"))
+
+      .def(
+          "vertices",
+          [](const Mesh &mesh) {
+            Eigen::MatrixXd points(mesh.n_vertices(), mesh.dimension());
+            for (int i = 0; i < mesh.n_vertices(); i++)
+              points.row(i) = mesh.point(i);
+            return points;
+          },
+          "Get all vertex positions")
+
+      .def(
+          "set_vertices",
+          [](Mesh &mesh, const Eigen::MatrixXd &points) {
+            for (int i = 0; i < mesh.n_vertices(); i++)
+              mesh.set_point(i, points.row(i));
+          },
+          "Set all vertex positions")
+
+      .def(
+          "elements",
+          [](const Mesh &mesh) {
+            Eigen::MatrixXi elements(mesh.n_elements(), mesh.n_cell_vertices(0));
+            for (int e = 0; e < mesh.n_elements(); e++)
+            {
+               assert(mesh.n_cell_vertices(e) == elements.cols());
+               for (int i = 0; i < elements.cols(); i++)
+                    elements(e, i) = mesh.element_vertex(e, i);
+            }
+            return elements;
+          },
+          "Get all elements of the mesh");
+
+  py::class_<CMesh2D, Mesh>(m, "Mesh2D", "");
+  py::class_<CMesh3D, Mesh>(m, "Mesh3D", "");
+}
diff --git a/src/raster.cpp b/src/raster.cpp
index 5b73708..df1f1e9 100755
--- a/src/raster.cpp
+++ b/src/raster.cpp
@@ -1,150 +1,173 @@
 #include "raster.hpp"
 
-
-
 #include <iostream>
 
 namespace renderer
 {
-	void rasterize_triangle(const Program &program, const UniformAttributes &uniform, const VertexAttributes &v1, const VertexAttributes &v2, const VertexAttributes &v3, Eigen::Matrix<FrameBufferAttributes, Eigen::Dynamic, Eigen::Dynamic> &frameBuffer)
-	{
-		Eigen::Matrix<double, 3, 4> p;
-		p.row(0) = v1.position.array() / v1.position[3];
-		p.row(1) = v2.position.array() / v2.position[3];
-		p.row(2) = v3.position.array() / v3.position[3];
-
-		p.col(0) = ((p.col(0).array() + 1.0) / 2.0) * frameBuffer.rows();
-		p.col(1) = ((p.col(1).array() + 1.0) / 2.0) * frameBuffer.cols();
-
-		int lx = std::floor(p.col(0).minCoeff());
-		int ly = std::floor(p.col(1).minCoeff());
-		int ux = std::ceil(p.col(0).maxCoeff());
-		int uy = std::ceil(p.col(1).maxCoeff());
-
-		lx = std::min(std::max(lx, int(0)), int(frameBuffer.rows() - 1));
-		ly = std::min(std::max(ly, int(0)), int(frameBuffer.cols() - 1));
-		ux = std::max(std::min(ux, int(frameBuffer.rows() - 1)), int(0));
-		uy = std::max(std::min(uy, int(frameBuffer.cols() - 1)), int(0));
-
-		Eigen::Matrix3d A;
-		A.col(0) = p.row(0).segment(0, 3);
-		A.col(1) = p.row(1).segment(0, 3);
-		A.col(2) = p.row(2).segment(0, 3);
-		A.row(2) << 1.0, 1.0, 1.0;
-
-		Eigen::Matrix3d Ai = A.inverse();
-
-		for (unsigned i = lx; i <= ux; i++)
-		{
-			for (unsigned j = ly; j <= uy; j++)
-			{
-
-				Eigen::Vector3d pixel(i + 0.5, j + 0.5, 1);
-				Eigen::Vector3d b = Ai * pixel;
-				if (b.minCoeff() >= 0)
-				{
-					VertexAttributes va = VertexAttributes::interpolate(v1, v2, v3, b[0], b[1], b[2]);
-
-					if (va.position[2] >= -1 && va.position[2] <= 1)
-					{
-						FragmentAttributes frag = program.FragmentShader(va, uniform);
-						frameBuffer(i, j) = program.BlendingShader(frag, frameBuffer(i, j));
-					}
-				}
-			}
-		}
-	}
-
-	void rasterize_triangles(const Program &program, const UniformAttributes &uniform, const std::vector<VertexAttributes> &vertices, Eigen::Matrix<FrameBufferAttributes, Eigen::Dynamic, Eigen::Dynamic> &frameBuffer)
-	{
-		std::vector<VertexAttributes> v(vertices.size());
-		for (unsigned i = 0; i < vertices.size(); i++)
-			v[i] = program.VertexShader(vertices[i], uniform);
-
-		for (unsigned i = 0; i < vertices.size() / 3; i++)
-			rasterize_triangle(program, uniform, v[i * 3 + 0], v[i * 3 + 1], v[i * 3 + 2], frameBuffer);
-	}
-
-	void rasterize_line(const Program &program, const UniformAttributes &uniform, const VertexAttributes &v1, const VertexAttributes &v2, double line_thickness, Eigen::Matrix<FrameBufferAttributes, Eigen::Dynamic, Eigen::Dynamic> &frameBuffer)
-	{
-		Eigen::Matrix<double, 2, 4> p;
-		p.row(0) = v1.position.array() / v1.position[3];
-		p.row(1) = v2.position.array() / v2.position[3];
-
-		p.col(0) = ((p.col(0).array() + 1.0) / 2.0) * frameBuffer.rows();
-		p.col(1) = ((p.col(1).array() + 1.0) / 2.0) * frameBuffer.cols();
-
-		int lx = std::floor(p.col(0).minCoeff() - line_thickness);
-		int ly = std::floor(p.col(1).minCoeff() - line_thickness);
-		int ux = std::ceil(p.col(0).maxCoeff() + line_thickness);
-		int uy = std::ceil(p.col(1).maxCoeff() + line_thickness);
-
-		lx = std::min(std::max(lx, int(0)), int(frameBuffer.rows() - 1));
-		ly = std::min(std::max(ly, int(0)), int(frameBuffer.cols() - 1));
-		ux = std::max(std::min(ux, int(frameBuffer.rows() - 1)), int(0));
-		uy = std::max(std::min(uy, int(frameBuffer.cols() - 1)), int(0));
-
-		Eigen::Vector2f l1(p(0, 0), p(0, 1));
-		Eigen::Vector2f l2(p(1, 0), p(1, 1));
-
-		double t = -1;
-		double ll = (l1 - l2).squaredNorm();
-
-		for (unsigned i = lx; i <= ux; i++)
-		{
-			for (unsigned j = ly; j <= uy; j++)
-			{
-
-				Eigen::Vector2f pixel(i + 0.5, j + 0.5);
-
-				if (ll == 0.0)
-					t = 0;
-				else
-				{
-					t = (pixel - l1).dot(l2 - l1) / ll;
-					t = std::fmax(0, std::fmin(1, t));
-				}
-
-				Eigen::Vector2f pixel_p = l1 + t * (l2 - l1);
-
-				if ((pixel - pixel_p).squaredNorm() < (line_thickness * line_thickness))
-				{
-					VertexAttributes va = VertexAttributes::interpolate(v1, v2, v1, 1 - t, t, 0);
-					FragmentAttributes frag = program.FragmentShader(va, uniform);
-					frameBuffer(i, j) = program.BlendingShader(frag, frameBuffer(i, j));
-				}
-			}
-		}
-	}
-
-	void rasterize_lines(const Program &program, const UniformAttributes &uniform, const std::vector<VertexAttributes> &vertices, double line_thickness, Eigen::Matrix<FrameBufferAttributes, Eigen::Dynamic, Eigen::Dynamic> &frameBuffer)
-	{
-		std::vector<VertexAttributes> v(vertices.size());
-		for (unsigned i = 0; i < vertices.size(); i++)
-			v[i] = program.VertexShader(vertices[i], uniform);
-
-		for (unsigned i = 0; i < vertices.size() / 2; i++)
-			rasterize_line(program, uniform, v[i * 2 + 0], v[i * 2 + 1], line_thickness, frameBuffer);
-	}
-
-	void framebuffer_to_uint8(const Eigen::Matrix<FrameBufferAttributes, Eigen::Dynamic, Eigen::Dynamic> &frameBuffer, std::vector<uint8_t> &image)
-	{
-		const int w = frameBuffer.rows();
-		const int h = frameBuffer.cols();
-		const int comp = 4;
-		const int stride_in_bytes = w * comp;
-		image.resize(w * h * comp, 0);
-
-		for (unsigned wi = 0; wi < w; ++wi)
-		{
-			for (unsigned hi = 0; hi < h; ++hi)
-			{
-				unsigned hif = h - 1 - hi;
-				image[(hi * w * 4) + (wi * 4) + 0] = frameBuffer(wi, hif).color[0];
-				image[(hi * w * 4) + (wi * 4) + 1] = frameBuffer(wi, hif).color[1];
-				image[(hi * w * 4) + (wi * 4) + 2] = frameBuffer(wi, hif).color[2];
-				image[(hi * w * 4) + (wi * 4) + 3] = frameBuffer(wi, hif).color[3];
-			}
-		}
-	}
-}
\ No newline at end of file
+  void rasterize_triangle(const Program &program,
+                          const UniformAttributes &uniform,
+                          const VertexAttributes &v1,
+                          const VertexAttributes &v2,
+                          const VertexAttributes &v3,
+                          Eigen::Matrix<FrameBufferAttributes, Eigen::Dynamic,
+                                        Eigen::Dynamic> &frameBuffer)
+  {
+    Eigen::Matrix<double, 3, 4> p;
+    p.row(0) = v1.position.array() / v1.position[3];
+    p.row(1) = v2.position.array() / v2.position[3];
+    p.row(2) = v3.position.array() / v3.position[3];
+
+    p.col(0) = ((p.col(0).array() + 1.0) / 2.0) * frameBuffer.rows();
+    p.col(1) = ((p.col(1).array() + 1.0) / 2.0) * frameBuffer.cols();
+
+    int lx = std::floor(p.col(0).minCoeff());
+    int ly = std::floor(p.col(1).minCoeff());
+    int ux = std::ceil(p.col(0).maxCoeff());
+    int uy = std::ceil(p.col(1).maxCoeff());
+
+    lx = std::min(std::max(lx, int(0)), int(frameBuffer.rows() - 1));
+    ly = std::min(std::max(ly, int(0)), int(frameBuffer.cols() - 1));
+    ux = std::max(std::min(ux, int(frameBuffer.rows() - 1)), int(0));
+    uy = std::max(std::min(uy, int(frameBuffer.cols() - 1)), int(0));
+
+    Eigen::Matrix3d A;
+    A.col(0) = p.row(0).segment(0, 3);
+    A.col(1) = p.row(1).segment(0, 3);
+    A.col(2) = p.row(2).segment(0, 3);
+    A.row(2) << 1.0, 1.0, 1.0;
+
+    Eigen::Matrix3d Ai = A.inverse();
+
+    for (int i = lx; i <= ux; i++)
+    {
+      for (int j = ly; j <= uy; j++)
+      {
+
+        Eigen::Vector3d pixel(i + 0.5, j + 0.5, 1);
+        Eigen::Vector3d b = Ai * pixel;
+        if (b.minCoeff() >= 0)
+        {
+          VertexAttributes va =
+              VertexAttributes::interpolate(v1, v2, v3, b[0], b[1], b[2]);
+
+          if (va.position[2] >= -1 && va.position[2] <= 1)
+          {
+            FragmentAttributes frag = program.FragmentShader(va, uniform);
+            frameBuffer(i, j) = program.BlendingShader(frag, frameBuffer(i, j));
+          }
+        }
+      }
+    }
+  }
+
+  void rasterize_triangles(const Program &program,
+                           const UniformAttributes &uniform,
+                           const std::vector<VertexAttributes> &vertices,
+                           Eigen::Matrix<FrameBufferAttributes, Eigen::Dynamic,
+                                         Eigen::Dynamic> &frameBuffer)
+  {
+    std::vector<VertexAttributes> v(vertices.size());
+    for (unsigned i = 0; i < vertices.size(); i++)
+      v[i] = program.VertexShader(vertices[i], uniform);
+
+    for (unsigned i = 0; i < vertices.size() / 3; i++)
+      rasterize_triangle(program, uniform, v[i * 3 + 0], v[i * 3 + 1],
+                         v[i * 3 + 2], frameBuffer);
+  }
+
+  void rasterize_line(const Program &program, const UniformAttributes &uniform,
+                      const VertexAttributes &v1, const VertexAttributes &v2,
+                      double line_thickness,
+                      Eigen::Matrix<FrameBufferAttributes, Eigen::Dynamic,
+                                    Eigen::Dynamic> &frameBuffer)
+  {
+    Eigen::Matrix<double, 2, 4> p;
+    p.row(0) = v1.position.array() / v1.position[3];
+    p.row(1) = v2.position.array() / v2.position[3];
+
+    p.col(0) = ((p.col(0).array() + 1.0) / 2.0) * frameBuffer.rows();
+    p.col(1) = ((p.col(1).array() + 1.0) / 2.0) * frameBuffer.cols();
+
+    int lx = std::floor(p.col(0).minCoeff() - line_thickness);
+    int ly = std::floor(p.col(1).minCoeff() - line_thickness);
+    int ux = std::ceil(p.col(0).maxCoeff() + line_thickness);
+    int uy = std::ceil(p.col(1).maxCoeff() + line_thickness);
+
+    lx = std::min(std::max(lx, int(0)), int(frameBuffer.rows() - 1));
+    ly = std::min(std::max(ly, int(0)), int(frameBuffer.cols() - 1));
+    ux = std::max(std::min(ux, int(frameBuffer.rows() - 1)), int(0));
+    uy = std::max(std::min(uy, int(frameBuffer.cols() - 1)), int(0));
+
+    Eigen::Vector2f l1(p(0, 0), p(0, 1));
+    Eigen::Vector2f l2(p(1, 0), p(1, 1));
+
+    double t = -1;
+    double ll = (l1 - l2).squaredNorm();
+
+    for (int i = lx; i <= ux; i++)
+    {
+      for (int j = ly; j <= uy; j++)
+      {
+
+        Eigen::Vector2f pixel(i + 0.5, j + 0.5);
+
+        if (ll == 0.0)
+          t = 0;
+        else
+        {
+          t = (pixel - l1).dot(l2 - l1) / ll;
+          t = std::fmax(0, std::fmin(1, t));
+        }
+
+        Eigen::Vector2f pixel_p = l1 + t * (l2 - l1);
+
+        if ((pixel - pixel_p).squaredNorm() < (line_thickness * line_thickness))
+        {
+          VertexAttributes va =
+              VertexAttributes::interpolate(v1, v2, v1, 1 - t, t, 0);
+          FragmentAttributes frag = program.FragmentShader(va, uniform);
+          frameBuffer(i, j) = program.BlendingShader(frag, frameBuffer(i, j));
+        }
+      }
+    }
+  }
+
+  void rasterize_lines(const Program &program, const UniformAttributes &uniform,
+                       const std::vector<VertexAttributes> &vertices,
+                       double line_thickness,
+                       Eigen::Matrix<FrameBufferAttributes, Eigen::Dynamic,
+                                     Eigen::Dynamic> &frameBuffer)
+  {
+    std::vector<VertexAttributes> v(vertices.size());
+    for (unsigned i = 0; i < vertices.size(); i++)
+      v[i] = program.VertexShader(vertices[i], uniform);
+
+    for (unsigned i = 0; i < vertices.size() / 2; i++)
+      rasterize_line(program, uniform, v[i * 2 + 0], v[i * 2 + 1],
+                     line_thickness, frameBuffer);
+  }
+
+  void framebuffer_to_uint8(
+      const Eigen::Matrix<FrameBufferAttributes, Eigen::Dynamic, Eigen::Dynamic>
+          &frameBuffer,
+      std::vector<uint8_t> &image)
+  {
+    const int w = frameBuffer.rows();
+    const int h = frameBuffer.cols();
+    const int comp = 4;
+    // const int stride_in_bytes = w * comp;
+    image.resize(w * h * comp, 0);
+
+    for (int wi = 0; wi < w; ++wi)
+    {
+      for (int hi = 0; hi < h; ++hi)
+      {
+        int hif = h - 1 - hi;
+        image[(hi * w * 4) + (wi * 4) + 0] = frameBuffer(wi, hif).color[0];
+        image[(hi * w * 4) + (wi * 4) + 1] = frameBuffer(wi, hif).color[1];
+        image[(hi * w * 4) + (wi * 4) + 2] = frameBuffer(wi, hif).color[2];
+        image[(hi * w * 4) + (wi * 4) + 3] = frameBuffer(wi, hif).color[3];
+      }
+    }
+  }
+} // namespace renderer
\ No newline at end of file
diff --git a/src/raster.hpp b/src/raster.hpp
index 46edce1..229564e 100755
--- a/src/raster.hpp
+++ b/src/raster.hpp
@@ -8,85 +8,113 @@
 
 namespace renderer
 {
-	struct Material
-	{
-		Eigen::Vector3d diffuse_color;
-		Eigen::Vector3d specular_color;
-		double specular_exponent;
-	};
-	class VertexAttributes
-	{
-	public:
-		VertexAttributes(double x = 0, double y = 0, double z = 0, double w = 1)
-		{
-			position << x, y, z, w;
-		}
+  struct Material
+  {
+    Eigen::Vector3d diffuse_color;
+    Eigen::Vector3d specular_color;
+    double specular_exponent;
+  };
+  class VertexAttributes
+  {
+public:
+    VertexAttributes(double x = 0, double y = 0, double z = 0, double w = 1)
+    {
+      position << x, y, z, w;
+    }
 
-		static VertexAttributes interpolate(
-			const VertexAttributes &a,
-			const VertexAttributes &b,
-			const VertexAttributes &c,
-			const double alpha,
-			const double beta,
-			const double gamma)
-		{
-			VertexAttributes r;
-			r.position = alpha * (a.position / a.position(3)) + beta * (b.position / b.position(3)) + gamma * (c.position / c.position(3));
-			r.color = alpha * a.color + beta * b.color + gamma * c.color;
-			return r;
-		}
+    static VertexAttributes interpolate(const VertexAttributes &a,
+                                        const VertexAttributes &b,
+                                        const VertexAttributes &c,
+                                        const double alpha, const double beta,
+                                        const double gamma)
+    {
+      VertexAttributes r;
+      r.position = alpha * (a.position / a.position(3))
+                   + beta * (b.position / b.position(3))
+                   + gamma * (c.position / c.position(3));
+      r.color = alpha * a.color + beta * b.color + gamma * c.color;
+      return r;
+    }
 
-		Eigen::Vector4d position;
-		Eigen::Vector3d normal;
-		Eigen::Vector3d color;
-		Material material;
-	};
+    Eigen::Vector4d position;
+    Eigen::Vector3d normal;
+    Eigen::Vector3d color;
+    Material material;
+  };
 
-	class FragmentAttributes
-	{
-	public:
-		FragmentAttributes(double r = 0, double g = 0, double b = 0, double a = 1)
-		{
-			color << r, g, b, a;
-		}
-		double depth;
-		Eigen::Vector4d color;
-	};
+  class FragmentAttributes
+  {
+public:
+    FragmentAttributes(double r = 0, double g = 0, double b = 0, double a = 1)
+    {
+      color << r, g, b, a;
+    }
+    double depth;
+    Eigen::Vector4d color;
+  };
 
-	class FrameBufferAttributes
-	{
-	public:
-		FrameBufferAttributes(uint8_t r = 0, uint8_t g = 0, uint8_t b = 0, uint8_t a = 255)
-		{
-			color << r, g, b, a;
-			depth = 2;
-		}
-		double depth;
-		Eigen::Matrix<uint8_t, 4, 1> color;
-	};
+  class FrameBufferAttributes
+  {
+public:
+    FrameBufferAttributes(uint8_t r = 0, uint8_t g = 0, uint8_t b = 0,
+                          uint8_t a = 255)
+    {
+      color << r, g, b, a;
+      depth = 2;
+    }
+    double depth;
+    Eigen::Matrix<uint8_t, 4, 1> color;
+  };
 
-	class UniformAttributes
-	{
-	public:
-		Eigen::Matrix4d M_cam;
+  class UniformAttributes
+  {
+public:
+    Eigen::Matrix4d M_cam;
 
-		Eigen::Matrix4d M_orth;
-		Eigen::Matrix4d M;
-	};
+    Eigen::Matrix4d M_orth;
+    Eigen::Matrix4d M;
+  };
 
-	class Program
-	{
-	public:
-		std::function<VertexAttributes(const VertexAttributes &, const UniformAttributes &)> VertexShader;
-		std::function<FragmentAttributes(const VertexAttributes &, const UniformAttributes &)> FragmentShader;
-		std::function<FrameBufferAttributes(const FragmentAttributes &, const FrameBufferAttributes &)> BlendingShader;
-	};
+  class Program
+  {
+public:
+    std::function<VertexAttributes(const VertexAttributes &,
+                                   const UniformAttributes &)>
+        VertexShader;
+    std::function<FragmentAttributes(const VertexAttributes &,
+                                     const UniformAttributes &)>
+        FragmentShader;
+    std::function<FrameBufferAttributes(const FragmentAttributes &,
+                                        const FrameBufferAttributes &)>
+        BlendingShader;
+  };
 
-	void rasterize_triangle(const Program &program, const UniformAttributes &uniform, const VertexAttributes &v1, const VertexAttributes &v2, const VertexAttributes &v3, Eigen::Matrix<FrameBufferAttributes, Eigen::Dynamic, Eigen::Dynamic> &frameBuffer);
-	void rasterize_triangles(const Program &program, const UniformAttributes &uniform, const std::vector<VertexAttributes> &vertices, Eigen::Matrix<FrameBufferAttributes, Eigen::Dynamic, Eigen::Dynamic> &frameBuffer);
+  void rasterize_triangle(const Program &program,
+                          const UniformAttributes &uniform,
+                          const VertexAttributes &v1,
+                          const VertexAttributes &v2,
+                          const VertexAttributes &v3,
+                          Eigen::Matrix<FrameBufferAttributes, Eigen::Dynamic,
+                                        Eigen::Dynamic> &frameBuffer);
+  void rasterize_triangles(const Program &program,
+                           const UniformAttributes &uniform,
+                           const std::vector<VertexAttributes> &vertices,
+                           Eigen::Matrix<FrameBufferAttributes, Eigen::Dynamic,
+                                         Eigen::Dynamic> &frameBuffer);
 
-	void rasterize_line(const Program &program, const UniformAttributes &uniform, const VertexAttributes &v1, const VertexAttributes &v2, double line_thickness, Eigen::Matrix<FrameBufferAttributes, Eigen::Dynamic, Eigen::Dynamic> &frameBuffer);
-	void rasterize_lines(const Program &program, const UniformAttributes &uniform, const std::vector<VertexAttributes> &vertices, double line_thickness, Eigen::Matrix<FrameBufferAttributes, Eigen::Dynamic, Eigen::Dynamic> &frameBuffer);
+  void rasterize_line(const Program &program, const UniformAttributes &uniform,
+                      const VertexAttributes &v1, const VertexAttributes &v2,
+                      double line_thickness,
+                      Eigen::Matrix<FrameBufferAttributes, Eigen::Dynamic,
+                                    Eigen::Dynamic> &frameBuffer);
+  void rasterize_lines(const Program &program, const UniformAttributes &uniform,
+                       const std::vector<VertexAttributes> &vertices,
+                       double line_thickness,
+                       Eigen::Matrix<FrameBufferAttributes, Eigen::Dynamic,
+                                     Eigen::Dynamic> &frameBuffer);
 
-	void framebuffer_to_uint8(const Eigen::Matrix<FrameBufferAttributes, Eigen::Dynamic, Eigen::Dynamic> &frameBuffer, std::vector<uint8_t> &image);
-}
+  void framebuffer_to_uint8(
+      const Eigen::Matrix<FrameBufferAttributes, Eigen::Dynamic, Eigen::Dynamic>
+          &frameBuffer,
+      std::vector<uint8_t> &image);
+} // namespace renderer
diff --git a/src/solver/CMakeLists.txt b/src/solver/CMakeLists.txt
new file mode 100644
index 0000000..4e0d576
--- /dev/null
+++ b/src/solver/CMakeLists.txt
@@ -0,0 +1,6 @@
+set(SOURCES
+  nl_problem.cpp
+)
+
+source_group(TREE "${CMAKE_CURRENT_SOURCE_DIR}" PREFIX "Source Files" FILES ${SOURCES})
+target_sources(polyfempy PRIVATE ${SOURCES})
diff --git a/src/solver/binding.hpp b/src/solver/binding.hpp
new file mode 100644
index 0000000..bfe5839
--- /dev/null
+++ b/src/solver/binding.hpp
@@ -0,0 +1,7 @@
+#pragma once
+
+#include <pybind11/pybind11.h>
+
+namespace py = pybind11;
+
+void define_nonlinear_problem(py::module_ &m);
diff --git a/src/solver/nl_problem.cpp b/src/solver/nl_problem.cpp
new file mode 100644
index 0000000..d217a06
--- /dev/null
+++ b/src/solver/nl_problem.cpp
@@ -0,0 +1,46 @@
+#include <polyfem/solver/NLProblem.hpp>
+#include "binding.hpp"
+#include <pybind11/eigen.h>
+
+namespace py = pybind11;
+using namespace polyfem;
+using namespace polyfem::solver;
+
+void define_nonlinear_problem(py::module_ &m)
+{
+  py::class_<FullNLProblem>(m, "FullNLProblem",
+                            "Full nonlinear problem in the simulation")
+      .def("init", &FullNLProblem::init, "Initialization", py::arg("x0"))
+      .def("value", &FullNLProblem::value)
+      .def(
+          "gradient",
+          [](FullNLProblem &prob, const Eigen::VectorXd &x) {
+            Eigen::VectorXd grad;
+            prob.gradient(x, grad);
+            return grad;
+          },
+          py::arg("x"))
+      .def(
+          "hessian",
+          [](FullNLProblem &prob, const Eigen::VectorXd &x) {
+            StiffnessMatrix hess;
+            prob.hessian(x, hess);
+            return hess;
+          },
+          py::arg("x"))
+      .def("is_step_valid", &FullNLProblem::is_step_valid, py::arg("x0"),
+           py::arg("x1"))
+      .def("is_step_collision_free", &FullNLProblem::is_step_collision_free,
+           py::arg("x0"), py::arg("x1"))
+      .def("max_step_size", &FullNLProblem::max_step_size, py::arg("x0"),
+           py::arg("x1"))
+      .def("line_search_begin", &FullNLProblem::line_search_begin,
+           py::arg("x0"), py::arg("x1"))
+      .def("line_search_end", &FullNLProblem::line_search_end)
+      .def("solution_changed", &FullNLProblem::solution_changed, py::arg("x"))
+      .def("stop", &FullNLProblem::stop, py::arg("x"));
+
+  py::class_<NLProblem, FullNLProblem>(m, "NLProblem", "Nonlinear problem in the simulation")
+  .def("full_to_reduced", &NLProblem::full_to_reduced, py::arg("full"))
+  .def("reduced_to_full", &NLProblem::reduced_to_full, py::arg("reduced"));
+}
\ No newline at end of file
diff --git a/src/state/CMakeLists.txt b/src/state/CMakeLists.txt
new file mode 100644
index 0000000..ef83f10
--- /dev/null
+++ b/src/state/CMakeLists.txt
@@ -0,0 +1,6 @@
+set(SOURCES
+  state.cpp
+)
+
+source_group(TREE "${CMAKE_CURRENT_SOURCE_DIR}" PREFIX "Source Files" FILES ${SOURCES})
+target_sources(polyfempy PRIVATE ${SOURCES})
diff --git a/src/state/binding.hpp b/src/state/binding.hpp
new file mode 100644
index 0000000..046230f
--- /dev/null
+++ b/src/state/binding.hpp
@@ -0,0 +1,9 @@
+#pragma once
+
+#include <pybind11/pybind11.h>
+
+namespace py = pybind11;
+
+void define_pde_types(py::module_ &m);
+void define_solver(py::module_ &m);
+void define_solve(py::module_ &m);
diff --git a/src/state/state.cpp b/src/state/state.cpp
new file mode 100644
index 0000000..9d5a0ef
--- /dev/null
+++ b/src/state/state.cpp
@@ -0,0 +1,727 @@
+#include <polyfem/mesh/MeshUtils.hpp>
+#include <polyfem/assembler/AssemblerUtils.hpp>
+#include <polyfem/assembler/GenericProblem.hpp>
+#include <polyfem/io/Evaluator.hpp>
+#include <polyfem/io/YamlToJson.hpp>
+#include <polyfem/utils/Logger.hpp>
+#include <polyfem/utils/StringUtils.hpp>
+#include <polyfem/utils/JSONUtils.hpp>
+#include <polyfem/utils/GeogramUtils.hpp>
+#include <polyfem/solver/NLProblem.hpp>
+#include <polyfem/time_integrator/ImplicitTimeIntegrator.hpp>
+#include <polyfem/State.hpp>
+
+// #include "raster.hpp"
+
+#include <igl/boundary_facets.h>
+#include <igl/remove_unreferenced.h>
+
+#include <stdexcept>
+
+#include <pybind11_json/pybind11_json.hpp>
+
+#include <pybind11/pybind11.h>
+#include <pybind11/eigen.h>
+#include <pybind11/functional.h>
+#include <pybind11/stl.h>
+#include <pybind11/iostream.h>
+
+#include "differentiable/binding.hpp"
+
+namespace py = pybind11;
+using namespace polyfem;
+
+typedef std::function<Eigen::MatrixXd(double x, double y, double z)> BCFuncV;
+typedef std::function<double(double x, double y, double z)> BCFuncS;
+
+class Assemblers
+{
+};
+
+class PDEs
+{
+};
+
+// TODO add save_time_sequence
+
+namespace
+{
+
+  bool load_json(const std::string &json_file, json &out)
+  {
+    std::ifstream file(json_file);
+
+    if (!file.is_open())
+      return false;
+
+    file >> out;
+
+    if (!out.contains("root_path"))
+      out["root_path"] = json_file;
+
+    return true;
+  }
+
+  bool load_yaml(const std::string &yaml_file, json &out)
+  {
+    try
+    {
+      out = io::yaml_file_to_json(yaml_file);
+      if (!out.contains("root_path"))
+        out["root_path"] = yaml_file;
+    }
+    catch (...)
+    {
+      return false;
+    }
+    return true;
+  }
+
+  void init_globals(State &state)
+  {
+    static bool initialized = false;
+
+    if (!initialized)
+    {
+      state.set_max_threads(1);
+      state.init_logger("", spdlog::level::level_enum::info,
+                        spdlog::level::level_enum::debug, false);
+
+      initialized = true;
+    }
+  }
+
+} // namespace
+
+void define_pde_types(py::module_ &m)
+{
+  const auto &pdes = py::class_<PDEs>(m, "PDEs");
+
+  const std::vector<std::string> materials = {"LinearElasticity",
+                                              "HookeLinearElasticity",
+                                              "SaintVenant",
+                                              "NeoHookean",
+                                              "MooneyRivlin",
+                                              "MooneyRivlin3Param",
+                                              "MooneyRivlin3ParamSymbolic",
+                                              "UnconstrainedOgden",
+                                              "IncompressibleOgden",
+                                              "Stokes",
+                                              "NavierStokes",
+                                              "OperatorSplitting",
+                                              "IncompressibleLinearElasticity",
+                                              "Laplacian",
+                                              "Helmholtz",
+                                              "Bilaplacian",
+                                              "AMIPS",
+                                              "FixedCorotational"};
+
+  for (const auto &a : materials)
+    pdes.attr(a.c_str()) = a;
+
+  pdes.doc() = "List of supported partial differential equations";
+
+  m.def(
+      "is_tensor",
+      [](const std::string &pde) {
+        if (pde == "Laplacian" || pde == "Helmholtz" || pde == "Bilaplacian")
+          return false;
+        return true;
+      },
+      "returns true if the pde is tensorial", py::arg("pde"));
+}
+
+void define_solver(py::module_ &m)
+{
+  const auto setting_lambda = [](State &self, const py::object &settings,
+                                 bool strict_validation) {
+    using namespace polyfem;
+
+    init_globals(self);
+    // py::scoped_ostream_redirect output;
+    const std::string json_string = py::str(settings);
+    self.init(json::parse(json_string), strict_validation);
+  };
+
+  py::class_<State, std::shared_ptr<State>>(m, "Solver")
+      .def(py::init<>())
+
+      .def("is_tensor", [](const State &s) { return s.assembler->is_tensor(); })
+
+      .def(
+          "settings", [](const State &s) { return s.args; },
+          "get PDE and problem parameters from the solver")
+
+      .def("set_settings", setting_lambda,
+           "load PDE and problem parameters from the settings", py::arg("json"),
+           py::arg("strict_validation") = false)
+
+      .def("ndof", &State::ndof, "Dimension of the solution")
+
+      .def(
+          "set_log_level",
+          [](State &s, int log_level) {
+            init_globals(s);
+            //    py::scoped_ostream_redirect output;
+            log_level = std::max(0, std::min(6, log_level));
+            s.set_log_level(static_cast<spdlog::level::level_enum>(log_level));
+          },
+          "sets polyfem log level, valid value between 0 (all logs) and 6 (no logs)",
+          py::arg("log_level"))
+
+      .def(
+          "mesh", [](State &s) -> mesh::Mesh& { return *s.mesh.get(); },
+          "Get mesh in simulator", py::return_value_policy::reference)
+
+      .def(
+          "load_mesh_from_settings",
+          [](State &s) {
+            init_globals(s);
+            s.load_mesh();
+          },
+          "Loads a mesh from the 'mesh' field of the json")
+
+      .def(
+          "load_mesh_from_path",
+          [](State &s, const std::string &path, const bool normalize_mesh,
+             const double vismesh_rel_area, const int n_refs,
+             const double boundary_id_threshold) {
+            init_globals(s);
+            s.args["geometry"] = R"([{ }])"_json;
+            s.args["geometry"][0]["mesh"] = path;
+            s.args["geometry"][0]["advanced"]["normalize_mesh"] =
+                normalize_mesh;
+            s.args["geometry"][0]["surface_selection"] =
+                R"({ "threshold": 0.0 })"_json;
+            s.args["geometry"][0]["surface_selection"]["threshold"] =
+                boundary_id_threshold;
+            s.args["geometry"][0]["n_refs"] = n_refs;
+            s.args["output"]["paraview"]["vismesh_rel_area"] = vismesh_rel_area;
+            s.load_mesh();
+          },
+          "Loads a mesh from the path and 'bc_tag' from the json if any bc tags",
+          py::arg("path"), py::arg("normalize_mesh") = bool(false),
+          py::arg("vismesh_rel_area") = double(0.00001),
+          py::arg("n_refs") = int(0),
+          py::arg("boundary_id_threshold") = double(-1))
+
+      .def(
+          "load_mesh_from_path_and_tags",
+          [](State &s, const std::string &path, const std::string &bc_tag,
+             const bool normalize_mesh, const double vismesh_rel_area,
+             const int n_refs, const double boundary_id_threshold) {
+            init_globals(s);
+            s.args["geometry"] = R"([{ }])"_json;
+            s.args["geometry"][0]["mesh"] = path;
+            s.args["bc_tag"] = bc_tag;
+            s.args["geometry"][0]["advanced"]["normalize_mesh"] =
+                normalize_mesh;
+            s.args["geometry"][0]["surface_selection"] =
+                R"({ "threshold": 0.0 })"_json;
+            s.args["geometry"][0]["surface_selection"]["threshold"] =
+                boundary_id_threshold;
+            s.args["geometry"][0]["n_refs"] = n_refs;
+            s.args["output"]["paraview"]["vismesh_rel_area"] = vismesh_rel_area;
+            s.load_mesh();
+          },
+          "Loads a mesh and bc_tags from path", py::arg("path"),
+          py::arg("bc_tag_path"), py::arg("normalize_mesh") = bool(false),
+          py::arg("vismesh_rel_area") = double(0.00001),
+          py::arg("n_refs") = int(0),
+          py::arg("boundary_id_threshold") = double(-1))
+
+      .def(
+          "set_mesh",
+          [](State &s, const Eigen::MatrixXd &V, const Eigen::MatrixXi &F,
+             const int n_refs, const double boundary_id_threshold) {
+            init_globals(s);
+            s.mesh = mesh::Mesh::create(V, F);
+            s.args["geometry"] = R"([{ }])"_json;
+            s.args["geometry"][0]["n_refs"] = n_refs;
+            s.args["geometry"][0]["surface_selection"] =
+                R"({ "threshold": 0.0 })"_json;
+            s.args["geometry"][0]["surface_selection"]["threshold"] =
+                boundary_id_threshold;
+
+            s.load_mesh();
+          },
+          "Loads a mesh from vertices and connectivity", py::arg("vertices"),
+          py::arg("connectivity"), py::arg("n_refs") = int(0),
+          py::arg("boundary_id_threshold") = double(-1))
+
+      .def(
+          "set_high_order_mesh",
+          [](State &s, const Eigen::MatrixXd &V, const Eigen::MatrixXi &F,
+             const Eigen::MatrixXd &nodes_pos,
+             const std::vector<std::vector<int>> &nodes_indices,
+             const bool normalize_mesh, const double vismesh_rel_area,
+             const int n_refs, const double boundary_id_threshold) {
+            init_globals(s);
+            //    py::scoped_ostream_redirect output;
+
+            s.mesh = mesh::Mesh::create(V, F);
+            s.mesh->attach_higher_order_nodes(nodes_pos, nodes_indices);
+
+            s.args["geometry"][0]["advanced"]["normalize_mesh"] =
+                normalize_mesh;
+            s.args["geometry"][0]["n_refs"] = n_refs;
+            s.args["geometry"][0]["surface_selection"] =
+                R"({ "threshold": 0.0 })"_json;
+            s.args["geometry"][0]["surface_selection"]["threshold"] =
+                boundary_id_threshold;
+            s.args["output"]["paraview"]["vismesh_rel_area"] = vismesh_rel_area;
+
+            s.load_mesh();
+          },
+          "Loads an high order mesh from vertices, connectivity, nodes, and node indices mapping element to nodes",
+          py::arg("vertices"), py::arg("connectivity"), py::arg("nodes_pos"),
+          py::arg("nodes_indices"), py::arg("normalize_mesh") = bool(false),
+          py::arg("vismesh_rel_area") = double(0.00001),
+          py::arg("n_refs") = int(0),
+          py::arg("boundary_id_threshold") = double(-1))
+
+      .def("nl_problem", [](State &s) { return s.solve_data.nl_problem; })
+
+      .def(
+          "solve",
+          [](State &s) {
+            init_globals(s);
+            //    py::scoped_ostream_redirect output;
+            s.stats.compute_mesh_stats(*s.mesh);
+
+            s.build_basis();
+
+            s.assemble_rhs();
+            s.assemble_mass_mat();
+
+            Eigen::MatrixXd sol, pressure;
+            s.solve_problem(sol, pressure);
+
+            s.compute_errors(sol);
+
+            s.save_json(sol);
+            s.export_data(sol, pressure);
+
+            return py::make_tuple(sol, pressure);
+          },
+          "solve the pde")
+      .def(
+          "build_basis",
+          [](State &s) {
+            if (!s.mesh)
+              throw std::runtime_error("Load mesh first!");
+
+            s.build_basis();
+          },
+          "build finite element basis")
+      .def(
+          "assemble",
+          [](State &s) {
+            if (s.bases.size() == 0)
+              throw std::runtime_error("Call build_basis() first!");
+
+            s.assemble_rhs();
+            s.assemble_mass_mat();
+          },
+          "assemble RHS and mass matrix if needed")
+      .def(
+          "init_timestepping",
+          [](State &s, const double t0, const double dt) {
+            if (!s.solve_data.rhs_assembler || s.mass.size() == 0)
+              throw std::runtime_error("Call assemble() first!");
+
+            s.solution_frames.clear();
+            Eigen::MatrixXd sol, pressure;
+            s.init_solve(sol, pressure);
+            s.init_nonlinear_tensor_solve(sol, t0 + dt);
+            s.cache_transient_adjoint_quantities(
+                0, sol,
+                Eigen::MatrixXd::Zero(s.mesh->dimension(),
+                                      s.mesh->dimension()));
+            return sol;
+          },
+          "initialize timestepping", py::arg("t0"), py::arg("dt"))
+      .def(
+          "step_in_time",
+          [](State &s, Eigen::MatrixXd &sol, const double t0, const double dt,
+             const int t) {
+            if (s.assembler->name() == "NavierStokes"
+                || s.assembler->name() == "OperatorSplitting"
+                || s.is_problem_linear() || s.is_homogenization())
+              throw std::runtime_error("Formulation " + s.assembler->name()
+                                       + " is not supported!");
+
+            s.solve_tensor_nonlinear(sol, t);
+            s.cache_transient_adjoint_quantities(
+                t, sol,
+                Eigen::MatrixXd::Zero(s.mesh->dimension(),
+                                      s.mesh->dimension()));
+
+            s.solve_data.time_integrator->update_quantities(sol);
+            s.solve_data.nl_problem->update_quantities(t0 + (t + 1) * dt, sol);
+            s.solve_data.update_dt();
+            s.solve_data.update_barrier_stiffness(sol);
+            return sol;
+          },
+          "step in time", py::arg("solution"), py::arg("t0"), py::arg("dt"),
+          py::arg("t"))
+
+      .def(
+          "solve_adjoint",
+          [](State &s, const Eigen::MatrixXd &adjoint_rhs) {
+            if (adjoint_rhs.cols() != s.diff_cached.size()
+                || adjoint_rhs.rows() != s.diff_cached.u(0).size())
+              throw std::runtime_error("Invalid adjoint_rhs shape!");
+            if (!s.problem->is_time_dependent()
+                && !s.lin_solver_cached) // nonlinear static solve only
+            {
+              Eigen::MatrixXd reduced;
+              for (int i = 0; i < adjoint_rhs.cols(); i++)
+              {
+                Eigen::VectorXd reduced_vec =
+                    s.solve_data.nl_problem->full_to_reduced_grad(
+                        adjoint_rhs.col(i));
+                if (i == 0)
+                  reduced.setZero(reduced_vec.rows(), adjoint_rhs.cols());
+                reduced.col(i) = reduced_vec;
+              }
+              return s.solve_adjoint_cached(reduced);
+            }
+            else
+              return s.solve_adjoint_cached(adjoint_rhs);
+          },
+          "Solve the adjoint equation given the gradient of objective wrt. PDE solution")
+
+      .def(
+          "set_cache_level",
+          [](State &s, solver::CacheLevel level) {
+            s.optimization_enabled = level;
+          },
+          "Set solution caching level", py::arg("cache_level"))
+
+      .def(
+          "get_solution_cache", [](State &s) { return s.diff_cached; },
+          "get the cached solution after simulation, this function requires setting CacheLevel before the simulation")
+
+      .def(
+          "get_solutions", [](State &s) {
+            Eigen::MatrixXd sol(s.diff_cached.u(0).size(), s.diff_cached.size());
+            for (int i = 0; i < sol.cols(); i++)
+              sol.col(i) = s.diff_cached.u(i);
+            return sol;
+          }
+      )
+
+      .def(
+          "compute_errors",
+          [](State &s, Eigen::MatrixXd &sol) { s.compute_errors(sol); },
+          "compute the error", py::arg("solution"))
+
+      .def(
+          "export_data",
+          [](State &s, const Eigen::MatrixXd &sol,
+             const Eigen::MatrixXd &pressure) { s.export_data(sol, pressure); },
+          "exports all data specified in the settings")
+      .def(
+          "export_vtu",
+          [](State &s, std::string &path, const Eigen::MatrixXd &sol,
+             const Eigen::MatrixXd &pressure, const double time,
+             const double dt) {
+            s.out_geom.save_vtu(
+                s.resolve_output_path(path), s, sol, pressure, time, dt,
+                io::OutGeometryData::ExportOptions(s.args, s.mesh->is_linear(),
+                                                   s.problem->is_scalar(),
+                                                   s.solve_export_to_file),
+                s.is_contact_enabled(), s.solution_frames);
+          },
+          "exports the solution as vtu", py::arg("path"), py::arg("solution"),
+          py::arg("pressure") = Eigen::MatrixXd(), py::arg("time") = double(0.), py::arg("dt") = double(0.))
+
+      .def(
+          "get_boundary_sidesets",
+          [](State &s) {
+            //    py::scoped_ostream_redirect output;
+            Eigen::MatrixXd points;
+            Eigen::MatrixXi faces;
+            Eigen::MatrixXd sidesets;
+
+            io::Evaluator::get_sidesets(*s.mesh, points, faces, sidesets);
+
+            return py::make_tuple(points, faces, sidesets);
+          },
+          "exports get the boundary sideset, edges in 2d or trangles in 3d")
+
+      .def(
+          "update_dirichlet_boundary",
+          [](State &self, const int id, const py::object &val, const bool isx,
+             const bool isy, const bool isz, const std::string &interp) {
+            using namespace polyfem;
+            // py::scoped_ostream_redirect output;
+            const json json_val = val;
+            if (assembler::GenericTensorProblem *prob0 =
+                    dynamic_cast<assembler::GenericTensorProblem *>(
+                        self.problem.get()))
+            {
+              prob0->update_dirichlet_boundary(id, json_val, isx, isy, isz,
+                                               interp);
+            }
+            else if (assembler::GenericScalarProblem *prob1 =
+                         dynamic_cast<assembler::GenericScalarProblem *>(
+                             self.problem.get()))
+            {
+              prob1->update_dirichlet_boundary(id, json_val, interp);
+            }
+            else
+            {
+              throw "Updating BC works only for GenericProblems";
+            }
+          },
+          "updates dirichlet boundary", py::arg("id"), py::arg("val"),
+          py::arg("isx") = bool(true), py::arg("isy") = bool(true),
+          py::arg("isz") = bool(true), py::arg("interp") = std::string(""))
+      .def(
+          "update_neumann_boundary",
+          [](State &self, const int id, const py::object &val,
+             const std::string &interp) {
+            using namespace polyfem;
+            // py::scoped_ostream_redirect output;
+            const json json_val = val;
+
+            if (auto prob0 = dynamic_cast<assembler::GenericTensorProblem *>(
+                    self.problem.get()))
+            {
+              prob0->update_neumann_boundary(id, json_val, interp);
+            }
+            else if (auto prob1 =
+                         dynamic_cast<assembler::GenericScalarProblem *>(
+                             self.problem.get()))
+            {
+              prob1->update_neumann_boundary(id, json_val, interp);
+            }
+            else
+            {
+              throw "Updating BC works only for GenericProblems";
+            }
+          },
+          "updates neumann boundary", py::arg("id"), py::arg("val"),
+          py::arg("interp") = std::string(""))
+      .def(
+          "update_pressure_boundary",
+          [](State &self, const int id, const py::object &val,
+             const std::string &interp) {
+            using namespace polyfem;
+            // py::scoped_ostream_redirect output;
+            const json json_val = val;
+
+            if (auto prob = dynamic_cast<assembler::GenericTensorProblem *>(
+                    self.problem.get()))
+            {
+              prob->update_pressure_boundary(id, json_val, interp);
+            }
+            else
+            {
+              throw "Updating BC works only for Tensor GenericProblems";
+            }
+          },
+          "updates pressure boundary", py::arg("id"), py::arg("val"),
+          py::arg("interp") = std::string(""))
+      .def(
+          "update_obstacle_displacement",
+          [](State &self, const int oid, const py::object &val,
+             const std::string &interp) {
+            using namespace polyfem;
+            // py::scoped_ostream_redirect output;
+            const json json_val = val;
+            self.obstacle.change_displacement(oid, json_val, interp);
+          },
+          "updates obstacle displacement", py::arg("oid"), py::arg("val"),
+          py::arg("interp") = std::string(""));
+}
+
+void define_solve(py::module_ &m)
+{
+
+  m.def(
+      "polyfem_command",
+      [](const std::string &json_file, const std::string &yaml_file,
+         const int log_level, const bool strict_validation,
+         const int max_threads, const std::string &output_dir) {
+        json in_args = json({});
+
+        const bool ok = !json_file.empty() ? load_json(json_file, in_args)
+                                           : load_yaml(yaml_file, in_args);
+
+        if (!ok)
+          throw std::runtime_error(
+              fmt::format("unable to open {} file", json_file));
+
+        json tmp = json::object();
+        tmp["/output/log/level"_json_pointer] = int(log_level);
+        tmp["/solver/max_threads"_json_pointer] = max_threads;
+        if (!output_dir.empty())
+          tmp["/output/directory"_json_pointer] =
+              std::filesystem::absolute(output_dir);
+        assert(tmp.is_object());
+        in_args.merge_patch(tmp);
+
+        std::vector<std::string> names;
+        std::vector<Eigen::MatrixXi> cells;
+        std::vector<Eigen::MatrixXd> vertices;
+
+        State state;
+        state.init(in_args, strict_validation);
+        state.load_mesh(/*non_conforming=*/false, names, cells, vertices);
+
+        // Mesh was not loaded successfully; load_mesh() logged the error.
+        if (state.mesh == nullptr)
+          throw std::runtime_error("Failed to load the mesh!");
+
+        state.stats.compute_mesh_stats(*state.mesh);
+
+        state.build_basis();
+
+        state.assemble_rhs();
+        state.assemble_mass_mat();
+
+        Eigen::MatrixXd sol;
+        Eigen::MatrixXd pressure;
+
+        state.solve_problem(sol, pressure);
+
+        state.compute_errors(sol);
+
+        state.save_json(sol);
+        state.export_data(sol, pressure);
+      },
+      "runs the polyfem command, internal usage", py::kw_only(),
+      py::arg("json"), py::arg("yaml") = std::string(""),
+      py::arg("log_level") = int(1), py::arg("strict_validation") = bool(true),
+      py::arg("max_threads") = int(1), py::arg("output_dir") = "");
+
+  //   m.def(
+  //       "solve_febio",
+  //       [](const std::string &febio_file, const std::string &output_path,
+  //          const int log_level, const py::kwargs &kwargs) {
+  //         if (febio_file.empty())
+  //           throw pybind11::value_error("Specify a febio file!");
+
+  //         // json in_args = opts.is_none() ? json({}) : json(opts);
+  //         json in_args = json(static_cast<py::dict>(kwargs));
+
+  //         if (!output_path.empty())
+  //         {
+  //           in_args["export"]["paraview"] = output_path;
+  //           in_args["export"]["wire_mesh"] =
+  //               utils::StringUtils::replace_ext(output_path, "obj");
+  //           in_args["export"]["material_params"] = true;
+  //           in_args["export"]["body_ids"] = true;
+  //           in_args["export"]["contact_forces"] = true;
+  //           in_args["export"]["surface"] = true;
+  //         }
+
+  //         const int discr_order =
+  //             in_args.contains("discr_order") ? int(in_args["discr_order"]) :
+  //             1;
+
+  //         if (discr_order == 1 && !in_args.contains("vismesh_rel_area"))
+  //           in_args["output"]["paraview"]["vismesh_rel_area"] = 1e10;
+
+  //         State state;
+  //         state.init_logger("", log_level, false);
+  //         state.init(in_args);
+  //         state.load_febio(febio_file, in_args);
+  //         state.stats.compute_mesh_stats(*state.mesh);
+
+  //         state.build_basis();
+
+  //         state.assemble_rhs();
+  //         state.assemble_mass_mat();
+
+  //         Eigen::MatrixXd sol, pressure;
+  //         state.solve_problem(sol, pressure);
+
+  //         state.save_json();
+  //         state.export_data(sol, pressure);
+  //       },
+  //       "runs FEBio", py::arg("febio_file"),
+  //       py::arg("output_path") = std::string(""), py::arg("log_level") = 2);
+
+  //   m.def(
+  //       "solve",
+  //       [](const Eigen::MatrixXd &vertices, const Eigen::MatrixXi &cells,
+  //          const py::object &sidesets_func, const int log_level,
+  //          const py::kwargs &kwargs) {
+  //         std::string log_file = "";
+
+  //         std::unique_ptr<State> res =
+  //             std::make_unique<State>();
+  //         State &state = *res;
+  //         state.init_logger(log_file, log_level, false);
+
+  //         json in_args = json(static_cast<py::dict>(kwargs));
+
+  //         state.init(in_args);
+
+  //         state.load_mesh(vertices, cells);
+
+  //         [&]() {
+  //           if (!sidesets_func.is_none())
+  //           {
+  //             try
+  //             {
+  //               const auto fun =
+  //                   sidesets_func
+  //                       .cast<std::function<int(const RowVectorNd
+  //                       &)>>();
+  //               state.mesh->compute_boundary_ids(fun);
+  //               return true;
+  //             }
+  //             catch (...)
+  //             {
+  //               {
+  //               }
+  //             }
+  //             try
+  //             {
+  //               const auto fun = sidesets_func.cast<
+  //                   std::function<int(const RowVectorNd &,
+  //                   bool)>>();
+  //               state.mesh->compute_boundary_ids(fun);
+  //               return true;
+  //             }
+  //             catch (...)
+  //             {
+  //             }
+
+  //             try
+  //             {
+  //               const auto fun = sidesets_func.cast<
+  //                   std::function<int(const std::vector<int> &, bool)>>();
+  //               state.mesh->compute_boundary_ids(fun);
+  //               return true;
+  //             }
+  //             catch (...)
+  //             {
+  //             }
+
+  //             throw pybind11::value_error(
+  //                 "sidesets_func has invalid type, should be a function
+  //                 (p)->int, (p, bool)->int, ([], bool)->int");
+  //           }
+  //         }();
+
+  //         state.stats.compute_mesh_stats(*state.mesh);
+
+  //         state.build_basis();
+
+  //         state.assemble_rhs();
+  //         state.assemble_mass_mat();
+  //         state.solve_problem();
+
+  //         return res;
+  //       },
+  //       "single solve function", py::kw_only(),
+  //       py::arg("vertices") = Eigen::MatrixXd(),
+  //       py::arg("cells") = Eigen::MatrixXi(),
+  //       py::arg("sidesets_func") = py::none(), py::arg("log_level") = 2);
+}
diff --git a/test.py b/test.py
deleted file mode 100644
index 748a741..0000000
--- a/test.py
+++ /dev/null
@@ -1,45 +0,0 @@
-import polyfempy as pf
-import json
-import numpy as np
-
-
-root = "/Users/teseo/Downloads"
-with open(root + "/pushbox.json", "r") as f:
-    config = json.load(f)
-
-config["root_path"] = root + "/pushbox.json"
-
-dt = config["dt"]
-t0 = config["t0"]
-
-
-solver = pf.Solver()
-solver.set_settings(json.dumps(config))
-solver.load_mesh_from_settings()
-
-# inits stuff
-solver.init_timestepping(t0, dt)
-
-for i in range(1, 6):
-    # substepping
-    for t in range(1):
-        solver.step_in_time(t0, dt, t+1)
-    solver.update_dirichlet_boundary(1, [
-        "0.5*t",
-        "0",
-        "0"
-    ])
-    pixles = solver.render(width=84,
-                           height=84,
-                           camera_position=np.array([[0., 0., 0.]]),
-                           camera_fov=90,
-                           ambient_light=np.array([[0., 0., 0.]]),
-                           diffuse_color=np.array([[1., 0., 0.]]),
-                           specular_color=np.array([[1., 0., 0.]]))
-    break
-
-    print(pixles)
-    t0 += dt
-
-
-solver.export_vtu("xxx.vtu")
diff --git a/test/diffSimulator.py b/test/diffSimulator.py
new file mode 100644
index 0000000..cdc4186
--- /dev/null
+++ b/test/diffSimulator.py
@@ -0,0 +1,28 @@
+import torch
+import polyfempy as pf
+
+# Differentiable simulator that computes shape derivatives
+class Simulate(torch.autograd.Function):
+
+    @staticmethod
+    def forward(ctx, solver, vertices):
+        # Update solver setup
+        solver.mesh().set_vertices(vertices)
+        # Enable caching intermediate variables in the simulation, which will be used for solve_adjoint
+        solver.set_cache_level(pf.CacheLevel.Derivatives)
+        # Run simulation
+        solver.solve()
+        # Collect transient simulation solutions
+        sol = torch.tensor(solver.get_solutions())
+        # Cache solver for backward gradient propagation
+        ctx.solver = solver
+        return sol
+
+    @staticmethod
+    @torch.autograd.function.once_differentiable
+    def backward(ctx, grad_output):
+        # solve_adjoint only needs to be called once per solver, independent of number of types of optimization variables
+        ctx.solver.solve_adjoint(grad_output)
+        # Compute shape derivatives
+        return None, torch.tensor(pf.shape_derivative(ctx.solver))
+    
\ No newline at end of file
diff --git a/test/test_basic.ipynb b/test/test_basic.ipynb
new file mode 100644
index 0000000..452551f
--- /dev/null
+++ b/test/test_basic.ipynb
@@ -0,0 +1,126 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import polyfempy as pf\n",
+    "import json\n",
+    "import numpy as np"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "root = \"../data/differentiable/input\"\n",
+    "with open(root + \"/initial-contact.json\", \"r\") as f:\n",
+    "    config = json.load(f)\n",
+    "\n",
+    "config[\"root_path\"] = root + \"/initial-contact.json\"\n",
+    "\n",
+    "solver = pf.Solver()\n",
+    "solver.set_settings(json.dumps(config), False)\n",
+    "solver.set_log_level(2)\n",
+    "solver.load_mesh_from_settings()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "mesh = solver.mesh()\n",
+    "\n",
+    "print(mesh.n_vertices())\n",
+    "print(mesh.n_elements())\n",
+    "print(mesh.n_cell_vertices(1))\n",
+    "print(mesh.element_vertex(3, 0))\n",
+    "print(mesh.boundary_element_vertex(3, 0))\n",
+    "assert(mesh.is_boundary_vertex(1))\n",
+    "\n",
+    "min, max = mesh.bounding_box()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "config = solver.settings()\n",
+    "t0 = config[\"time\"][\"t0\"]\n",
+    "dt = config[\"time\"][\"dt\"]\n",
+    "\n",
+    "# inits stuff\n",
+    "solver.build_basis()\n",
+    "solver.assemble()\n",
+    "sol = solver.init_timestepping(t0, dt)\n",
+    "\n",
+    "for i in range(1, 5):\n",
+    "    \n",
+    "    # substepping\n",
+    "    for t in range(1):\n",
+    "        sol = solver.step_in_time(sol, t0, dt, t+1)\n",
+    "\n",
+    "    t0 += dt\n",
+    "    solver.export_vtu(sol, np.zeros((0, 0)), t0, dt, \"step_\" + str(i) + \".vtu\")\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "prob = solver.nl_problem()\n",
+    "\n",
+    "h = prob.hessian(sol)\n",
+    "reduced_sol = prob.full_to_reduced(sol)\n",
+    "full_sol = prob.reduced_to_full(reduced_sol)\n",
+    "\n",
+    "assert(np.linalg.norm(full_sol - sol.flatten()) < 1e-12)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "cache = solver.get_solution_cache()\n",
+    "\n",
+    "print(cache.solution(1).shape)\n",
+    "print(cache.velocity(2).shape)\n",
+    "print(cache.acceleration(3).shape)\n",
+    "print(cache.hessian(4).shape)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "base",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.8"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/test/test_bending.py b/test/test_bending.py
deleted file mode 100644
index 414a773..0000000
--- a/test/test_bending.py
+++ /dev/null
@@ -1,57 +0,0 @@
-import unittest
-
-import platform
-import polyfempy as pf
-# from .utils import plot
-import os
-
-
-class BendingTest(unittest.TestCase):
-    def test_run(self):
-        return
-        root_folder = os.path.join("..", "data", "data")
-
-        dir_path = os.path.dirname(os.path.realpath(__file__))
-        mesh_path = os.path.join(dir_path, root_folder, "square_beam.mesh")
-        tag_path = os.path.join(dir_path, root_folder, "square_beam.txt")
-
-        settings = pf.Settings(discr_order=1, pde=pf.PDEs.LinearElasticity)
-
-        settings.set_material_params("E", 200000)
-        settings.set_material_params("nu", 0.35)
-
-        settings.pde = pf.PDEs.LinearElasticity
-
-        problem = pf.Problem()
-        problem.add_dirichlet_value(2, [0, 0, 0])
-        problem.add_neumann_value(1, [0, -100, 0])
-
-        settings.problem = problem
-
-        solver = pf.Solver()
-
-        solver.settings(settings)
-        solver.load_mesh_from_path_and_tags(
-            mesh_path, tag_path, normalize_mesh=False, vismesh_rel_area=0.1)
-
-        solver.solve()
-
-        pts, tets, el_id, bid, disp = solver.get_sampled_solution()
-        vertices = pts + disp
-        mises, _ = solver.get_sampled_mises_avg()
-
-        vs, fs, tr, stress, mises = solver.get_sampled_traction_forces()
-        assert(vs.shape[0] > 0)
-        assert(vs.shape[1] == 3)
-
-        assert(fs.shape[0] > 0)
-        assert(fs.shape[1] == 3)
-
-        assert(tr.shape[0] == fs.shape[0])
-        assert(tr.shape[1] == 3)
-
-        # plot(vertices, tets, mises)
-
-
-if __name__ == '__main__':
-    unittest.main()
diff --git a/test/test_diff.py b/test/test_diff.py
new file mode 100644
index 0000000..99ed8ad
--- /dev/null
+++ b/test/test_diff.py
@@ -0,0 +1,87 @@
+import polyfempy as pf
+import json
+import numpy as np
+import torch
+
+torch.set_default_dtype(torch.float64)
+
+# Differentiable simulator that computes shape derivatives
+class Simulate(torch.autograd.Function):
+
+    @staticmethod
+    def forward(ctx, solvers, vertices):
+        solutions = []
+        for solver in solvers:
+            solver.mesh().set_vertices(vertices)
+            solver.set_cache_level(pf.CacheLevel.Derivatives) # enable backward derivatives
+            solver.solve()
+            cache = solver.get_solution_cache()
+            sol = torch.zeros((solver.ndof(), cache.size()))
+            for t in range(cache.size()):
+                sol[:, t] = torch.tensor(cache.solution(t))
+            solutions.append(sol)
+        
+        ctx.save_for_backward(vertices)
+        ctx.solvers = solvers
+        return tuple(solutions)
+
+    @staticmethod
+    @torch.autograd.function.once_differentiable
+    def backward(ctx, *grad_output):
+        vertices, = ctx.saved_tensors
+        grad = torch.zeros_like(vertices)
+        for i, solver in enumerate(ctx.solvers):
+            solver.solve_adjoint(grad_output[i])
+            grad += torch.tensor(pf.shape_derivative(solver))
+        return None, grad
+
+
+root = "../data/differentiable/input"
+with open(root + "/initial-contact.json", "r") as f:
+    config = json.load(f)
+
+config["root_path"] = root + "/initial-contact.json"
+
+# Simulation 1
+
+solver1 = pf.Solver()
+solver1.set_settings(json.dumps(config), False)
+solver1.set_log_level(2)
+solver1.load_mesh_from_settings()
+# solver1.solve()
+
+mesh = solver1.mesh()
+v = mesh.vertices()
+vertices = torch.tensor(solver1.mesh().vertices(), requires_grad=True)
+
+# Simulation 2
+
+config["initial_conditions"]["velocity"][0]["value"] = [3, 0]
+solver2 = pf.Solver()
+solver2.set_settings(json.dumps(config), False)
+solver2.set_log_level(2)
+solver2.load_mesh_from_settings()
+# solver2.solve()
+
+# Verify gradient
+
+def loss(vertices):
+    solutions1, solutions2 = Simulate.apply([solver1, solver2], vertices)
+    return torch.linalg.norm(solutions1[:, -1]) * torch.linalg.norm(solutions2[:, -1])
+
+torch.set_printoptions(12)
+
+param = vertices.clone().detach().requires_grad_(True)
+theta = torch.randn_like(param)
+l = loss(param)
+l.backward()
+grad = param.grad
+t = 1e-6
+with torch.no_grad():
+    analytic = torch.tensordot(grad, theta)
+    f1 = loss(param + theta * t)
+    f2 = loss(param - theta * t)
+    fd = (f1 - f2) / (2 * t)
+    print(f'grad {analytic}, fd {fd} {(f1 - l) / t} {(l - f2) / t}, relative err {abs(analytic - fd) / abs(analytic):.3e}')
+    print(f'f(x+dx)={f1}, f(x)={l.detach()}, f(x-dx)={f2}')
+    assert(abs(analytic - fd) <= 1e-4 * abs(analytic))
diff --git a/test/test_differentiable.ipynb b/test/test_differentiable.ipynb
new file mode 100644
index 0000000..ac47352
--- /dev/null
+++ b/test/test_differentiable.ipynb
@@ -0,0 +1,139 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import polyfempy as pf\n",
+    "import json\n",
+    "import numpy as np\n",
+    "import torch\n",
+    "\n",
+    "torch.set_default_dtype(torch.float64)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Differentiable simulator that computes shape derivatives\n",
+    "class Simulate(torch.autograd.Function):\n",
+    "\n",
+    "    @staticmethod\n",
+    "    def forward(ctx, solver, vertices):\n",
+    "        # Update solver setup\n",
+    "        solver.mesh().set_vertices(vertices)\n",
+    "        # Enable caching intermediate variables in the simulation, which will be used for solve_adjoint\n",
+    "        solver.set_cache_level(pf.CacheLevel.Derivatives)\n",
+    "        # Run simulation\n",
+    "        solver.solve()\n",
+    "        # Collect transient simulation solutions\n",
+    "        cache = solver.get_solution_cache()\n",
+    "        sol = torch.zeros((solver.ndof(), cache.size()))\n",
+    "        for t in range(cache.size()):\n",
+    "            sol[:, t] = torch.tensor(cache.solution(t))\n",
+    "        # Cache solver for backward gradient propagation\n",
+    "        ctx.solver = solver\n",
+    "        return sol\n",
+    "\n",
+    "    @staticmethod\n",
+    "    @torch.autograd.function.once_differentiable\n",
+    "    def backward(ctx, grad_output):\n",
+    "        # solve_adjoint only needs to be called once per solver, independent of number of types of optimization variables\n",
+    "        ctx.solver.solve_adjoint(grad_output)\n",
+    "        # Compute shape derivatives\n",
+    "        return None, torch.tensor(pf.shape_derivative(ctx.solver))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "root = \"../data/differentiable/input\"\n",
+    "with open(root + \"/initial-contact.json\", \"r\") as f:\n",
+    "    config = json.load(f)\n",
+    "\n",
+    "config[\"root_path\"] = root + \"/initial-contact.json\"\n",
+    "\n",
+    "# Simulation 1\n",
+    "\n",
+    "solver1 = pf.Solver()\n",
+    "solver1.set_settings(json.dumps(config), False)\n",
+    "solver1.set_log_level(2)\n",
+    "solver1.load_mesh_from_settings()\n",
+    "\n",
+    "mesh = solver1.mesh()\n",
+    "v = mesh.vertices()\n",
+    "vertices = torch.tensor(solver1.mesh().vertices(), requires_grad=True)\n",
+    "\n",
+    "# Simulation 2\n",
+    "\n",
+    "config[\"initial_conditions\"][\"velocity\"][0][\"value\"] = [3, 0]\n",
+    "solver2 = pf.Solver()\n",
+    "solver2.set_settings(json.dumps(config), False)\n",
+    "solver2.set_log_level(2)\n",
+    "solver2.load_mesh_from_settings()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "\n",
+    "# Verify gradient\n",
+    "\n",
+    "def loss(vertices):\n",
+    "    solutions1 = Simulate.apply(solver1, vertices)\n",
+    "    solutions2 = Simulate.apply(solver2, vertices)\n",
+    "    print(obj.value(vertices.reshape(-1,1).detach().numpy()))\n",
+    "    return torch.linalg.norm(solutions1[:, -1]) * torch.linalg.norm(solutions2[:, -1])\n",
+    "\n",
+    "torch.set_printoptions(12)\n",
+    "\n",
+    "param = vertices.clone().detach().requires_grad_(True)\n",
+    "theta = torch.randn_like(param)\n",
+    "l = loss(param)\n",
+    "l.backward()\n",
+    "grad = param.grad\n",
+    "t = 1e-6\n",
+    "with torch.no_grad():\n",
+    "    analytic = torch.tensordot(grad, theta)\n",
+    "    f1 = loss(param + theta * t)\n",
+    "    f2 = loss(param - theta * t)\n",
+    "    fd = (f1 - f2) / (2 * t)\n",
+    "    print(f'grad {analytic}, fd {fd} {(f1 - l) / t} {(l - f2) / t}, relative err {abs(analytic - fd) / abs(analytic):.3e}')\n",
+    "    print(f'f(x+dx)={f1}, f(x)={l.detach()}, f(x-dx)={f2}')\n",
+    "    assert(abs(analytic - fd) <= 1e-4 * abs(analytic))"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "base",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.8"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/test/test_febio.py b/test/test_febio.py
deleted file mode 100644
index 7e94d31..0000000
--- a/test/test_febio.py
+++ /dev/null
@@ -1,32 +0,0 @@
-import unittest
-
-import platform
-import polyfempy as pf
-# from .utils import plot
-import os
-
-
-class FeBioTest(unittest.TestCase):
-    def test_run(self):
-        root_folder = os.path.join("..", "data", "data")
-
-        dir_path = os.path.dirname(os.path.realpath(__file__))
-        febio_file = os.path.join(dir_path, root_folder, "test.feb")
-
-        opts = {
-            "solver_type": "Eigen::SparseLU",
-            "solver_params": {
-                "gradNorm": 1e-1,
-                "nl_iterations": 10
-            }
-        }
-
-        pf.solve_febio(
-            febio_file,
-            output_path="test.vtu",
-            log_level=1,
-            **opts)
-
-
-if __name__ == '__main__':
-    unittest.main()
diff --git a/test/test_gravity.py b/test/test_gravity.py
deleted file mode 100644
index 2bb97b0..0000000
--- a/test/test_gravity.py
+++ /dev/null
@@ -1,68 +0,0 @@
-import unittest
-
-import numpy as np
-import polyfempy as pf
-# from .utils import plot
-
-
-class Gravity(unittest.TestCase):
-    def test_run(self):
-        n_pts = 3
-
-        extend = np.linspace(0, 1, n_pts)
-        x, y = np.meshgrid(extend, extend, sparse=False, indexing='xy')
-        pts = np.column_stack((x.ravel(), y.ravel()))
-
-        # Create connectivity
-        faces = np.ndarray([(n_pts-1)**2, 4],dtype=np.int32)
-
-        index = 0
-        for i in range(n_pts-1):
-            for j in range(n_pts-1):
-                faces[index, :] = np.array([
-                    j + i * n_pts,
-                    j+1 + i * n_pts,
-                    j+1 + (i+1) * n_pts,
-                    j + (i+1) * n_pts
-                ])
-                index = index + 1
-
-        # create settings
-        # We use a linear material model and linear elments
-        settings = pf.Settings(discr_order=1, pde=pf.PDEs.LinearElasticity)
-
-        settings.set_material_params("E", 2100)
-        settings.set_material_params("nu", 0.3)
-
-        problem = pf.Gravity(force=0.1)
-        settings.problem = problem
-
-        solver = pf.Solver()
-        solver.settings(settings)
-
-        # This time we are using pts and faces instead of loading from the disk
-        solver.set_mesh(pts, faces)
-
-        solver.solve()
-
-        #Animation frame, last one
-        frame = -1
-
-        # Get the solution
-        pts = solver.get_sampled_points_frames()
-        tets = solver.get_sampled_connectivity_frames()
-        disp = solver.get_sampled_solution_frames()
-
-
-        # diplace the mesh
-        vertices = pts[frame] + disp[frame]
-
-        # and get the stresses
-        mises = solver.get_sampled_mises_avg_frames()
-
-        # finally plot
-        # plot(vertices, tets[frame], mises[frame])
-
-
-if __name__ == '__main__':
-    unittest.main()
diff --git a/test/test_inflation.py b/test/test_inflation.py
deleted file mode 100644
index ecbfcdc..0000000
--- a/test/test_inflation.py
+++ /dev/null
@@ -1,85 +0,0 @@
-import unittest
-
-import polyfempy as pf
-import numpy as np
-import platform
-
-# from .utils import plot
-
-import os
-
-
-class InflationTest(unittest.TestCase):
-    def test_run(self):
-        root_folder = os.path.join("..", "data", "data")
-
-        solver = pf.Solver()
-
-        # some setup
-        dir_path = os.path.dirname(os.path.realpath(__file__))
-        mesh_path = os.path.join(dir_path, root_folder, "circle2.msh")
-
-        settings = pf.Settings(discr_order=2, pde=pf.PDEs.Laplacian)
-
-        problem = pf.Problem(rhs=0)
-        problem.add_dirichlet_value("all", 10)
-        settings.set_problem(problem)
-
-        solver.settings(settings)
-        solver.load_mesh_from_path(
-            mesh_path, normalize_mesh=True, vismesh_rel_area=0.00001)
-
-        solver.solve()
-        sol = solver.get_solution()
-        ##########################################################################
-
-        # now we got the solution of the first laplacian, we use it as rhs for the second one
-        # setup zero bc and use sol as rhs
-        problem = pf.Problem(rhs=0)
-        problem.add_dirichlet_value("all", 0)
-        settings.problem = problem
-
-        # reload the parameters and mesh
-        solver.settings(settings)
-        solver.load_mesh_from_path(
-            mesh_path, normalize_mesh=True, vismesh_rel_area=0.00001)
-
-        # set the rhs as prev sol
-        solver.set_rhs(sol)
-
-        solver.solve()
-
-        # get the solution on a densly sampled mesh
-        vertices, tris, el_id, bid, sol = solver.get_sampled_solution()
-
-        # the dense mesh is disconnected, so we need to connecit it back
-        _, res = np.unique(vertices, axis=0, return_inverse=True)
-        vertices, resi = np.unique(vertices, axis=0, return_index=True)
-
-        faces = np.ndarray([len(tris), 3], dtype=np.int64)
-        vv = np.ndarray([len(vertices), 3], dtype=np.float64)
-
-        for i in range(len(tris)):
-            faces[i] = np.array(
-                [res[tris[i][0]], res[tris[i][1]], res[tris[i][2]]])
-
-        for i in range(len(vv)):
-            vv[i] = np.array([vertices[i][0], vertices[i][1], sol[resi[i]][0]])
-
-        # plot(vv, faces, None)
-
-        # #save obj
-        # with open(output, "w") as file:
-        # 	# use sol as z
-        # 	for i in range(len(vertices)):
-        # 		file.write("v {} {} {}\n".format())
-
-        # 	for i in range(len(tris)):
-        # 		i0 = res[tris[i][0]]
-        # 		i1 = res[tris[i][1]]
-        # 		i2 = res[tris[i][2]]
-        # 		file.write("f {} {} {}\n".format(i0+1, i1+1, i2+1))
-
-
-if __name__ == '__main__':
-    unittest.main()
diff --git a/test/test_plane_hole.py b/test/test_plane_hole.py
deleted file mode 100644
index 83d7b57..0000000
--- a/test/test_plane_hole.py
+++ /dev/null
@@ -1,53 +0,0 @@
-import unittest
-
-import polyfempy as pf
-# from .utils import plot
-import os
-import platform
-
-
-class BendingTest(unittest.TestCase):
-    def test_run(self):
-        #     self.run_one(1)
-        #     self.run_one(2)
-
-        # def run_one(self, discr_order):
-        discr_order = 2
-        root_folder = os.path.join("..", "data", "data")
-
-        dir_path = os.path.dirname(os.path.realpath(__file__))
-        mesh_path = os.path.join(dir_path, root_folder, "plane_hole.obj")
-
-        settings = pf.Settings(discr_order=discr_order,
-                               pde=pf.PDEs.LinearElasticity)
-
-        settings.set_material_params("E", 210000)
-        settings.set_material_params("nu", 0.3)
-
-        problem = pf.Problem()
-        problem.set_x_symmetric(1)
-        problem.set_y_symmetric(4)
-
-        problem.add_neumann_value(3, [100, 0])
-        # problem.add_neumann_value(3, lambda x,y,z: [100, 0, 0])
-
-        settings.problem = problem
-
-        solver = pf.Solver()
-
-        solver.settings(settings)
-        solver.load_mesh_from_path(mesh_path, normalize_mesh=True)
-
-        solver.solve()
-
-        pts, tets, el_id, bid, disp = solver.get_sampled_solution()
-        vertices = pts + disp
-        mises, _ = solver.get_sampled_mises_avg()
-
-        solver.compute_errors()
-
-        # plot(vertices, tets, mises)
-
-
-if __name__ == '__main__':
-    unittest.main()
diff --git a/test/test_pythonic.py b/test/test_pythonic.py
deleted file mode 100644
index 27c2db3..0000000
--- a/test/test_pythonic.py
+++ /dev/null
@@ -1,54 +0,0 @@
-import unittest
-
-import numpy as np
-import polyfempy as pf
-# from .utils import plot
-
-
-class PythonicTest(unittest.TestCase):
-    def test_run(self):
-        n_pts = 3
-
-        extend = np.linspace(0, 1, n_pts)
-        x, y = np.meshgrid(extend, extend, sparse=False, indexing='xy')
-        pts = np.column_stack((x.ravel(), y.ravel()))
-
-        # Create connectivity
-        faces = np.ndarray([(n_pts-1)**2, 4], dtype=np.int32)
-
-        index = 0
-        for i in range(n_pts-1):
-            for j in range(n_pts-1):
-                faces[index, :] = np.array([
-                    j + i * n_pts,
-                    j+1 + i * n_pts,
-                    j+1 + (i+1) * n_pts,
-                    j + (i+1) * n_pts
-                ])
-                index = index + 1
-
-        def sideset(p):
-            if p[0] <= 0.001:
-                return 4
-            return 1
-
-        solver = pf.solve(
-            vertices=pts,
-            cells=faces,
-            sidesets_func=sideset,
-            diriclet_bc=[{"id": 4, "value": [0, 0]}],
-            materials=[{"id": 0, "E": 2100, "nu": 0.3}],
-            rhs=[0, 0.1],
-            pde=pf.PDEs.LinearElasticity,
-            discr_order=1
-        )
-
-        log = solver.get_log()
-        print(log["time_solving"])
-
-        # Get the solution
-        pts, tris, el_id, bid, fun = solver.get_sampled_solution()
-
-
-if __name__ == '__main__':
-    unittest.main()
diff --git a/test/test_sim.py b/test/test_sim.py
new file mode 100644
index 0000000..297fcb4
--- /dev/null
+++ b/test/test_sim.py
@@ -0,0 +1,34 @@
+import polyfempy as pf
+import json
+import numpy as np
+
+pf.polyfem_command(json="/Users/zizhouhuang/Desktop/polyfem-python/data/contact/examples/2D/unit-tests/5-squares.json", log_level=2, max_threads=16)
+
+root = "/Users/zizhouhuang/Desktop/polyfem/data/contact/examples/2D/unit-tests"
+with open(root + "/5-squares.json", "r") as f:
+    config = json.load(f)
+
+config["root_path"] = root + "/5-squares.json"
+
+solver = pf.Solver()
+solver.set_settings(json.dumps(config), False)
+solver.set_log_level(2)
+solver.load_mesh_from_settings()
+
+config = solver.settings()
+t0 = config["time"]["t0"]
+dt = config["time"]["dt"]
+
+# inits stuff
+solver.build_basis()
+solver.assemble()
+sol = solver.init_timestepping(t0, dt)
+
+for i in range(1, 5):
+    
+    # substepping
+    for t in range(1):
+        sol = solver.step_in_time(sol, t0, dt, t+1)
+
+    t0 += dt
+    solver.export_vtu(sol, np.zeros((0, 0)), t0, dt, "step_" + str(i) + ".vtu")
diff --git a/test/test_torsion.py b/test/test_torsion.py
deleted file mode 100644
index 2997752..0000000
--- a/test/test_torsion.py
+++ /dev/null
@@ -1,41 +0,0 @@
-import unittest
-
-import polyfempy as pf
-# from .utils import plot
-
-import os
-import platform
-
-
-class TorsionTest(unittest.TestCase):
-    def test_run(self):
-        root_folder = os.path.join("..", "data", "data")
-        dir_path = os.path.dirname(os.path.realpath(__file__))
-        mesh_path = os.path.join(dir_path, root_folder, "square_beam_h.HYBRID")
-
-        settings = pf.Settings(
-            discr_order=1, pde=pf.PDEs.NonLinearElasticity, nl_solver_rhs_steps=5)
-
-        settings.set_material_params("E", 200)
-        settings.set_material_params("nu", 0.35)
-
-        problem = pf.Torsion(
-            fixed_boundary=5, turning_boundary=6, axis_coordiante=2, n_turns=0.5)
-        settings.problem = problem
-
-        solver = pf.Solver()
-        solver.settings(settings)
-        solver.load_mesh_from_path(
-            mesh_path, normalize_mesh=False, vismesh_rel_area=0.001)
-
-        solver.solve()
-
-        pts, tets, el_id, bid, disp = solver.get_sampled_solution()
-        vertices = pts + disp
-        mises, _ = solver.get_sampled_mises_avg()
-
-        # plot(vertices, tets, mises)
-
-
-if __name__ == '__main__':
-    unittest.main()