diff --git a/athina/steps/code_execution_v2.py b/athina/steps/code_execution_v2.py
index d760c67..b68bc2d 100644
--- a/athina/steps/code_execution_v2.py
+++ b/athina/steps/code_execution_v2.py
@@ -122,7 +122,6 @@ def _create_or_initialize_sandbox(self):
             raise ValueError("session_id is required for e2b execution")
 
         try:
-
             running_sandboxes = Sandbox.list()
 
             for sandbox in running_sandboxes:
@@ -138,11 +137,6 @@ def _create_or_initialize_sandbox(self):
                     ),
                     metadata={"session_id": self.session_id},
                 )
-                if self.code.startswith("!"):
-                    # Run the code as a command
-                    commands = map(lambda x: x[1:], self.code.split("\n"))
-                    for command in commands:
-                        self._sandbox.commands.run(command)
                 print(f"Created new sandbox with ID: {self._sandbox.sandbox_id}")
 
         except Exception as e:
@@ -238,10 +232,9 @@ def _execute_e2b(self, input_data: dict, start_time: float) -> StepResult:
 
         The execution follows these steps:
         1. Initialize/connect to sandbox
-        2. Split input into commands and Python code
-        3. Initialize input variables in sandbox
-        4. Execute main code
-        5. Capture and extract output variables
+        2. Initialize input variables in sandbox (Python code only)
+        3. Execute code (either as commands or Python)
+        4. Capture and extract output variables for Python code
         """
         try:
             self._create_or_initialize_sandbox()
@@ -253,67 +246,73 @@ def _execute_e2b(self, input_data: dict, start_time: float) -> StepResult:
                     start_time=start_time,
                 )
 
-            # Split code into commands and Python code
-            lines = self.code.split("\n")
-            commands = [
-                line.strip()[1:]
-                for line in lines
-                if line.strip().startswith(COMMAND_PREFIX)
-            ]
-            python_code = [
-                line for line in lines if not line.strip().startswith(COMMAND_PREFIX)
-            ]
-
-            if not python_code:
-                # Only commands were provided
-                print("Only commands were provided")
+            # Initialize input variables if we're running Python code
+            if not self.code.strip().startswith(COMMAND_PREFIX):
+                input_vars_code = self._prepare_input_variables(input_data)
+                if input_vars_code:
+                    setup_code = "\n".join(input_vars_code)
+                    setup_execution = self._sandbox.run_code(setup_code)
+                    if setup_execution.error:
+                        print(
+                            f"Error setting up input variables: {setup_execution.error}"
+                        )
+
+            # Execute code based on type (commands or Python)
+            if self.code.strip().startswith(COMMAND_PREFIX):
+                # Handle command execution
+                commands = [
+                    line.strip()[1:] for line in self.code.split("\n") if line.strip()
+                ]
+                output = []
+                for command in commands:
+                    command_result = self._sandbox.commands.run(command)
+                    if command_result.error or command_result.exit_code != 0:
+                        return self._create_step_result(
+                            status="error",
+                            data=f"Failed to execute command: {command}\nexit_code: {command_result.exit_code}\nDetails:\n{command_result.error}",
+                            start_time=start_time,
+                        )
+                    print(f"Command output: {command_result}")
+                    if command_result.stdout:
+                        output.extend(command_result.stdout)
                 return self._create_step_result(
                     status="success",
-                    data="Commands executed successfully",
+                    data="".join(output),
                     start_time=start_time,
                     exported_vars={},
                 )
-
-            # Initialize input variables
-            input_vars_code = self._prepare_input_variables(input_data)
-            if input_vars_code:
-                setup_code = "\n".join(input_vars_code)
-                setup_execution = self._sandbox.run_code(setup_code)
-                if setup_execution.error:
-                    print(f"Error setting up input variables: {setup_execution.error}")
-
-            # Execute main code
-            main_code = "\n".join(python_code)
-            execution = self._sandbox.run_code(main_code)
-            if execution.error:
-                return self._create_step_result(
-                    status="error",
-                    data=f"Failed to execute the code.\nDetails:\n{execution.error}",
-                    start_time=start_time,
+            else:
+                # Handle Python code execution
+                execution = self._sandbox.run_code(self.code)
+                if execution.error:
+                    return self._create_step_result(
+                        status="error",
+                        data=f"Failed to execute the code.\nDetails:\n{execution.error}",
+                        start_time=start_time,
+                    )
+
+                # Capture variables for Python execution
+                var_execution = self._sandbox.run_code(VARIABLE_CAPTURE_CODE)
+                if var_execution.error:
+                    print(f"Error capturing variables: {var_execution.error}")
+                    return self._create_step_result(
+                        status="success",
+                        data="\n".join(execution.logs.stdout),
+                        start_time=start_time,
+                        exported_vars={},
+                    )
+
+                # Extract and return results
+                exported_vars = self._extract_exported_vars(
+                    "\n".join(var_execution.logs.stdout)
                 )
-
-            # Capture variables
-            var_execution = self._sandbox.run_code(VARIABLE_CAPTURE_CODE)
-            if var_execution.error:
-                print(f"Error capturing variables: {var_execution.error}")
                 return self._create_step_result(
                     status="success",
                     data="\n".join(execution.logs.stdout),
                     start_time=start_time,
-                    exported_vars={},
+                    exported_vars=exported_vars,
                 )
 
-            # Extract and return results
-            exported_vars = self._extract_exported_vars(
-                "\n".join(var_execution.logs.stdout)
-            )
-            return self._create_step_result(
-                status="success",
-                data="\n".join(execution.logs.stdout),
-                start_time=start_time,
-                exported_vars=exported_vars,
-            )
-
         except Exception as e:
             print(f"\nUnexpected error: {str(e)}")
             return self._create_step_result(
diff --git a/pyproject.toml b/pyproject.toml
index 8c5556b..4f4a04b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "athina"
-version = "1.6.31"
+version = "1.6.32"
 description = "Python SDK to configure and run evaluations for your LLM-based application"
 authors = ["Shiv Sakhuja <shiv@athina.ai>", "Akshat Gupta <akshat@athina.ai>", "Vivek Aditya <vivek@athina.ai>", "Akhil Bisht <akhil@athina.ai>"]
 readme = "README.md"