iccs_notebook for data exploration

feelpp · Jul 8, 2024 · 6a7c765 · 6a7c765
1 parent 45c7420
commit 6a7c765
Showing 1 changed file with 244 additions and 0 deletions.
diff --git a/src/notebooks/reader.ipynb b/src/notebooks/reader.ipynb
@@ -0,0 +1,244 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "24860b03-131e-488d-a5d1-6f4b44521a13",
+   "metadata": {},
+   "source": [
+    "## General Description\n",
+    "\n",
+    "This script reads data from a VTK EnSight Gold binary file and processes it to extract information about solar shading coefficients and the geometry of cells over a series of timesteps. The key steps include:\n",
+    "\n",
+    "1. **Setting up the Reader**: The script initializes a VTK reader for the EnSight Gold binary file format and configures it to read all variables from a specified case file.\n",
+    "\n",
+    "2. **Processing Data for Each Timestep**: For each timestep, the script updates the reader with the current time value, retrieves cell data including solar shading coefficients, and extracts the geometric points defining each cell. This data is combined and converted into a pandas DataFrame with appropriate column names.\n",
+    "\n",
+    "3. **Calculating the Area of Triangles**: A function calculates the area of a triangle given its vertices. This function is used to compute the area for each triangle (cell) in the DataFrame, and these areas are added as a new column in the DataFrame.\n",
+    "\n",
+    "4. **Saving the Data**: The resulting DataFrame is saved to a CSV file for further analysis or use.\n",
+    "\n",
+    "This script provides a comprehensive framework for reading, processing, and analyzing time-dependent geometric and scalar data from a VTK EnSight Gold binary file.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "61ee8919-195b-4d1f-8ff1-a94603596143",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from pathlib import Path\n",
+    "\n",
+    "import vtk\n",
+    "\n",
+    "# We assume that the necessary files are in the same directory as the notebook\n",
+    "case_file = \"strasbourg_sm_lod1/strasbourg_sm_lod1/City_Energy_Modeling.case\"\n",
+    "\n",
+    "# Check the path up front: without it, a missing file only surfaces further down\n",
+    "# as an opaque error on the time-set objects (VTK is assumed to report I/O\n",
+    "# problems through its own error log rather than a Python exception - confirm).\n",
+    "if not Path(case_file).is_file():\n",
+    "    raise FileNotFoundError(f\"EnSight case file not found: {case_file}\")\n",
+    "\n",
+    "# Set up the reader\n",
+    "reader = vtk.vtkEnSightGoldBinaryReader()\n",
+    "reader.SetCaseFileName(case_file)\n",
+    "reader.ReadAllVariablesOn()  # Ensure all variables are read\n",
+    "reader.Update()\n",
+    "\n",
+    "# Retrieve the output and the time values of the first time set\n",
+    "output = reader.GetOutput()\n",
+    "timeset = reader.GetTimeSets()\n",
+    "time = timeset.GetItem(0)\n",
+    "timesteps = time.GetSize()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "96eede0d-c63e-4187-94e0-f473df853c42",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Number of timesteps:  731\n",
+      "Number of cells:  212692\n",
+      "Number of scalar points?:  212692\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(\"Number of timesteps: \", timesteps)\n",
+    "\n",
+    "# First block of the reader output, looked up once for the cell count\n",
+    "first_block = reader.GetOutput().GetBlock(0)\n",
+    "print(\"Number of cells: \", first_block.GetNumberOfCells())\n",
+    "\n",
+    "# Cell-data array holding the solar shading coefficient\n",
+    "shading_array = reader.GetOutput().GetBlock(0).GetCellData().GetArray(\"solar_shading_coeff\")\n",
+    "print(\"Number of scalar points?: \", shading_array.GetNumberOfTuples())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "a6e05806-9a22-488a-902d-495e19899620",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "212692\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Number of cells reported by the object currently bound to `output`\n",
+    "total_cells = output.GetNumberOfCells()\n",
+    "print(total_cells)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "2b0e67e6-39ca-4eed-954d-2e210306a996",
+   "metadata": {},
+   "source": [
+    "## Pandas DataFrames"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "id": "db60d12d-b71a-41e7-a11f-14bfafe3d00a",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "   timestep         x_1         y_1  z_1         x_2         y_2  z_2  \\\n",
+      "0         0 -539.268982  600.049988  0.0 -538.495972  599.625000  0.0   \n",
+      "1         0 -541.656982  611.940979  0.0 -527.434998  605.570984  0.0   \n",
+      "2         0 -527.434998  605.570984  0.0 -538.495972  599.625000  0.0   \n",
+      "3         0 -487.437988  635.723999  0.0 -490.282013  625.955994  0.0   \n",
+      "4         0 -488.596985  642.520020  0.0 -487.437988  635.723999  0.0   \n",
+      "\n",
+      "          x_3         y_3  z_3  scalar        area  \n",
+      "0 -538.987976  599.625000  0.0     0.0    0.104548  \n",
+      "1 -539.268982  600.049988  0.0     0.0   76.950973  \n",
+      "2 -539.268982  600.049988  0.0     0.0    4.648543  \n",
+      "3 -518.830994  625.955994  0.0     0.0  139.433298  \n",
+      "4 -518.830994  625.955994  0.0     0.0  112.334297  \n"
+     ]
+    }
+   ],
+   "source": [
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "\n",
+    "# Column layout of one row: timestep, the three vertices of the cell, then the scalar\n",
+    "columns = ['timestep'] + [f'{axis}_{k}' for k in (1, 2, 3) for axis in 'xyz'] + ['scalar']\n",
+    "\n",
+    "# Accumulate one row (a plain Python list) per cell and per timestep\n",
+    "data = []\n",
+    "\n",
+    "for t in range(timesteps):\n",
+    "    reader.SetTimeValue(time.GetValue(t))\n",
+    "    reader.Update()\n",
+    "    # Use a dedicated name so that `output` (the full reader output retrieved in\n",
+    "    # the setup cell) is not rebound to a different kind of object.\n",
+    "    block = reader.GetOutput().GetBlock(0)\n",
+    "    sc = block.GetCellData().GetArray(\"solar_shading_coeff\")\n",
+    "    if sc is None:\n",
+    "        raise KeyError(f\"cell array 'solar_shading_coeff' not found at timestep {t}\")\n",
+    "\n",
+    "    for i in range(block.GetNumberOfCells()):\n",
+    "        pts = block.GetCell(i).GetPoints()\n",
+    "        n_pts = pts.GetNumberOfPoints()\n",
+    "        if n_pts != 3:\n",
+    "            # The column layout and the area computation both assume triangles;\n",
+    "            # fail here rather than after the whole extraction has run.\n",
+    "            raise ValueError(f'cell {i} at timestep {t} has {n_pts} points, expected 3')\n",
+    "\n",
+    "        # timestep, then x/y/z of each vertex, then the first component of the scalar\n",
+    "        row = [t]\n",
+    "        for j in range(n_pts):\n",
+    "            row.extend(pts.GetPoint(j))\n",
+    "        row.append(sc.GetTuple(i)[0])\n",
+    "        data.append(row)\n",
+    "\n",
+    "# Convert the list to a DataFrame\n",
+    "df = pd.DataFrame(data, columns=columns)\n",
+    "\n",
+    "def calculate_triangle_area(p1, p2, p3):\n",
+    "    \"\"\"Return the area of the triangle(s) with vertices p1, p2 and p3.\n",
+    "\n",
+    "    Each argument is either a single 3D point (shape ``(3,)``) or an array of\n",
+    "    3D points (shape ``(n, 3)``). With arrays of points the areas are computed\n",
+    "    row-wise and an array of ``n`` areas is returned.\n",
+    "    \"\"\"\n",
+    "    p1 = np.asarray(p1, dtype=float)\n",
+    "    # Two edge vectors sharing the vertex p1\n",
+    "    v1 = np.asarray(p2, dtype=float) - p1\n",
+    "    v2 = np.asarray(p3, dtype=float) - p1\n",
+    "    # Area = 0.5 * magnitude of the cross product; axis=-1 keeps one value per triangle\n",
+    "    return 0.5 * np.linalg.norm(np.cross(v1, v2), axis=-1)\n",
+    "\n",
+    "# Compute every area in a single vectorized call. Iterating with df.iterrows()\n",
+    "# builds a Series per row, which is far too slow for a frame holding one row\n",
+    "# per cell and per timestep.\n",
+    "df['area'] = calculate_triangle_area(\n",
+    "    df[['x_1', 'y_1', 'z_1']].to_numpy(),\n",
+    "    df[['x_2', 'y_2', 'z_2']].to_numpy(),\n",
+    "    df[['x_3', 'y_3', 'z_3']].to_numpy(),\n",
+    ")\n",
+    "\n",
+    "# Print the resulting DataFrame\n",
+    "print(df.head())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "id": "2b590da1-99db-454c-acd3-b785e2c283b4",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "         timestep     point_1      point_2  point_3     point_4      point_5  \\\n",
+      "5104603        23  725.992004 -1084.459961      0.0  725.992004 -1084.459961   \n",
+      "5104604        23  739.231018 -1185.939941      0.0  737.825989 -1185.089966   \n",
+      "5104605        23  737.825989 -1185.089966      0.0  737.825989 -1185.089966   \n",
+      "5104606        23  763.109985 -1150.270020      0.0  760.265015 -1155.369995   \n",
+      "5104607        23  760.265015 -1155.369995      0.0  760.265015 -1155.369995   \n",
+      "\n",
+      "         point_6     point_7      point_8  point_9  scalar       area  \n",
+      "5104603      5.2  726.484009 -1079.369995      5.2     1.0  13.295592  \n",
+      "5104604      0.0  739.231018 -1185.939941      6.5     1.0   5.336898  \n",
+      "5104605      6.5  739.231018 -1185.939941      6.5     1.0   5.336898  \n",
+      "5104606      0.0  763.109985 -1150.270020      3.0     1.0   8.759744  \n",
+      "5104607      3.0  763.109985 -1150.270020      3.0     1.0   8.759744  \n"
+     ]
+    }
+   ],
+   "source": [
+    "# Show the last rows of the frame\n",
+    "last_rows = df.tail()\n",
+    "print(last_rows)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "id": "7e470e53-0506-43a2-9d12-50deef85fbfa",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Write the frame to CSV without the index column\n",
+    "csv_path = 'initial_dataframe.csv'\n",
+    "df.to_csv(csv_path, index=False)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}