diff --git a/tidy.ipynb b/tidy.ipynb index 76a7455..0d3807e 100644 --- a/tidy.ipynb +++ b/tidy.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -10,11 +10,82 @@ "import polars.selectors as cs" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create `anzsic_2006.csv`" + ] + }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "{'Divisions': shape: (19, 2)\n", + " ┌───────────────┬─────────────────────────────────┐\n", + " │ division_code ┆ division_title │\n", + " │ --- ┆ --- │\n", + " │ str ┆ str │\n", + " ╞═══════════════╪═════════════════════════════════╡\n", + " │ A ┆ Agriculture, Forestry and Fish… │\n", + " │ B ┆ Mining │\n", + " │ C ┆ Manufacturing │\n", + " │ D ┆ Electricity, Gas, Water and Wa… │\n", + " │ E ┆ Construction │\n", + " │ … ┆ … │\n", + " │ O ┆ Public Administration and Safe… │\n", + " │ P ┆ Education and Training │\n", + " │ Q ┆ Health Care and Social Assista… │\n", + " │ R ┆ Arts and Recreation Services │\n", + " │ S ┆ Other Services │\n", + " └───────────────┴─────────────────────────────────┘,\n", + " 'Groups': shape: (214, 3)\n", + " ┌────────────┬─────────────────────────────────┬──────────────┐\n", + " │ group_code ┆ group_title ┆ group_parent │\n", + " │ --- ┆ --- ┆ --- │\n", + " │ str ┆ str ┆ str │\n", + " ╞════════════╪═════════════════════════════════╪══════════════╡\n", + " │ 011 ┆ Nursery and Floriculture Produ… ┆ 01 │\n", + " │ 012 ┆ Mushroom and Vegetable Growing ┆ 01 │\n", + " │ 013 ┆ Fruit and Tree Nut Growing ┆ 01 │\n", + " │ 014 ┆ Sheep, Beef Cattle and Grain F… ┆ 01 │\n", + " │ 015 ┆ Other Crop Growing ┆ 01 │\n", + " │ … ┆ … ┆ … │\n", + " │ 952 ┆ Funeral, Crematorium and Cemet… ┆ 95 │\n", + " │ 953 ┆ Other Personal Services ┆ 95 │\n", + " │ 954 ┆ Religious Services ┆ 95 │\n", + " │ 955 ┆ Civic, Professional and Other … ┆ 95 │\n", + " │ 960 ┆ Private Households Employing S… ┆ 96 │\n", + " └────────────┴─────────────────────────────────┴──────────────┘,\n", + " 'Classes': shape: (506, 3)\n", + " ┌────────────┬─────────────────────────────────┬──────────────┐\n", + " │ class_code ┆ class_title ┆ class_parent │\n", + " │ --- ┆ --- ┆ --- │\n", + " │ str ┆ str ┆ str │\n", + " ╞════════════╪═════════════════════════════════╪══════════════╡\n", + " │ 0111 ┆ Nursery Production (Under Cove… ┆ 011 │\n", + " │ 0112 ┆ Nursery Production (Outdoors) ┆ 011 │\n", + " │ 0113 ┆ Turf Growing ┆ 011 │\n", + " │ 0114 ┆ Floriculture Production (Under… ┆ 011 │\n", + " │ 0115 ┆ Floriculture Production (Outdo… ┆ 011 │\n", + " │ … ┆ … ┆ … │\n", + " │ 9552 ┆ Labour Association Services ┆ 955 │\n", + " │ 9559 ┆ Other Interest Group Services … ┆ 955 │\n", + " │ 9601 ┆ Private Households Employing S… ┆ 960 │\n", + " │ 9602 ┆ Undifferentiated Goods-Produci… ┆ 960 │\n", + " │ 9603 ┆ Undifferentiated Service-Produ… ┆ 960 │\n", + " └────────────┴─────────────────────────────────┴──────────────┘}" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "def read_anzsic_sheet(sheet_name, prefix):\n", " df = (\n", @@ -51,9 +122,47 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (86, 3)
division_codesubdivision_codesubdivision_title
strstrstr
"A""01""Agriculture"
"A""02""Aquaculture"
"A""03""Forestry and Logging"
"A""04""Fishing, Hunting and Trapping"
"A""05""Agriculture, Forestry and Fish…
"R""91""Sports and Recreation Activiti…
"R""92""Gambling Activities"
"S""94""Repair and Maintenance"
"S""95""Personal and Other Services"
"S""96""Private Households Employing S…
" + ], + "text/plain": [ + "shape: (86, 3)\n", + "┌───────────────┬──────────────────┬─────────────────────────────────┐\n", + "│ division_code ┆ subdivision_code ┆ subdivision_title │\n", + "│ --- ┆ --- ┆ --- │\n", + "│ str ┆ str ┆ str │\n", + "╞═══════════════╪══════════════════╪═════════════════════════════════╡\n", + "│ A ┆ 01 ┆ Agriculture │\n", + "│ A ┆ 02 ┆ Aquaculture │\n", + "│ A ┆ 03 ┆ Forestry and Logging │\n", + "│ A ┆ 04 ┆ Fishing, Hunting and Trapping │\n", + "│ A ┆ 05 ┆ Agriculture, Forestry and Fish… │\n", + "│ … ┆ … ┆ … │\n", + "│ R ┆ 91 ┆ Sports and Recreation Activiti… │\n", + "│ R ┆ 92 ┆ Gambling Activities │\n", + "│ S ┆ 94 ┆ Repair and Maintenance │\n", + "│ S ┆ 95 ┆ Personal and Other Services │\n", + "│ S ┆ 96 ┆ Private Households Employing S… │\n", + "└───────────────┴──────────────────┴─────────────────────────────────┘" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Create Subdivisions\n", "anzsic06[\"Subdivisions\"] = pl.read_excel(\n", @@ -85,7 +194,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -111,7 +220,7 @@ "metadata": {}, "outputs": [], "source": [ - "anzsic06_combined.write_csv(\"anzsic06.csv\")" + "anzsic06_combined.write_csv(\"anzsic_2006.csv\", quote_style=\"always\")" ] } ], @@ -131,7 +240,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.13.0" + "version": "3.12.7" } }, "nbformat": 4,