diff --git a/lectures/_static/lecture_specific/inequality/data.ipynb b/lectures/_static/lecture_specific/inequality/data.ipynb new file mode 100644 index 00000000..97aea652 --- /dev/null +++ b/lectures/_static/lecture_specific/inequality/data.ipynb @@ -0,0 +1,133 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "258b4bc9-2964-470a-8010-05c2162f5e05", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: wbgapi in /Users/longye/anaconda3/lib/python3.10/site-packages (1.0.12)\n", + "Requirement already satisfied: plotly in /Users/longye/anaconda3/lib/python3.10/site-packages (5.22.0)\n", + "Requirement already satisfied: requests in /Users/longye/anaconda3/lib/python3.10/site-packages (from wbgapi) (2.31.0)\n", + "Requirement already satisfied: tabulate in /Users/longye/anaconda3/lib/python3.10/site-packages (from wbgapi) (0.9.0)\n", + "Requirement already satisfied: PyYAML in /Users/longye/anaconda3/lib/python3.10/site-packages (from wbgapi) (6.0)\n", + "Requirement already satisfied: tenacity>=6.2.0 in /Users/longye/anaconda3/lib/python3.10/site-packages (from plotly) (8.4.1)\n", + "Requirement already satisfied: packaging in /Users/longye/anaconda3/lib/python3.10/site-packages (from plotly) (23.1)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /Users/longye/anaconda3/lib/python3.10/site-packages (from requests->wbgapi) (1.26.16)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /Users/longye/anaconda3/lib/python3.10/site-packages (from requests->wbgapi) (2.0.4)\n", + "Requirement already satisfied: idna<4,>=2.5 in /Users/longye/anaconda3/lib/python3.10/site-packages (from requests->wbgapi) (3.4)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /Users/longye/anaconda3/lib/python3.10/site-packages (from requests->wbgapi) (2024.6.2)\n" + ] + } + ], + "source": [ + "!pip install wbgapi plotly\n", + "\n", + "import pandas as 
pd\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import random as rd\n", + "import wbgapi as wb\n", + "import plotly.express as px\n", + "\n", + "url = 'https://media.githubusercontent.com/media/QuantEcon/high_dim_data/main/SCF_plus/SCF_plus_mini.csv'\n", + "df = pd.read_csv(url)\n", + "df_income_wealth = df.dropna()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "9630a07a-fce5-474e-92af-104e67e82be5", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: quantecon in /Users/longye/anaconda3/lib/python3.10/site-packages (0.7.1)\n", + "Requirement already satisfied: requests in /Users/longye/anaconda3/lib/python3.10/site-packages (from quantecon) (2.31.0)\n", + "Requirement already satisfied: numpy>=1.17.0 in /Users/longye/anaconda3/lib/python3.10/site-packages (from quantecon) (1.26.3)\n", + "Requirement already satisfied: numba>=0.49.0 in /Users/longye/anaconda3/lib/python3.10/site-packages (from quantecon) (0.59.1)\n", + "Requirement already satisfied: sympy in /Users/longye/anaconda3/lib/python3.10/site-packages (from quantecon) (1.12)\n", + "Requirement already satisfied: scipy>=1.5.0 in /Users/longye/anaconda3/lib/python3.10/site-packages (from quantecon) (1.12.0)\n", + "Requirement already satisfied: llvmlite<0.43,>=0.42.0dev0 in /Users/longye/anaconda3/lib/python3.10/site-packages (from numba>=0.49.0->quantecon) (0.42.0)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /Users/longye/anaconda3/lib/python3.10/site-packages (from requests->quantecon) (2024.6.2)\n", + "Requirement already satisfied: idna<4,>=2.5 in /Users/longye/anaconda3/lib/python3.10/site-packages (from requests->quantecon) (3.4)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /Users/longye/anaconda3/lib/python3.10/site-packages (from requests->quantecon) (2.0.4)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in 
/Users/longye/anaconda3/lib/python3.10/site-packages (from requests->quantecon) (1.26.16)\n", + "Requirement already satisfied: mpmath>=0.19 in /Users/longye/anaconda3/lib/python3.10/site-packages (from sympy->quantecon) (1.3.0)\n" + ] + } + ], + "source": [ + "!pip install quantecon\n", + "import quantecon as qe\n", + "\n", + "varlist = ['n_wealth', # net wealth \n", + " 't_income', # total income\n", + " 'l_income'] # labor income\n", + "\n", + "df = df_income_wealth\n", + "years = df.year.unique()\n", + "\n", + "# create lists to store Gini for each inequality measure\n", + "results = {}\n", + "\n", + "for var in varlist:\n", + " # create lists to store Gini\n", + " gini_yr = []\n", + " for year in years:\n", + " # repeat the observations according to their weights\n", + " counts = list(round(df[df['year'] == year]['weights'] ))\n", + " y = df[df['year'] == year][var].repeat(counts)\n", + " y = np.asarray(y)\n", + " \n", + " rd.shuffle(y) # shuffle the sequence\n", + " \n", + " # calculate and store Gini\n", + " gini = qe.gini_coefficient(y)\n", + " gini_yr.append(gini)\n", + " \n", + " results[var] = gini_yr\n", + "\n", + "# Convert to DataFrame\n", + "results = pd.DataFrame(results, index=years)\n", + "results.to_csv(\"usa-gini-nwealth-tincome-lincome.csv\", index_label='year')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d59e876b-2f77-4fa7-b79a-8e455ad82d43", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/lectures/_static/lecture_specific/inequality/usa-gini-nwealth-tincome-lincome.csv 
b/lectures/_static/lecture_specific/inequality/usa-gini-nwealth-tincome-lincome.csv index bf820364..3ec95a66 100644 --- a/lectures/_static/lecture_specific/inequality/usa-gini-nwealth-tincome-lincome.csv +++ b/lectures/_static/lecture_specific/inequality/usa-gini-nwealth-tincome-lincome.csv @@ -1,21 +1,21 @@ year,n_wealth,t_income,l_income -1950,0.8257332034366338,0.44248654139458626,0.5342948198773412 -1953,0.8059487586599329,0.4264544060935945,0.5158978980963702 -1956,0.8121790488050616,0.44426942873399283,0.5349293526208142 -1959,0.795206874163792,0.43749348077061573,0.5213985948309416 -1962,0.8086945076579359,0.4435843103853645,0.5345127915054341 -1965,0.7904149225687935,0.43763715466663444,0.7487860020887753 -1968,0.7982885066993497,0.4208620794438902,0.5242396427381545 -1971,0.7911574835420259,0.4233344246090255,0.5576454812313466 -1977,0.7571418922185215,0.46187678800902543,0.5704448110072049 -1983,0.7494335400643013,0.439345618464469,0.5662220844385915 -1989,0.7715705301674302,0.5115249581654197,0.601399568747142 -1992,0.7508126614055308,0.4740650672076798,0.5983592657979563 -1995,0.7569492388110265,0.48965523558400603,0.5969779516716903 -1998,0.7603291991801185,0.49117441585168614,0.5774462841723305 -2001,0.7816118750507056,0.5239092994681135,0.6042739644967272 -2004,0.7700355469522361,0.4884350383903255,0.5981432201792727 -2007,0.7821413776486978,0.5197156312086187,0.626345219575322 -2010,0.8250825295193438,0.5195972120145615,0.6453653328291903 -2013,0.8227698931835303,0.531400174984336,0.6498682917772644 -2016,0.8342975903562234,0.5541400068900825,0.6706846793375284 +1950,0.8257332034366366,0.44248654139458743,0.534294819877344 +1953,0.805948758659935,0.4264544060935942,0.5158978980963682 +1956,0.8121790488050612,0.44426942873399367,0.5349293526208106 +1959,0.7952068741637912,0.43749348077061534,0.5213985948309414 +1962,0.8086945076579386,0.44358431038536356,0.5345127915054446 +1965,0.7904149225687949,0.4376371546666344,0.7487860020887701 
+1968,0.7982885066993503,0.4208620794438885,0.5242396427381534 +1971,0.7911574835420282,0.4233344246090255,0.5576454812313462 +1977,0.7571418922185215,0.46187678800902554,0.57044481100722 +1983,0.749433540064301,0.4393456184644682,0.5662220844385925 +1989,0.7715705301674285,0.5115249581654115,0.6013995687471289 +1992,0.7508126614055305,0.4740650672076754,0.5983592657979544 +1995,0.7569492388110274,0.4896552355840001,0.5969779516717039 +1998,0.7603291991801172,0.49117441585168525,0.5774462841723346 +2001,0.781611875050703,0.523909299468113,0.6042739644967232 +2004,0.7700355469522372,0.48843503839032354,0.5981432201792916 +2007,0.782141377648698,0.5197156312086207,0.6263452195753227 +2010,0.825082529519342,0.5195972120145641,0.6453653328291843 +2013,0.8227698931835299,0.5314001749843426,0.6498682917772886 +2016,0.8342975903562537,0.55414000689009,0.6706846793375292 diff --git a/lectures/inequality.md b/lectures/inequality.md index f6c0ff67..cd6dfcf0 100644 --- a/lectures/inequality.md +++ b/lectures/inequality.md @@ -247,7 +247,7 @@ The following code block imports a subset of the dataset `SCF_plus` for 2016, which is derived from the [Survey of Consumer Finances](https://en.wikipedia.org/wiki/Survey_of_Consumer_Finances) (SCF). ```{code-cell} ipython3 -url = 'https://media.githubusercontent.com/media/QuantEcon/high_dim_data/main/SCF_plus/SCF_plus_mini.csv' +url = 'https://github.com/QuantEcon/high_dim_data/raw/main/SCF_plus/SCF_plus_mini.csv' df = pd.read_csv(url) df_income_wealth = df.dropna() ``` @@ -435,6 +435,8 @@ Let's examine the Gini coefficient in some simulations. The code below computes the Gini coefficient from a sample. +(code:gini-coefficient)= + ```{code-cell} ipython3 def gini_coefficient(y): @@ -481,6 +483,7 @@ You can check this by looking up the expression for the mean of a lognormal distribution. 
```{code-cell} ipython3 +%%time k = 5 σ_vals = np.linspace(0.2, 4, k) n = 2_000 @@ -616,51 +619,11 @@ We will use US data from the {ref}`Survey of Consumer Finances - -```{code-cell} ipython3 -ginis = pd.read_csv("_static/lecture_specific/inequality/usa-gini-nwealth-tincome-lincome.csv", index_col='year') +data_url = 'https://github.com/QuantEcon/lecture-python-intro/raw/main/lectures/_static/lecture_specific/inequality/usa-gini-nwealth-tincome-lincome.csv' +ginis = pd.read_csv(data_url, index_col='year') ginis.head(n=5) ``` @@ -687,10 +650,6 @@ One possibility is that this change is mainly driven by technology. However, we will see below that not all advanced economies experienced similar growth of inequality. - - - - ### Cross-country comparisons of income inequality Earlier in this lecture we used `wbgapi` to get Gini data across many countries @@ -1093,3 +1052,90 @@ plt.show() ```{solution-end} ``` + +```{exercise} +:label: inequality_ex3 + +The {ref}`code to compute the Gini coefficient is listed in the lecture above <code:gini-coefficient>`. + +This code uses loops to calculate the coefficient based on income or wealth data. + +This function can be re-written using vectorization which will greatly improve the computational efficiency when using `python`. + +Re-write the function `gini_coefficient` using `numpy` and vectorized code. + +You can compare the output of this new function with the one above, and note the speed differences. +``` + +```{solution-start} inequality_ex3 +:class: dropdown +``` + +Let's take a look at some raw data for the US that is stored in `df_income_wealth` + +```{code-cell} ipython3 +df_income_wealth.describe() +``` + +```{code-cell} ipython3 +df_income_wealth.head(n=4) +``` + +We will focus on the wealth variable `n_wealth` to compute a Gini coefficient for the year 2016. 
def gini(y):
    """
    Compute the Gini coefficient of the sample `y` with vectorized NumPy code.

    The sum of absolute differences over all ordered pairs of observations
    is divided by ``2 * n * sum(y)``, where ``n = len(y)``.

    Parameters
    ----------
    y : array_like
        Observations (e.g. income or wealth); must be reshapeable to
        ``(n, 1)`` and ``(1, n)``.

    Returns
    -------
    The Gini coefficient of `y`.
    """
    n = len(y)
    values = np.asarray(y)

    # Broadcasting a column against a row yields the full n-by-n table
    # of pairwise differences without an explicit double loop
    as_column = values.reshape((n, 1))
    as_row = values.reshape((1, n))
    pairwise_gaps = np.abs(as_column - as_row)

    return pairwise_gaps.sum() / (2 * n * values.sum())