Sampling Distribution of a Statistic
In [1]:
Copied!
import numpy as np
import pandas as pd
import seaborn as sns
In [2]:
Copied!
# Read the income table from disk; the bare name on the last line makes the
# notebook render the (truncated) frame as the cell output.
income_csv = "../data/loans_income.csv"
data = pd.read_csv(income_csv)
data
Out[2]:
| x | |
|---|---|
| 0 | 67000 |
| 1 | 52000 |
| 2 | 100000 |
| 3 | 78762 |
| 4 | 37041 |
| ... | ... |
| 49995 | 40000 |
| 49996 | 54000 |
| 49997 | 50000 |
| 49998 | 82000 |
| 49999 | 70000 |
50000 rows × 1 columns
In [3]:
Copied!
# Draw 1000 individual values from the "x" column and tag them "Data" so they
# can later be stacked with the sample-mean frames.
# The fixed seed makes the draw repeatable: without it every kernel restart
# produces a different sample and the displayed table can never be reproduced.
SAMPLE_SEED = 42
sample_data = pd.DataFrame({
    'income': data["x"].sample(1000, random_state=SAMPLE_SEED),
    'type': "Data",
})
sample_data
Out[3]:
| income | type | |
|---|---|---|
| 18869 | 70000 | Data |
| 34424 | 103000 | Data |
| 11601 | 51985 | Data |
| 39244 | 46000 | Data |
| 41527 | 36000 | Data |
| ... | ... | ... |
| 8679 | 100000 | Data |
| 6003 | 86100 | Data |
| 22853 | 49898 | Data |
| 43955 | 82000 | Data |
| 37150 | 45000 | Data |
1000 rows × 2 columns
In [4]:
Copied!
# Build the sampling distribution of the mean for two sample sizes and stack
# the results under the individual observations in `sample_data`.
N_REPLICATES = 1000  # number of sample means computed per sample size
MEANS_SEED = 42
# One seeded generator is shared by every draw: each `.sample` call advances
# its state, so the replicates differ from one another while the cell as a
# whole gives the same result on every Restart & Run All.
means_rng = np.random.default_rng(MEANS_SEED)


def sample_means(values, sample_size, n_replicates, random_state):
    """Return the means of repeated random samples drawn from `values`.

    Parameters
    ----------
    values : pandas.Series
        Series to sample from (without replacement, the `sample` default).
    sample_size : int
        Number of observations in each sample.
    n_replicates : int
        How many samples to draw, i.e. the length of the returned list.
    random_state : numpy.random.Generator
        Forwarded to `Series.sample`. Pass a generator object rather than an
        integer seed: an integer would re-seed on every call and make all
        replicates identical.

    Returns
    -------
    list
        One sample mean per replicate.
    """
    return [
        values.sample(sample_size, random_state=random_state).mean()
        for _ in range(n_replicates)
    ]


sample_mean_5 = pd.DataFrame({
    'income': sample_means(data["x"], 5, N_REPLICATES, means_rng),
    'type': "Mean of 5",
})
sample_mean_20 = pd.DataFrame({
    'income': sample_means(data["x"], 20, N_REPLICATES, means_rng),
    'type': "Mean of 20",
})
# Row labels are kept as-is: the original row labels for the "Data" rows and
# 0..N_REPLICATES-1 for each block of means.
samples = pd.concat([sample_data, sample_mean_5, sample_mean_20])
samples
Out[4]:
| income | type | |
|---|---|---|
| 18869 | 70000.00 | Data |
| 34424 | 103000.00 | Data |
| 11601 | 51985.00 | Data |
| 39244 | 46000.00 | Data |
| 41527 | 36000.00 | Data |
| ... | ... | ... |
| 995 | 58185.00 | Mean of 20 |
| 996 | 58664.95 | Mean of 20 |
| 997 | 66996.60 | Mean of 20 |
| 998 | 65541.50 | Mean of 20 |
| 999 | 66092.50 | Mean of 20 |
3000 rows × 2 columns
In [5]:
Copied!
import matplotlib.pyplot as plt

# One histogram panel per value of "type", placed side by side so the spread
# of the raw values can be compared with the spread of the sample means.
g = sns.FacetGrid(samples, col="type")
g.map(plt.hist, 'income', bins=40)
# Label both axes so the figure can be read on its own; the trailing
# semicolon keeps the FacetGrid repr out of the cell output.
g.set_axis_labels("income", "count");
Out[5]:
<seaborn.axisgrid.FacetGrid at 0x7f05dc630590>