Sampling Distribution of a Statistic
In [1]:
Copied!
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
In [2]:
Copied!
# Read the loans income CSV into a DataFrame; later cells read its "x" column.
data = pd.read_csv(filepath_or_buffer="../data/loans_income.csv")
data
Out[2]:
| | x |
|---|---|
| 0 | 67000 |
| 1 | 52000 |
| 2 | 100000 |
| 3 | 78762 |
| 4 | 37041 |
| ... | ... |
| 49995 | 40000 |
| 49996 | 54000 |
| 49997 | 50000 |
| 49998 | 82000 |
| 49999 | 70000 |
50000 rows × 1 columns
In [3]:
Copied!
# Draw 1,000 individual values from column "x" (pandas samples without
# replacement by default) and tag them "Data" so they can be compared with
# the sample-mean distributions later on.
# A fixed random_state makes the draw repeatable: re-running the cell, or
# restarting the kernel and running everything, yields the same rows.
sample_data = pd.DataFrame({
    "income": data["x"].sample(1000, random_state=42),
    "type": "Data",
})
sample_data
Out[3]:
| | income | type |
|---|---|---|
| 12865 | 50000 | Data |
| 39562 | 47500 | Data |
| 12410 | 35000 | Data |
| 37284 | 82000 | Data |
| 9290 | 53000 | Data |
| ... | ... | ... |
| 21338 | 110000 | Data |
| 23629 | 44000 | Data |
| 35915 | 45000 | Data |
| 9348 | 84000 | Data |
| 16936 | 59712 | Data |
1000 rows × 2 columns
In [4]:
Copied!
def sample_means(values, sample_size, rng, n_samples=1000):
    """Build the sampling distribution of the mean for one sample size.

    Draws ``n_samples`` independent random samples of ``sample_size``
    observations from ``values`` (each sample drawn without replacement,
    the pandas default) and records the mean of every sample.

    Parameters
    ----------
    values : pd.Series
        Population to sample from.
    sample_size : int
        Number of observations in each sample.
    rng : np.random.Generator
        Random generator shared across draws. It must be a generator object
        rather than an integer seed: an integer would re-seed every
        ``.sample`` call and produce ``n_samples`` identical samples.
    n_samples : int, default 1000
        Number of sample means to compute.

    Returns
    -------
    pd.DataFrame
        Columns ``income`` (the sample means) and ``type`` (the label
        ``"Mean of <sample_size>"``), one row per sample.
    """
    means = [
        values.sample(sample_size, random_state=rng).mean()
        for _ in range(n_samples)
    ]
    return pd.DataFrame({"income": means, "type": f"Mean of {sample_size}"})


# One seeded generator, created inside this cell, drives every draw below, so
# re-running the cell reproduces exactly the same sample means.
rng = np.random.default_rng(42)
sample_mean_5 = sample_means(data["x"], 5, rng)
sample_mean_20 = sample_means(data["x"], 20, rng)

# Stack the raw sample and both sample-mean distributions into one long frame;
# the "type" column identifies which distribution each row belongs to.
samples = pd.concat([sample_data, sample_mean_5, sample_mean_20])
samples
Out[4]:
| | income | type |
|---|---|---|
| 12865 | 50000.00 | Data |
| 39562 | 47500.00 | Data |
| 12410 | 35000.00 | Data |
| 37284 | 82000.00 | Data |
| 9290 | 53000.00 | Data |
| ... | ... | ... |
| 995 | 69400.95 | Mean of 20 |
| 996 | 67074.20 | Mean of 20 |
| 997 | 63600.00 | Mean of 20 |
| 998 | 67091.30 | Mean of 20 |
| 999 | 45081.80 | Mean of 20 |
3000 rows × 2 columns
In [5]:
Copied!
# One histogram panel per value of "type", so the raw sample can be compared
# side by side with the distributions of the sample means.
# `plt` is imported in the notebook's top import cell.
g = sns.FacetGrid(samples, col="type")
g.map(plt.hist, "income", bins=40)
# Label the axes so the figure stands on its own; the trailing semicolon
# keeps the FacetGrid repr out of the cell output.
g.set_axis_labels("Income", "Count");
Out[5]:
<seaborn.axisgrid.FacetGrid at 0x7fa360cd3770>