The Bootstrap
In [1]:
Copied!
from sklearn.utils import resample
import pandas as pd
from sklearn.utils import resample
import pandas as pd
In [2]:
Copied!
# Load the loan-income data; the CSV path is relative to the notebook's working directory.
income_csv = "../data/loans_income.csv"
loans_income = pd.read_csv(income_csv)

# Bare last expression so the notebook renders the frame (pandas truncates long frames).
loans_income
Out[2]:
| x | |
|---|---|
| 0 | 67000 |
| 1 | 52000 |
| 2 | 100000 |
| 3 | 78762 |
| 4 | 37041 |
| ... | ... |
| 49995 | 40000 |
| 49996 | 54000 |
| 49997 | 50000 |
| 49998 | 82000 |
| 49999 | 70000 |
50000 rows × 1 columns
In [3]:
Copied!
# Bootstrap the sampling distribution of the mean of column "x".
N_BOOTSTRAP = 1000  # number of bootstrap replicates
SEED = 42  # base seed so a fresh Restart & Run All reproduces the same replicates

results = []
for i in range(N_BOOTSTRAP):
    # resample() draws as many rows as the input, with replacement, by default.
    # A distinct seed per replicate keeps the draws different yet repeatable.
    sample = resample(loans_income, random_state=SEED + i)
    results.append(sample["x"].mean())

# One bootstrap mean per replicate. `res_df` is a Series despite its name;
# the name is kept because the summary cell below reads it.
res_df = pd.Series(results)
res_df
Out[3]:
0 68766.81156
1 68770.10790
2 68848.16744
3 68946.84508
4 68710.84424
...
995 68722.73528
996 68428.32996
997 68539.88930
998 68657.13478
999 68880.52984
Length: 1000, dtype: float64
In [4]:
Copied!
# Compare the bootstrap replicates with the statistic on the original data.
# Every value here is a mean (`.mean()`), so the labels say "mean".
original_mean = loans_income["x"].mean()
bootstrap_mean = res_df.mean()

# Values are computed up front so the f-strings contain no nested double
# quotes, which only parse on Python 3.12+.
print(f"Original mean: {original_mean}")
print(f"Bootstrap mean: {bootstrap_mean}")
# Bias = mean of the bootstrap replicates minus the original-sample statistic.
print(f"Bias: {bootstrap_mean - original_mean}")
# Standard error = standard deviation of the bootstrap replicates.
print(f"Std. Error: {res_df.std()}")
Original median: 68760.51844 Bootstrap method median 68761.05207012 Bias: 0.5336301200004527 Std. Error: 142.41785215344774