The Bootstrap
In [1]:
Copied!
import numpy as np
import pandas as pd
from sklearn.utils import resample
In [2]:
Copied!
# Load the loan income data once; later cells read its single column "x".
# The path is relative to the notebook's working directory.
loans_income = pd.read_csv("../data/loans_income.csv")

# Bare last expression -> rich (truncated) DataFrame display.
loans_income
Out[2]:
| x | |
|---|---|
| 0 | 67000 |
| 1 | 52000 |
| 2 | 100000 |
| 3 | 78762 |
| 4 | 37041 |
| ... | ... |
| 49995 | 40000 |
| 49996 | 54000 |
| 49997 | 50000 |
| 49998 | 82000 |
| 49999 | 70000 |
50000 rows × 1 columns
In [3]:
Copied!
# Compare two estimates of the sampling distribution of the mean:
#   * bootstrap  -- repeatedly resample ONE observed sample of the data
#   * "real"     -- repeatedly draw a fresh sample from the full data set
SAMPLE_SIZE = 150   # rows in each sample
N_REPEATS = 1000    # number of resampling iterations
SEED = 42           # fixed seed so Restart & Run All reproduces the numbers

# One shared RandomState: every draw advances it, so the draws differ from
# each other while the cell as a whole stays reproducible.
rng = np.random.RandomState(SEED)

results_bootstrap = []
results = []

# The single sample that the bootstrap resamples from.
sample_from_income = loans_income.sample(SAMPLE_SIZE, random_state=rng)

for _ in range(N_REPEATS):
    # Bootstrap replicate: resample() draws with replacement and keeps the
    # input's size by default.
    sample_bootstrap = resample(sample_from_income, random_state=rng)
    # Reference replicate: a brand-new sample taken from the full data.
    sample = loans_income.sample(SAMPLE_SIZE, random_state=rng)
    results.append(sample["x"].mean())
    results_bootstrap.append(sample_bootstrap["x"].mean())

# Means of the bootstrap replicates / of the fresh samples, as Series so the
# next cell can call .mean() and .std() on them.
res_df = pd.Series(results_bootstrap)
res_df_real = pd.Series(results)
In [4]:
Copied!
# Summarise the bootstrap run against the full data set and against the
# repeated-sampling reference.
#
# The two means are hoisted into locals so the f-strings below contain no
# nested double quotes: reusing the outer quote character inside an f-string
# replacement field is a SyntaxError on Python versions before 3.12.
population_mean = loans_income["x"].mean()
bootstrap_mean = res_df.mean()

print(f"Original mean: {population_mean}")
print(f"Real sampling mean: {res_df_real.mean()}")
print(f"Bootstrap method mean {bootstrap_mean}")
# NOTE(review): this "bias" is measured against the mean of the full data
# set, not against the mean of the one sample that was resampled, so it also
# contains that sample's own sampling error -- confirm this is intended.
print(f"Bias: {bootstrap_mean - population_mean}")
# The standard deviation of the replicate means is the standard-error estimate.
print(f"Bootstrapped Std. Error: {res_df.std()}")
print(f"Real Std. Error: {res_df_real.std()}")
Original mean: 68760.51844 Real sampling mean: 68829.19609333333 Bootstrap method mean 66695.42792666666 Bias: -2065.0905133333436 Bootstrapped Std. Error: 2412.326225776251 Real Std. Error: 2751.5871297950516
In [ ]:
Copied!