A/B Testing¶

By Theory¶

Outcome	Price A	Price B
Conversion	200	182
No conversion	23,539	22,406

In [1]:

Copied!

import pandas as pd
import pandas as pd

In [2]:

Copied!





# Observed outcomes of the price experiment: one column per price variant,
# one row per outcome (visitors who converted / did not convert).
data = {
    "Price A": [200, 23_539],
    "Price B": [182, 22_406],
}

df = pd.DataFrame(data, index=["Conversion", "No conversion"])

# Single quotes inside the replacement field keep this cell valid on Python
# versions older than 3.12 (reusing the outer quote type requires PEP 701).
print(f"Price A, no conversion: {df.loc['No conversion', 'Price A']}")

Price A, no conversion: 23539

In [3]:

Copied!

# Conversion rate for price A: converted visitors divided by all visitors
# that were shown price A.
# `.loc[row, col]` selects the scalar in one step instead of chaining two
# lookups through an intermediate Series.
a_conversions = df.loc["Conversion", "Price A"]
a_sum = int(a_conversions + df.loc["No conversion", "Price A"])
p_a = a_conversions / a_sum
p_a

Out[3]:

np.float64(0.008424954715868403)

In [4]:

Copied!

# Conversion rate for price B: converted visitors divided by all visitors
# that were shown price B.
# `.loc[row, col]` selects the scalar in one step instead of chaining two
# lookups through an intermediate Series.
b_conversions = df.loc["Conversion", "Price B"]
b_sum = int(b_conversions + df.loc["No conversion", "Price B"])
p_b = b_conversions / b_sum
p_b

Out[4]:

np.float64(0.008057375597662475)

In [5]:

Copied!

# Pooled conversion rate under the null hypothesis (both prices convert at
# the same rate): all conversions divided by all visitors.
pooled_conversions = df.loc["Conversion", "Price A"] + df.loc["Conversion", "Price B"]
p_hat = pooled_conversions / (a_sum + b_sum)
p_hat

Out[5]:

np.float64(0.008245731430915016)

In [6]:

Copied!





import math
import scipy.stats as st

# Two-proportion z-test using the pooled rate p_hat:
#   z = (p_a - p_b) / sqrt(p_hat * (1 - p_hat) * (1/n_a + 1/n_b))
standard_error = math.sqrt(p_hat * (1 - p_hat) * ((1 / a_sum) + (1 / b_sum)))
z = (p_a - p_b) / standard_error

# One-sided p-value: upper-tail probability of the standard normal at z,
# i.e. the chance of a difference at least this large if the rates are equal.
one_sided_prob = 1 - st.norm.cdf(z)
one_sided_prob

Out[6]:

np.float64(0.33094407441560325)

We fail to reject the null hypothesis because the one-sided p-value ($\approx 0.33$) is greater than the significance level $\alpha = 0.05$.

By Resampling¶

In [7]:

Copied!





import random

# Dedicated, seeded generator so the permutation p-value is reproducible on
# every Restart & Run All, without touching the global `random` state.
SEED = 42
rng = random.Random(SEED)

# Pool every visitor from both variants: 1 = converted, 0 = did not convert.
# int() turns the numpy integers into plain Python ints for list repetition.
total_conversion = int(df.loc["Conversion", "Price A"] + df.loc["Conversion", "Price B"])
total_no_conversion = int(df.loc["No conversion", "Price A"] + df.loc["No conversion", "Price B"])
overall = [1] * total_conversion + [0] * total_no_conversion

count = 500  # number of random permutations
observed_diff = p_a - p_b

# Each round shuffles the pooled outcomes, deals them back into groups of the
# original sizes, and records the difference in conversion rates.
perm_diffs = []
for _ in range(count):
    rng.shuffle(overall)
    # Outcomes are 0/1, so summing a slice counts its conversions.
    a = sum(overall[:a_sum]) / a_sum
    b = sum(overall[a_sum:a_sum + b_sum]) / b_sum
    perm_diffs.append(a - b)

# Permutation p-value: share of shuffles whose difference is at least as
# large as the observed one.
perm_diff_greater = sum(1 for diff in perm_diffs if diff >= observed_diff)
print(f"{perm_diff_greater} / {count} = {perm_diff_greater/count}")

# Sorted ascending so that the plotting cell can read the critical value by
# position; the index below marks where the top 5% of differences starts.
perm_diffs.sort()
needed_portion_start_index = len(perm_diffs) - len(perm_diffs) * 0.05

174 / 500 = 0.348

In [8]:

Copied!





import matplotlib.pyplot as plt

observed_diff = p_a - p_b
# perm_diffs is expected to be sorted ascending by the simulation cell, so
# this position holds the last value below the top 5% of differences.
critical_value = perm_diffs[int(needed_portion_start_index - 1)]

# Draw on the explicit Axes object so exactly one figure is produced.
fig, ax = plt.subplots(figsize=(5, 5))
ax.hist(perm_diffs, bins=11, alpha=0.5, edgecolor="black", color="red")
ax.axvline(x=observed_diff, color="black", linestyle="--", label="Observed difference")
ax.axvline(x=critical_value, color="blue", linestyle="--", label="Critical value (5% level)")
ax.set(
    title="Permutation distribution of conversion-rate differences",
    xlabel="Conversion rate difference (A - B)",
    ylabel="Frequency",
)
ax.legend()
plt.show()

No description has been provided for this image

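The share of the histogram to the right of the black line (the observed difference) represents the p-value. The blue line marks the critical value at the 5% significance level, i.e. the 95th percentile of the permuted differences.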

Again, the permutation test also fails to reject the null hypothesis, because the p-value ($\approx 0.3$) is greater than $0.05$.

With Tools¶

In [9]:

Copied!





from scipy.stats import chi2_contingency
import numpy as np

# 2x2 contingency table: one row per price, columns are
# (conversions, non-conversions).
# NOTE(review): the name `survivors` looks copied from another example; it is
# kept so that any later cell referring to it keeps working.
survivors = np.array([
    [df.loc["Conversion", "Price A"], df.loc["No conversion", "Price A"]],
    [df.loc["Conversion", "Price B"], df.loc["No conversion", "Price B"]],
])

# The degrees of freedom are unpacked into `dof`, not `df`: rebinding `df`
# would replace the DataFrame with an integer and break every re-run of this
# cell or of any cell that reads `df`.
chi2, p_value, dof, _ = chi2_contingency(survivors)

# chi2_contingency applies Yates' continuity correction to 2x2 tables by
# default, so this value is slightly larger than the z-test p-value.
# Halving the two-sided p-value gives the one-sided one only because the
# observed difference lies in the hypothesised direction (p_a > p_b).
print(f"p-value for single sided test: {p_value / 2}")

p-value for single sided test: 0.34977973135733076