Bases: Chi2Sample
Build leSuccess object.
Parameters:
sample_A (pd.DataFrame): data for sample A.
sample_B (pd.DataFrame): data for sample B.
confidence_level (float): desired confidence level, expressed as a fraction (default: 0.95, i.e. 95%).
Example:
::
>>> from leab import leDataset
>>> from leab import after
>>> data = leDataset.SampleLeSuccess()
>>> ab_test = after.leSuccess(data.A,
... data.B,
... confidence_level=0.95)
>>> ab_test.sample_A.confidence_interval
[8.526343659939133, 22.13718821096384]
>>> ab_test.p_value
0.25870176105718934
>>> ab_test.get_verdict()
'No significant difference'
Source code in leab/after/leSuccess.py
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
class leSuccess(Chi2Sample):
    """
    Build leSuccess object.

    Compares the success outcomes of two samples with a chi-square test
    on their 2x2 contingency table. Each input is wrapped in a
    ``Chi2Sample``; the outcomes are read from the ``success`` column of
    that wrapper's ``sample`` attribute.

    Parameters:
        sample_A (pd.DataFrame): data for sample A.
        sample_B (pd.DataFrame): data for sample B.
        confidence_level (float): desired confidence level, expressed as
            a fraction (default: 0.95, i.e. 95%).

    Example:
        ::

            >>> from leab import leDataset
            >>> from leab import after

            >>> data = leDataset.SampleLeSuccess()
            >>> ab_test = after.leSuccess(data.A,
            ...                           data.B,
            ...                           confidence_level=0.95)
            >>> ab_test.sample_A.confidence_interval
            [8.526343659939133, 22.13718821096384]
            >>> ab_test.p_value
            0.25870176105718934
            >>> ab_test.get_verdict()
            'No significant difference'
    """

    def __init__(
        self,
        sample_A: pd.DataFrame,
        sample_B: pd.DataFrame,
        confidence_level: float = 0.95,
    ):
        self.confidence_level = confidence_level
        self.sample_A = Chi2Sample(sample_A, self.confidence_level)
        self.sample_B = Chi2Sample(sample_B, self.confidence_level)
        self.compute()

    def compute(self) -> None:
        """Run every step of the test, storing each result on ``self``.

        The order matters: each step reads attributes set by the
        previous ones.
        """
        self._get_contingency_table()
        self._get_observed_values()
        self._get_expected_values()
        self._get_chi_square_statistic()
        self._get_degree_of_freedom()
        self._get_p_value()

    def _get_contingency_table(self) -> None:
        """Build the samples x outcomes count table.

        Sets ``self.contingency_table`` with rows ``sample_A`` /
        ``sample_B`` and columns ``fail`` / ``success``.

        Raises:
            ValueError: if the two samples together do not contain
                exactly two distinct outcome values (raised by pandas
                when the column labels are assigned).
        """
        counts_A = self.sample_A.sample["success"].value_counts()
        counts_B = self.sample_B.sample["success"].value_counts()
        table = pd.DataFrame([counts_A, counts_B])
        # value_counts() orders outcomes by frequency, not by value, so
        # sort the outcome labels: the falsy outcome (0 / False) must be
        # the first column before the positional rename below.
        table = table.sort_index(axis=1)
        # An outcome that never occurs in one sample is a count of zero,
        # not a missing value.
        table = table.fillna(0)
        table.index = ["sample_A", "sample_B"]
        table.columns = ["fail", "success"]
        self.contingency_table = table

    def _get_observed_values(self) -> None:
        """Store the observed counts as a plain array."""
        self.observed_values = self.contingency_table.values

    def _get_expected_values(self) -> None:
        """Store the expected counts computed by scipy."""
        result = scipy.stats.chi2_contingency(self.contingency_table)
        # Element 3 of the result is the table of expected frequencies.
        self.expected_values = result[3]

    def _get_chi_square_statistic(self) -> None:
        """Compute the chi-square statistic: sum of (O - E)^2 / E.

        The statistic is computed from the observed and expected tables
        directly; no continuity correction is applied here.
        """
        # Summing the rows yields one partial total per column.
        per_column = sum(
            (observed - expected) ** 2.0 / expected
            for observed, expected in zip(
                self.observed_values, self.expected_values
            )
        )
        self.chi_square_statistic = per_column[0] + per_column[1]

    def _get_degree_of_freedom(self) -> None:
        """Compute (rows - 1) * (columns - 1) from the table shape."""
        no_of_rows, no_of_columns = self.contingency_table.shape
        self.degree_of_freedom = (no_of_rows - 1) * (no_of_columns - 1)

    def _get_p_value(self) -> None:
        """Compute the p-value as the upper tail of the chi2 law."""
        self.p_value = 1 - chi2.cdf(
            x=self.chi_square_statistic, df=self.degree_of_freedom
        )

    def get_verdict(self) -> str:
        """Return a one-sentence conclusion of the test.

        Returns:
            str: ``'No significant difference'`` when the p-value is not
            below ``1 - confidence_level``; otherwise which sample has
            the higher ``success / trial`` ratio.
        """
        if self.p_value >= 1.0 - self.confidence_level:
            return "No significant difference"

        # NOTE(review): ``success`` and ``trial`` come from Chi2Sample;
        # presumably the success count and the number of trials.
        rate_A = self.sample_A.success / self.sample_A.trial
        rate_B = self.sample_B.success / self.sample_B.trial
        if rate_A > rate_B:
            return "Sample A is more successful"
        return "Sample B is more successful"
|