Skip to content

leAverage

leAverage.py

leAverage

Bases: TTestSample

Build leAverage object.

Parameters:

sample_A (pd.DataFrame): data for sample A.
sample_B (pd.DataFrame): data for sample B.
confidence_level (float): desired confidence level, expressed as a fraction between 0 and 1; default: 0.95 (i.e. 95%).

Example:

::

    >>> from leab import after
    >>> from leab import leDataset

    >>> data = leDataset.SampleLeAverage()
    >>> ab_test = after.leAverage(data.A, data.B)

    >>> ab_test.sample_A.confidence_interval

    [34.75214007684581, 81.59785992315418]

    >>> ab_test.get_verdict()

    'Sample A mean is greater'
Source code in leab/after/leAverage.py
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
class leAverage(TTestSample):
    """
    Build leAverage object.

    Compares the means of two samples. Each sample is wrapped in a
    ``TTestSample``; the difference of means, its variance, the degrees of
    freedom, the t statistic and the p-value are computed at construction
    time and stored as attributes.

    Parameters:

        sample_A (pd.DataFrame): data for sample A.
        sample_B (pd.DataFrame): data for sample B.
        confidence_level (float): desired confidence level, expressed as a
            fraction (default: 0.95).

    Attributes set by ``compute()``:

        diff_mean: mean of A minus mean of B.
        diff_variance: variance_A / n_A + variance_B / n_B.
        diff_df: degrees of freedom derived from the two per-sample terms.
        diff_mean_stddev: square root of ``diff_variance``.
        t: ``diff_mean / diff_mean_stddev``.
        x: ``diff_df / (diff_df + t * t)``.
        p_value: ``sc.betainc(diff_df / 2, 0.5, x)``.
        d: same value as ``diff_mean``.
        SE: same value as ``diff_mean_stddev``.

    Example:

        ::

            >>> from leab import after
            >>> from leab import leDataset

            >>> data = leDataset.SampleLeAverage()
            >>> ab_test = after.leAverage(data.A, data.B)

            >>> ab_test.sample_A.confidence_interval

            [34.75214007684581, 81.59785992315418]

            >>> ab_test.get_verdict()

            'Sample A mean is greater'
    """

    def __init__(
        self,
        sample_A: pd.DataFrame,
        sample_B: pd.DataFrame,
        confidence_level: float = 0.95,
    ):
        # NOTE(review): TTestSample.__init__ is not invoked on this instance;
        # the two samples are held by composition instead. Confirm this is
        # intentional given the class also inherits from TTestSample.
        self.confidence_level = confidence_level
        self.sample_A = TTestSample(sample_A, confidence_level)
        self.sample_B = TTestSample(sample_B, confidence_level)
        self.compute()

    def compute(self) -> None:
        """Run every computation step, in dependency order."""
        self._get_diff_mean()
        self._get_diff_variance()
        # The steps below depend on diff_mean / diff_variance set above.
        self._get_diff_df()
        self._get_diff_mean_stddev()
        self._get_t()
        self._get_x()
        self._get_p_value()
        self._get_d()
        self._get_SE()

    def _get_diff_mean(self) -> None:
        """Store the difference of sample means (A minus B) in ``diff_mean``."""
        self.diff_mean = self.sample_A.mean - self.sample_B.mean

    def _get_diff_variance(self) -> None:
        """Store ``variance_A / n_A + variance_B / n_B`` in ``diff_variance``."""
        self.diff_variance = (
            self.sample_A.variance / self.sample_A.count_elt
            + self.sample_B.variance / self.sample_B.count_elt
        )

    def _get_diff_df(self) -> None:
        """
        Store the degrees of freedom in ``diff_df``.

        Computed as ``diff_variance ** 2`` divided by the sum, over both
        samples, of ``(variance / n) ** 2 / (n - 1)`` (the
        Welch-Satterthwaite form for unequal variances).
        """
        # Per-sample contribution to the variance of the mean difference.
        term_a = self.sample_A.variance / self.sample_A.count_elt
        term_b = self.sample_B.variance / self.sample_B.count_elt

        # Explicit products (rather than ``** 2``) keep the arithmetic
        # identical to the historical implementation.
        numerator = self.diff_variance * self.diff_variance
        denominator = (
            term_a * term_a / (self.sample_A.count_elt - 1)
            + term_b * term_b / (self.sample_B.count_elt - 1)
        )
        self.diff_df = numerator / denominator

    def _get_diff_mean_stddev(self) -> None:
        """Store the square root of ``diff_variance`` in ``diff_mean_stddev``."""
        self.diff_mean_stddev = np.sqrt(self.diff_variance)

    def _get_t(self) -> None:
        """Store the t statistic ``diff_mean / diff_mean_stddev`` in ``t``."""
        self.t = self.diff_mean / self.diff_mean_stddev

    def _get_x(self) -> None:
        """Store ``diff_df / (diff_df + t * t)`` in ``x`` (input of the p-value step)."""
        self.x = self.diff_df / (self.diff_df + self.t * self.t)

    def _get_p_value(self) -> None:
        """
        Store ``sc.betainc(diff_df / 2, 0.5, x)`` in ``p_value``.

        NOTE(review): assuming ``sc`` is ``scipy.special`` (the import is not
        visible here), this is the regularized incomplete beta function, which
        for this choice of arguments gives the two-sided p-value of a
        Student t statistic -- confirm against the module imports.
        """
        self.p_value = sc.betainc(self.diff_df / 2, 0.5, self.x)

    def _get_d(self) -> None:
        """Expose ``diff_mean`` under the short name ``d``."""
        self.d = self.diff_mean

    def _get_SE(self) -> None:
        """Expose ``diff_mean_stddev`` under the short name ``SE``."""
        self.SE = self.diff_mean_stddev

    def plot_difference_of_means(self):
        """Placeholder: not implemented yet, currently does nothing."""

    def get_verdict(self) -> str:
        """
        Summarize the outcome of the test as a short sentence.

        Returns:

            str: ``"Sample A mean is greater"`` or ``"Sample B mean is
            greater"`` when ``p_value`` is below ``1 - confidence_level``,
            otherwise ``"No significant difference"``.
        """
        alpha = 1 - self.confidence_level
        # Keep the strict ``<`` test: a NaN p-value must fall through to
        # "No significant difference".
        if self.p_value < alpha:
            if self.sample_A.mean > self.sample_B.mean:
                return "Sample A mean is greater"
            return "Sample B mean is greater"
        return "No significant difference"