Skip to content

abtest.stats

ABTest - stats¤

ABTestRatios ¤

This test uses the difference between the conversion ratios, $d = A_{cr} - B_{cr}$, as the signature.

According to the central limit theorem, we can approximate the distribution of $d$ as a normal distribution.

Null hypothesis: $d = 0$, with $\sigma$ equal to the pooled standard error. Alternative hypothesis: $d \neq 0$, with $\sigma$ equal to the pooled standard error.

alt_distribution(self) ¤

Generate the distribution for the alternative hypothesis

Source code in dietbox/abtest/stats.py
def alt_distribution(self):
    """Generate the distribution for the alternative hypothesis

    The uplift and the pooled standard error are refreshed first, then a
    normal distribution centred on ``self.uplift`` with spread
    ``self.pld_std_err`` is built via ``scs.norm``.

    Returns:
        The object returned by ``scs.norm(self.uplift, self.pld_std_err)``.
    """

    # Both calls cache their result on the instance; the values are read
    # back from the attributes below.
    self.conversion_uplift()
    self.pooled_std_err()

    center = self.uplift
    spread = self.pld_std_err

    return scs.norm(center, spread)

conversion_rate(self) ¤

Conversion rate for a specific group

Source code in dietbox/abtest/stats.py
def conversion_rate(self):
    """Compute and cache the conversion rate of groups A and B

    Each rate comes from ``cal_conversion_rate`` applied to the group's
    total and converted values. The results are stored on ``self.A_cr``
    and ``self.B_cr``.

    Returns:
        tuple: ``(self.A_cr, self.B_cr)``.
    """

    rate_a = cal_conversion_rate(self.A_total, self.A_converted)
    self.A_cr = rate_a

    rate_b = cal_conversion_rate(self.B_total, self.B_converted)
    self.B_cr = rate_b

    return (self.A_cr, self.B_cr)

conversion_uplift(self) ¤

Uplift in conversion rate

Relative uplift is the relative conversion rate difference between the test groups

Source code in dietbox/abtest/stats.py
def conversion_uplift(self):
    """Compute and cache the uplift in conversion rate

    Relative uplift is the relative conversion rate difference between the
    test groups. The calculation is delegated to ``cal_conversion_uplift``
    and the result is stored on ``self.uplift``.

    Returns:
        The value stored on ``self.uplift``.
    """

    self.uplift = cal_conversion_uplift(
        self.A_total,
        self.B_total,
        self.A_converted,
        self.B_converted,
    )

    return self.uplift

null_distribution(self) ¤

Generate the distribution for the null hypothesis

Source code in dietbox/abtest/stats.py
def null_distribution(self):
    """Generate the distribution for the null hypothesis

    The pooled standard error is refreshed first; the distribution is then
    built with ``scs.norm`` centred on zero.

    Returns:
        The object returned by ``scs.norm(0, self.pld_std_err)``.
    """

    # Caches the pooled standard error on self.pld_std_err.
    self.pooled_std_err()

    spread = self.pld_std_err

    return scs.norm(0, spread)

pooled_probability(self) ¤

Pooled probability for two samples

This is better used as an intermediate value.

Source code in dietbox/abtest/stats.py
def pooled_probability(self):
    """Compute and cache the pooled probability for the two samples

    This is better used as an intermediate value. The calculation is
    delegated to ``cal_pooled_probability`` and the result is stored on
    ``self.probability``.

    Returns:
        The value stored on ``self.probability``.
    """

    self.probability = cal_pooled_probability(
        self.A_total,
        self.B_total,
        self.A_converted,
        self.B_converted,
    )

    return self.probability

pooled_std_err(self) ¤

Pooled standard error for two samples

For more information about the definition, refer to wikipedia: https://en.wikipedia.org/wiki/Pooled_variance

Source code in dietbox/abtest/stats.py
def pooled_std_err(self):
    """Compute and cache the pooled standard error for the two samples

    For more information about the definition, refer to wikipedia:
    https://en.wikipedia.org/wiki/Pooled_variance

    Returns:
        The value stored on ``self.pld_std_err``.
    """

    # The pooled probability is an input of the pooled standard error, so
    # refresh self.probability before using it.
    self.pooled_probability()

    self.pld_std_err = cal_pooled_std_err(
        self.probability, self.A_total, self.B_total
    )

    return self.pld_std_err

report(self, with_data=None, pipeline=None) ¤

Run pipeline and generate report

Source code in dietbox/abtest/stats.py
def report(self, with_data=None, pipeline=None):
    """Run pipeline and generate report

    Every callable attribute listed by ``dir(self)`` whose name contains no
    double underscore, except ``report`` itself, is invoked without
    arguments in the order given by ``dir``. The attributes those steps are
    expected to set are then collected into a dictionary.

    Args:
        with_data: when truthy, ``self.data`` is added under ``"data"``.
            ``None`` (the default) is treated as ``True``.
        pipeline: defaults to ``"all"``; the value is not consulted when
            selecting the steps to run.

    Returns:
        dict: the collected results, plus ``"name"`` when ``self.name`` is
        truthy and ``"data"`` when ``with_data`` is truthy.
    """
    pipeline = "all" if pipeline is None else pipeline
    with_data = True if with_data is None else with_data

    # Phase 1: collect the names of the steps before any of them runs.
    step_names = []
    for attr_name in dir(self):
        if not callable(getattr(self, attr_name)):
            continue
        if "__" in attr_name or attr_name == "report":
            continue
        step_names.append(attr_name)

    # Phase 2: run every step; results are cached on the instance.
    for step_name in step_names:
        step = getattr(self, step_name)
        step()

    kpi = {"a": self.A_cr, "b": self.B_cr}
    std_err = {"a": self.A_std_err, "b": self.B_std_err}
    res = {
        "kpi": kpi,
        "std_err": std_err,
        "pooled_std_err": self.pld_std_err,
        "probability": self.probability,
        "uplift": self.uplift,
        "p_value": self.p,
        "z_score": self.z,
        "diff_std_err": self.diff_std_err,
    }

    if self.name:
        res["name"] = self.name
    if with_data:
        res["data"] = self.data

    return res

standard_error(self) ¤

standard error

Source code in dietbox/abtest/stats.py
def standard_error(self):
    """Compute and cache the standard error of groups A and B

    Each value comes from ``cal_standard_error`` applied to the group's
    total and converted values. The results are stored on
    ``self.A_std_err`` and ``self.B_std_err``.

    Returns:
        tuple: ``(self.A_std_err, self.B_std_err)``.
    """

    err_a = cal_standard_error(self.A_total, self.A_converted)
    self.A_std_err = err_a

    err_b = cal_standard_error(self.B_total, self.B_converted)
    self.B_std_err = err_b

    return (self.A_std_err, self.B_std_err)

ABTestRatiosNaive ¤

A naive AB Test ratios class

conversion_rate(self) ¤

Conversion rate for a specific group

Source code in dietbox/abtest/stats.py
def conversion_rate(self):
    """Compute and cache the conversion rate of groups A and B

    Each rate comes from ``cal_conversion_rate`` applied to the group's
    total and converted values. The results are stored on ``self.A_cr``
    and ``self.B_cr``.

    Returns:
        tuple: ``(self.A_cr, self.B_cr)``.
    """

    rate_a = cal_conversion_rate(self.A_total, self.A_converted)
    self.A_cr = rate_a

    rate_b = cal_conversion_rate(self.B_total, self.B_converted)
    self.B_cr = rate_b

    return (self.A_cr, self.B_cr)

conversion_uplift(self) ¤

Uplift in conversion rate

Relative uplift is the relative conversion rate difference between the test groups

Source code in dietbox/abtest/stats.py
def conversion_uplift(self):
    """Compute and cache the uplift in conversion rate

    Relative uplift is the relative conversion rate difference between the
    test groups. The calculation is delegated to ``cal_conversion_uplift``
    and the result is stored on ``self.uplift``.

    Returns:
        The value stored on ``self.uplift``.
    """

    self.uplift = cal_conversion_uplift(
        self.A_total,
        self.B_total,
        self.A_converted,
        self.B_converted,
    )

    return self.uplift

p_value(self, test=None) ¤

calculate p-value

Source code in dietbox/abtest/stats.py
def p_value(self, test=None):
    """Calculate p-value

    The p-value is computed by ``cal_p_value`` from ``self.data`` and
    cached on ``self.p``.

    Args:
        test: optional name of the statistical test, forwarded to
            ``cal_p_value`` as its ``test`` keyword. When ``None`` (the
            default) the helper is called without it, so the helper's own
            default test applies.

    Returns:
        The value stored on ``self.p``.
    """

    if test is None:
        self.p = cal_p_value(self.data)
    else:
        # Previously the argument was accepted but never used; honour the
        # caller's choice of test.
        self.p = cal_p_value(self.data, test=test)

    return self.p

pooled_probability(self) ¤

Pooled probability for two samples

This is better used as an intermediate value.

Source code in dietbox/abtest/stats.py
def pooled_probability(self):
    """Compute and cache the pooled probability for the two samples

    This is better used as an intermediate value. The calculation is
    delegated to ``cal_pooled_probability`` and the result is stored on
    ``self.probability``.

    Returns:
        The value stored on ``self.probability``.
    """

    self.probability = cal_pooled_probability(
        self.A_total,
        self.B_total,
        self.A_converted,
        self.B_converted,
    )

    return self.probability

pooled_std_err(self) ¤

Pooled standard error for two samples

For more information about the definition, refer to wikipedia: https://en.wikipedia.org/wiki/Pooled_variance

Source code in dietbox/abtest/stats.py
def pooled_std_err(self):
    """Compute and cache the pooled standard error for the two samples

    For more information about the definition, refer to wikipedia:
    https://en.wikipedia.org/wiki/Pooled_variance

    Returns:
        The value stored on ``self.pld_std_err``.
    """

    # The pooled probability is an input of the pooled standard error, so
    # refresh self.probability before using it.
    self.pooled_probability()

    self.pld_std_err = cal_pooled_std_err(
        self.probability, self.A_total, self.B_total
    )

    return self.pld_std_err

report(self, with_data=None, pipeline=None) ¤

Run pipeline and generate report

Source code in dietbox/abtest/stats.py
def report(self, with_data=None, pipeline=None):
    """Run pipeline and generate report

    Every callable attribute listed by ``dir(self)`` whose name contains no
    double underscore, except ``report`` itself, is invoked without
    arguments in the order given by ``dir``. The attributes those steps are
    expected to set are then collected into a dictionary.

    Args:
        with_data: when truthy, ``self.data`` is added under ``"data"``.
            ``None`` (the default) is treated as ``True``.
        pipeline: defaults to ``"all"``; the value is not consulted when
            selecting the steps to run.

    Returns:
        dict: the collected results, plus ``"name"`` when ``self.name`` is
        truthy and ``"data"`` when ``with_data`` is truthy.
    """
    pipeline = "all" if pipeline is None else pipeline
    with_data = True if with_data is None else with_data

    # Phase 1: collect the names of the steps before any of them runs.
    step_names = []
    for attr_name in dir(self):
        if not callable(getattr(self, attr_name)):
            continue
        if "__" in attr_name or attr_name == "report":
            continue
        step_names.append(attr_name)

    # Phase 2: run every step; results are cached on the instance.
    for step_name in step_names:
        step = getattr(self, step_name)
        step()

    kpi = {"a": self.A_cr, "b": self.B_cr}
    std_err = {"a": self.A_std_err, "b": self.B_std_err}
    res = {
        "kpi": kpi,
        "std_err": std_err,
        "pooled_std_err": self.pld_std_err,
        "probability": self.probability,
        "uplift": self.uplift,
        "p_value": self.p,
        "z_score": self.z,
        "diff_std_err": self.diff_std_err,
    }

    if self.name:
        res["name"] = self.name
    if with_data:
        res["data"] = self.data

    return res

standard_error(self) ¤

standard error

Source code in dietbox/abtest/stats.py
def standard_error(self):
    """Compute and cache the standard error of groups A and B

    Each value comes from ``cal_standard_error`` applied to the group's
    total and converted values. The results are stored on
    ``self.A_std_err`` and ``self.B_std_err``.

    Returns:
        tuple: ``(self.A_std_err, self.B_std_err)``.
    """

    err_a = cal_standard_error(self.A_total, self.A_converted)
    self.A_std_err = err_a

    err_b = cal_standard_error(self.B_total, self.B_converted)
    self.B_std_err = err_b

    return (self.A_std_err, self.B_std_err)

z_score(self, significance_level=None, two_tailed=None) ¤

Calculate z-score

Source code in dietbox/abtest/stats.py
def z_score(self, significance_level=None, two_tailed=None):
    """Calculate z-score

    The value is computed by ``cal_z_score`` and cached on ``self.z``.

    Args:
        significance_level: value passed to ``cal_z_score``. When ``None``
            (the default) ``self.p`` is used, so ``self.p`` must already be
            set in that case.
        two_tailed: flag passed to ``cal_z_score``. ``None`` (the default)
            is treated as ``True``.

    Returns:
        The value stored on ``self.z``.
    """

    level = self.p if significance_level is None else significance_level
    tails = True if two_tailed is None else two_tailed

    self.z = cal_z_score(level, tails)

    return self.z

ABTestSeries ¤

AB test of series data

kpi(self) ¤

Conversion rate for a specific group

Source code in dietbox/abtest/stats.py
def kpi(self):
    """Compute and cache the KPI of groups A and B

    When ``self.kpi_method`` equals ``"count"`` the KPI of each group is the
    result of ``cal_conversion_rate`` on its total and converted values.
    For any other method the converted value itself is used as the KPI.

    Returns:
        tuple: ``(self.A_kpi, self.B_kpi)``.
    """

    use_rate = self.kpi_method == "count"

    if not use_rate:
        # The converted values already are the KPI.
        self.A_kpi = self.A_converted
        self.B_kpi = self.B_converted
        return self.A_kpi, self.B_kpi

    self.A_kpi = cal_conversion_rate(self.A_total, self.A_converted)
    self.B_kpi = cal_conversion_rate(self.B_total, self.B_converted)
    return self.A_kpi, self.B_kpi

kpi_uplift(self) ¤

Uplift in conversion rate

Relative uplift is the relative conversion rate difference between the test groups

Source code in dietbox/abtest/stats.py
def kpi_uplift(self):
    """Uplift in KPI

    Relative uplift is the relative KPI difference between the test groups,
    computed as ``(B_kpi - A_kpi) / A_kpi`` with group A as the baseline.
    The result is cached on ``self.uplift``.

    Returns:
        The value stored on ``self.uplift``.

    Raises:
        ZeroDivisionError: if the KPI of group A is a plain Python number
            equal to zero.
    """

    # Refresh the KPIs first so this method also works when called on its
    # own, instead of failing because A_kpi / B_kpi were never set.
    self.kpi()

    baseline = self.A_kpi
    self.uplift = (self.B_kpi - baseline) / baseline

    return self.uplift

p_value(self) ¤

calculate p-value

Source code in dietbox/abtest/stats.py
def p_value(self):
    """Calculate p-value

    The KPIs are refreshed first, then ``cal_p_value`` is run on
    ``self.data`` with ``test="mannwhitney"``. The result is cached on
    ``self.p``.

    Returns:
        The value stored on ``self.p``.
    """

    self.kpi()

    p = cal_p_value(self.data, test="mannwhitney")
    self.p = p

    return self.p

report(self, with_data=None, pipeline=None) ¤

Run pipeline and generate report

Source code in dietbox/abtest/stats.py
def report(self, with_data=None, pipeline=None):
    """Run pipeline and generate report

    Every callable attribute listed by ``dir(self)`` whose name contains no
    double underscore, except ``report`` itself, is invoked without
    arguments in the order given by ``dir``. The attributes those steps are
    expected to set are then collected into a dictionary.

    Args:
        with_data: when truthy, ``self.data`` is added under ``"data"``.
            ``None`` (the default) is treated as ``True``.
        pipeline: defaults to ``"all"``; the value is not consulted when
            selecting the steps to run.

    Returns:
        dict: the collected results, plus ``"name"`` when ``self.name`` is
        truthy and ``"data"`` when ``with_data`` is truthy.
    """
    pipeline = "all" if pipeline is None else pipeline
    with_data = True if with_data is None else with_data

    # Phase 1: collect the names of the steps before any of them runs.
    step_names = []
    for attr_name in dir(self):
        if not callable(getattr(self, attr_name)):
            continue
        if "__" in attr_name or attr_name == "report":
            continue
        step_names.append(attr_name)

    # Phase 2: run every step; results are cached on the instance.
    for step_name in step_names:
        step = getattr(self, step_name)
        step()

    kpi = {"a": self.A_kpi, "b": self.B_kpi}
    std_err = {"a": self.A_std_err, "b": self.B_std_err}
    res = {
        "kpi": kpi,
        "std_err": std_err,
        "uplift": self.uplift,
        "p_value": self.p,
    }

    if self.name:
        res["name"] = self.name
    if with_data:
        res["data"] = self.data

    return res

standard_error(self) ¤

standard error

Source code in dietbox/abtest/stats.py
def standard_error(self):
    """Compute and cache the standard error of groups A and B

    Each value comes from ``cal_standard_error`` applied to the group's
    total and converted values. The results are stored on
    ``self.A_std_err`` and ``self.B_std_err``.

    Returns:
        tuple: ``(self.A_std_err, self.B_std_err)``.
    """

    err_a = cal_standard_error(self.A_total, self.A_converted)
    self.A_std_err = err_a

    err_b = cal_standard_error(self.B_total, self.B_converted)
    self.B_std_err = err_b

    return (self.A_std_err, self.B_std_err)