Skip to content

DIETBox

abtest.stats

emptymalei/diet

abtest.stats

ABTest - stats¤

`ABTestRatios` ¤

This test uses the difference between the conversion ratios, $d = A_{cr} - B_{cr}$, as the test statistic.

According to the central limit theorem, we can approximate the distribution of $d$ as a normal distribution.

Null hypothesis: $d = 0$, with sigma equal to the pooled standard error. Alternative hypothesis: $d \neq 0$, with sigma equal to the pooled standard error.

`alt_distribution(self)` ¤

Generate the distribution for the alternative hypothesis

Source code in dietbox/abtest/stats.py

def alt_distribution(self):
    """Generate the distribution for the alternative hypothesis

    Calls ``conversion_uplift`` and ``pooled_std_err`` so that ``self.uplift``
    and ``self.pld_std_err`` are up to date, then returns
    ``scs.norm(self.uplift, self.pld_std_err)``, i.e. a normal distribution
    centred on the observed uplift (assuming ``scs`` is ``scipy.stats``).
    """

    # Both calls store their result on the instance; the values are read back below.
    self.conversion_uplift()
    self.pooled_std_err()

    center = self.uplift
    spread = self.pld_std_err

    return scs.norm(center, spread)

`conversion_rate(self)` ¤

Conversion rate for a specific group

Source code in dietbox/abtest/stats.py

def conversion_rate(self):
    """Conversion rates of group A and group B

    Each rate is obtained from ``cal_conversion_rate(total, converted)`` and
    stored on the instance as ``self.A_cr`` / ``self.B_cr``.

    Returns the pair ``(A_cr, B_cr)``.
    """

    rate_a = self.A_cr = cal_conversion_rate(self.A_total, self.A_converted)
    rate_b = self.B_cr = cal_conversion_rate(self.B_total, self.B_converted)

    return rate_a, rate_b

`conversion_uplift(self)` ¤

Uplift in conversion rate

Relative uplift is the relative conversion rate difference between the test groups

Source code in dietbox/abtest/stats.py

def conversion_uplift(self):
    """Uplift in conversion rate between the two groups

    Delegates to ``cal_conversion_uplift`` with the totals and converted
    counts of both groups and stores the result in ``self.uplift``.

    NOTE(review): the original description calls this the *relative*
    conversion rate difference between the test groups; the exact formula
    lives in ``cal_conversion_uplift`` -- confirm there.
    """

    self.uplift = cal_conversion_uplift(
        self.A_total, self.B_total, self.A_converted, self.B_converted
    )

    return self.uplift

`null_distribution(self)` ¤

Generate the distribution for the null hypothesis

Source code in dietbox/abtest/stats.py

def null_distribution(self):
    """Generate the distribution for the null hypothesis

    Calls ``pooled_std_err`` to refresh ``self.pld_std_err`` and returns
    ``scs.norm(0, self.pld_std_err)``, i.e. a normal distribution centred on
    zero (assuming ``scs`` is ``scipy.stats``).
    """

    # The call stores the pooled standard error on the instance.
    self.pooled_std_err()
    spread = self.pld_std_err

    return scs.norm(0, spread)

`pooled_probability(self)` ¤

Pooled probability for two samples

This is better used as an intermediate value.

Source code in dietbox/abtest/stats.py

def pooled_probability(self):
    """Pooled probability for the two samples

    Delegates to ``cal_pooled_probability`` with the totals and converted
    counts of both groups and stores the result in ``self.probability``.

    This is better used as an intermediate value.
    """

    self.probability = cal_pooled_probability(
        self.A_total, self.B_total, self.A_converted, self.B_converted
    )

    return self.probability

`pooled_std_err(self)` ¤

Pooled standard error for two samples

For more information about the definition, refer to wikipedia: https://en.wikipedia.org/wiki/Pooled_variance

Source code in dietbox/abtest/stats.py

def pooled_std_err(self):
    """Pooled standard error for the two samples

    Refreshes ``self.probability`` through ``pooled_probability`` and passes
    it, together with both group totals, to ``cal_pooled_std_err``. The
    result is stored in ``self.pld_std_err`` and returned.

    For more information about the definition, refer to wikipedia:
    https://en.wikipedia.org/wiki/Pooled_variance
    """

    # Make sure the pooled probability reflects the current data.
    self.pooled_probability()

    self.pld_std_err = cal_pooled_std_err(
        self.probability, self.A_total, self.B_total
    )

    return self.pld_std_err

`report(self, with_data=None, pipeline=None)` ¤

Run pipeline and generate report

Source code in dietbox/abtest/stats.py

def report(self, with_data=None, pipeline=None):
    """Run pipeline and generate report

    Every attribute of the instance that is callable, whose name contains
    no ``"__"`` and is not ``"report"`` is called without arguments, in the
    order given by ``dir(self)``. The values those calls are expected to
    leave on the instance are then collected into a dictionary.

    ``with_data`` defaults to ``True``; when truthy, ``self.data`` is added
    under ``"data"``. ``self.name`` is added under ``"name"`` when truthy.

    NOTE(review): ``pipeline`` defaults to ``"all"`` but is not consulted
    afterwards -- every step always runs.
    """
    pipeline = "all" if pipeline is None else pipeline
    with_data = True if with_data is None else with_data

    def _is_step(attr_name):
        # Same checks, in the same order, for every name reported by dir().
        return (
            callable(getattr(self, attr_name))
            and "__" not in attr_name
            and attr_name != "report"
        )

    # Collect all step names first, then run them.
    step_names = list(filter(_is_step, dir(self)))
    for step_name in step_names:
        getattr(self, step_name)()

    kpi_part = {"a": self.A_cr, "b": self.B_cr}
    std_err_part = {"a": self.A_std_err, "b": self.B_std_err}
    summary = {
        "kpi": kpi_part,
        "std_err": std_err_part,
        "pooled_std_err": self.pld_std_err,
        "probability": self.probability,
        "uplift": self.uplift,
        "p_value": self.p,
        "z_score": self.z,
        "diff_std_err": self.diff_std_err,
    }

    if self.name:
        summary["name"] = self.name
    if with_data:
        summary["data"] = self.data

    return summary

`standard_error(self)` ¤

standard error

Source code in dietbox/abtest/stats.py

def standard_error(self):
    """Standard errors of group A and group B

    Each value is obtained from ``cal_standard_error(total, converted)`` and
    stored on the instance as ``self.A_std_err`` / ``self.B_std_err``.

    Returns the pair ``(A_std_err, B_std_err)``.
    """

    err_a = self.A_std_err = cal_standard_error(self.A_total, self.A_converted)
    err_b = self.B_std_err = cal_standard_error(self.B_total, self.B_converted)

    return err_a, err_b

`ABTestRatiosNaive` ¤

A naive AB Test ratios class

`conversion_rate(self)` ¤

Conversion rate for a specific group

Source code in dietbox/abtest/stats.py

def conversion_rate(self):
    """Conversion rates of group A and group B

    Each rate is obtained from ``cal_conversion_rate(total, converted)`` and
    stored on the instance as ``self.A_cr`` / ``self.B_cr``.

    Returns the pair ``(A_cr, B_cr)``.
    """

    rate_a = self.A_cr = cal_conversion_rate(self.A_total, self.A_converted)
    rate_b = self.B_cr = cal_conversion_rate(self.B_total, self.B_converted)

    return rate_a, rate_b

`conversion_uplift(self)` ¤

Uplift in conversion rate

Relative uplift is the relative conversion rate difference between the test groups

Source code in dietbox/abtest/stats.py

def conversion_uplift(self):
    """Uplift in conversion rate between the two groups

    Delegates to ``cal_conversion_uplift`` with the totals and converted
    counts of both groups and stores the result in ``self.uplift``.

    NOTE(review): the original description calls this the *relative*
    conversion rate difference between the test groups; the exact formula
    lives in ``cal_conversion_uplift`` -- confirm there.
    """

    self.uplift = cal_conversion_uplift(
        self.A_total, self.B_total, self.A_converted, self.B_converted
    )

    return self.uplift

`p_value(self, test=None)` ¤

calculate p-value

Source code in dietbox/abtest/stats.py

def p_value(self, test=None):
    """Calculate the p-value from ``self.data``

    The result of ``cal_p_value`` is stored in ``self.p`` and returned.

    ``test`` optionally selects the statistical test. It is forwarded to
    ``cal_p_value`` as its ``test`` keyword. When left as ``None`` the
    keyword is omitted, so ``cal_p_value`` applies its own default exactly
    as before.
    """

    if test is None:
        # Keep the original call shape so the helper's default test is used.
        self.p = cal_p_value(self.data)
    else:
        # Previously this argument was accepted but silently ignored.
        self.p = cal_p_value(self.data, test=test)

    return self.p

`pooled_probability(self)` ¤

Pooled probability for two samples

This is better used as an intermediate value.

Source code in dietbox/abtest/stats.py

def pooled_probability(self):
    """Pooled probability for the two samples

    Delegates to ``cal_pooled_probability`` with the totals and converted
    counts of both groups and stores the result in ``self.probability``.

    This is better used as an intermediate value.
    """

    self.probability = cal_pooled_probability(
        self.A_total, self.B_total, self.A_converted, self.B_converted
    )

    return self.probability

`pooled_std_err(self)` ¤

Pooled standard error for two samples

For more information about the definition, refer to wikipedia: https://en.wikipedia.org/wiki/Pooled_variance

Source code in dietbox/abtest/stats.py

def pooled_std_err(self):
    """Pooled standard error for the two samples

    Refreshes ``self.probability`` through ``pooled_probability`` and passes
    it, together with both group totals, to ``cal_pooled_std_err``. The
    result is stored in ``self.pld_std_err`` and returned.

    For more information about the definition, refer to wikipedia:
    https://en.wikipedia.org/wiki/Pooled_variance
    """

    # Make sure the pooled probability reflects the current data.
    self.pooled_probability()

    self.pld_std_err = cal_pooled_std_err(
        self.probability, self.A_total, self.B_total
    )

    return self.pld_std_err

`report(self, with_data=None, pipeline=None)` ¤

Run pipeline and generate report

Source code in dietbox/abtest/stats.py

def report(self, with_data=None, pipeline=None):
    """Run pipeline and generate report

    Every attribute of the instance that is callable, whose name contains
    no ``"__"`` and is not ``"report"`` is called without arguments, in the
    order given by ``dir(self)``. The values those calls are expected to
    leave on the instance are then collected into a dictionary.

    ``with_data`` defaults to ``True``; when truthy, ``self.data`` is added
    under ``"data"``. ``self.name`` is added under ``"name"`` when truthy.

    NOTE(review): ``pipeline`` defaults to ``"all"`` but is not consulted
    afterwards -- every step always runs.
    """
    pipeline = "all" if pipeline is None else pipeline
    with_data = True if with_data is None else with_data

    def _is_step(attr_name):
        # Same checks, in the same order, for every name reported by dir().
        return (
            callable(getattr(self, attr_name))
            and "__" not in attr_name
            and attr_name != "report"
        )

    # Collect all step names first, then run them.
    step_names = list(filter(_is_step, dir(self)))
    for step_name in step_names:
        getattr(self, step_name)()

    kpi_part = {"a": self.A_cr, "b": self.B_cr}
    std_err_part = {"a": self.A_std_err, "b": self.B_std_err}
    summary = {
        "kpi": kpi_part,
        "std_err": std_err_part,
        "pooled_std_err": self.pld_std_err,
        "probability": self.probability,
        "uplift": self.uplift,
        "p_value": self.p,
        "z_score": self.z,
        "diff_std_err": self.diff_std_err,
    }

    if self.name:
        summary["name"] = self.name
    if with_data:
        summary["data"] = self.data

    return summary

`standard_error(self)` ¤

standard error

Source code in dietbox/abtest/stats.py

def standard_error(self):
    """Standard errors of group A and group B

    Each value is obtained from ``cal_standard_error(total, converted)`` and
    stored on the instance as ``self.A_std_err`` / ``self.B_std_err``.

    Returns the pair ``(A_std_err, B_std_err)``.
    """

    err_a = self.A_std_err = cal_standard_error(self.A_total, self.A_converted)
    err_b = self.B_std_err = cal_standard_error(self.B_total, self.B_converted)

    return err_a, err_b

`z_score(self, significance_level=None, two_tailed=None)` ¤

Calculate z-score

Source code in dietbox/abtest/stats.py

def z_score(self, significance_level=None, two_tailed=None):
    """Calculate z-score

    ``significance_level`` falls back to ``self.p`` when not given, so the
    p-value must already be stored on the instance in that case.
    ``two_tailed`` falls back to ``True``.

    The value returned by ``cal_z_score(significance_level, two_tailed)`` is
    stored in ``self.z`` and returned.
    """

    level = self.p if significance_level is None else significance_level
    both_tails = True if two_tailed is None else two_tailed

    self.z = cal_z_score(level, both_tails)

    return self.z

`ABTestSeries` ¤

AB test of series data

`kpi(self)` ¤

KPI for each group: the conversion rate when `kpi_method` is `"count"`, otherwise the converted values as given

Source code in dietbox/abtest/stats.py

def kpi(self):
    """KPI of group A and group B

    When ``self.kpi_method`` is ``"count"`` the KPI is the conversion rate
    from ``cal_conversion_rate(total, converted)``. For any other method the
    converted values are used as the KPI unchanged.

    The values are stored as ``self.A_kpi`` / ``self.B_kpi`` and returned as
    the pair ``(A_kpi, B_kpi)``.
    """

    if self.kpi_method == "count":
        kpi_a = self.A_kpi = cal_conversion_rate(self.A_total, self.A_converted)
        kpi_b = self.B_kpi = cal_conversion_rate(self.B_total, self.B_converted)
    else:
        # The converted values already are the KPI.
        kpi_a = self.A_kpi = self.A_converted
        kpi_b = self.B_kpi = self.B_converted

    return kpi_a, kpi_b

`kpi_uplift(self)` ¤

Uplift in conversion rate

Relative uplift is the relative conversion rate difference between the test groups

Source code in dietbox/abtest/stats.py

def kpi_uplift(self):
    """Relative uplift of the KPI of group B over group A

    Computed as ``(B_kpi - A_kpi) / A_kpi`` from the values already stored
    on the instance, so ``self.A_kpi`` and ``self.B_kpi`` must be set
    beforehand. The result is stored in ``self.uplift`` and returned.
    """

    baseline = self.A_kpi
    difference = self.B_kpi - baseline
    self.uplift = difference / baseline

    return self.uplift

`p_value(self)` ¤

calculate p-value

Source code in dietbox/abtest/stats.py

def p_value(self):
    """Calculate the p-value for the series data

    Refreshes the KPI attributes through ``kpi`` and then evaluates
    ``cal_p_value`` on ``self.data`` with ``test="mannwhitney"``
    (presumably a Mann-Whitney U test -- confirm in ``cal_p_value``).

    The result is stored in ``self.p`` and returned.
    """

    # Kept from the original flow: the KPIs are recomputed before the test.
    self.kpi()

    result = self.p = cal_p_value(self.data, test="mannwhitney")

    return result

`report(self, with_data=None, pipeline=None)` ¤

Run pipeline and generate report

Source code in dietbox/abtest/stats.py

def report(self, with_data=None, pipeline=None):
    """Run pipeline and generate report

    Every attribute of the instance that is callable, whose name contains
    no ``"__"`` and is not ``"report"`` is called without arguments, in the
    order given by ``dir(self)``. The KPI, standard error, uplift and
    p-value left on the instance are then collected into a dictionary.

    ``with_data`` defaults to ``True``; when truthy, ``self.data`` is added
    under ``"data"``. ``self.name`` is added under ``"name"`` when truthy.

    NOTE(review): ``pipeline`` defaults to ``"all"`` but is not consulted
    afterwards -- every step always runs.
    """
    pipeline = "all" if pipeline is None else pipeline
    with_data = True if with_data is None else with_data

    def _is_step(attr_name):
        # Same checks, in the same order, for every name reported by dir().
        return (
            callable(getattr(self, attr_name))
            and "__" not in attr_name
            and attr_name != "report"
        )

    # Collect all step names first, then run them.
    step_names = list(filter(_is_step, dir(self)))
    for step_name in step_names:
        getattr(self, step_name)()

    kpi_part = {"a": self.A_kpi, "b": self.B_kpi}
    std_err_part = {"a": self.A_std_err, "b": self.B_std_err}
    summary = {
        "kpi": kpi_part,
        "std_err": std_err_part,
        "uplift": self.uplift,
        "p_value": self.p,
    }

    if self.name:
        summary["name"] = self.name
    if with_data:
        summary["data"] = self.data

    return summary

`standard_error(self)` ¤

standard error

Source code in dietbox/abtest/stats.py

def standard_error(self):
    """Standard errors of group A and group B

    Each value is obtained from ``cal_standard_error(total, converted)`` and
    stored on the instance as ``self.A_std_err`` / ``self.B_std_err``.

    Returns the pair ``(A_std_err, B_std_err)``.
    """

    err_a = self.A_std_err = cal_standard_error(self.A_total, self.A_converted)
    err_b = self.B_std_err = cal_standard_error(self.B_total, self.B_converted)

    return err_a, err_b