Source code for mds_2025_helper_functions.htv

import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import norm, t, chi2, f

def _htv_distributions(test_type, mu0, mu1, sigma, sample_size, df, df1, df2):
    """Return the frozen ``(null, alternative)`` scipy distributions for a test.

    Raises:
        ValueError: If ``test_type`` is unknown, if required degrees of
            freedom are missing, or if ``sigma``/``sample_size`` are not
            positive for z and t tests.
    """
    if test_type in ("z", "t"):
        if sigma <= 0 or sample_size <= 0:
            raise ValueError(
                "'sigma' and 'sample_size' must be positive for z and t tests."
            )
        std_error = sigma / np.sqrt(sample_size)
        if test_type == "z":
            # The z-test is drawn on the scale of the sample mean.
            return (
                norm(loc=mu0, scale=std_error),
                norm(loc=mu1, scale=std_error),
            )
        if df is None:
            df = sample_size - 1  # Default degrees of freedom
        # The t statistic is standardized, so the effect must be expressed in
        # standard-error units.  A shifted central t is used as a simple
        # approximation of the noncentral t distribution under H1.
        shift = (mu1 - mu0) / std_error
        return t(df=df), t(df=df, loc=shift)

    if test_type == "chi2":
        if df is None:
            raise ValueError(
                "Degrees of freedom (df) must be specified for chi-squared tests."
            )
        # No effect size is supplied for this test; the alternative is an
        # illustrative curve with one extra degree of freedom.
        return chi2(df=df), chi2(df=df + 1)

    if test_type == "anova":
        if df1 is None or df2 is None:
            raise ValueError(
                "Degrees of freedom (df1 and df2) must be specified for ANOVA tests."
            )
        # Illustrative alternative, as for the chi-squared case.
        return f(dfn=df1, dfd=df2), f(dfn=df1, dfd=df2 + 1)

    raise ValueError("Invalid test type. Choose 'z', 't', 'chi2', or 'anova'.")


def htv(test_output: dict, test_type: str = "z", alpha: float = 0.05,
        tail: str = "two-tailed"):
    """
    Visualize Type I (α) and Type II (β) errors in hypothesis testing.

    The null and alternative distributions are plotted together, the
    rejection region under the null curve is shaded as the Type I error and
    the non-rejection region under the alternative curve is shaded as the
    Type II error.  Critical values are marked with dashed vertical lines.

    Parameters:
        test_output (dict): Dictionary containing hypothesis test parameters:
            - 'mu0': Mean under the null hypothesis (H0), default 0
            - 'mu1': Mean under the alternative hypothesis (H1), default 1
            - 'sigma': Standard deviation (for z and t tests), default 1
            - 'sample_size': Sample size (for z and t tests), default 30
            - 'df': Degrees of freedom (required for chi-squared tests;
              optional for t tests, where it defaults to sample_size - 1)
            - 'df1': Numerator degrees of freedom (required for ANOVA)
            - 'df2': Denominator degrees of freedom (required for ANOVA)
        test_type (str): Type of test ('z', 't', 'chi2', 'anova').
        alpha (float): Significance level (Type I error rate), strictly
            between 0 and 1.
        tail (str): "two-tailed" for a two-sided test.  Any other value
            (conventionally "one-tailed") produces an upper-tailed test.

    Returns:
        tuple: (fig, ax) Matplotlib figure and axes objects.

    Raises:
        ValueError: If ``alpha`` is not in (0, 1), ``test_type`` is not
            recognised, required degrees of freedom are missing, or
            ``sigma``/``sample_size`` are not positive for z and t tests.

    Notes:
        - The z-test is drawn on the scale of the sample mean; the t,
          chi-squared and F tests are drawn on the test-statistic scale.
        - For the t-test the alternative is a central t shifted by
          ``(mu1 - mu0) / (sigma / sqrt(sample_size))``.
        - For chi-squared and ANOVA the alternative curve simply uses one
          additional degree of freedom and is illustrative only.

    Example:
        >>> import matplotlib.pyplot as plt
        >>> from mds_2025_helper_functions.htv import htv
        >>>
        >>> # Example: Visualizing a two-tailed z-test
        >>> test_params = {
        ...     'mu0': 100,        # Null hypothesis mean
        ...     'mu1': 105,        # Alternative mean
        ...     'sigma': 15,       # Standard deviation
        ...     'sample_size': 30  # Sample size
        ... }
        >>> fig, ax = htv(test_params, test_type="z", alpha=0.05, tail="two-tailed")
        >>> plt.show()

        >>> # Example: One-tailed t-test (df defaults to sample_size - 1)
        >>> test_params_t = {
        ...     'mu0': 0,
        ...     'mu1': 1.5,
        ...     'sigma': 1,
        ...     'sample_size': 25
        ... }
        >>> fig, ax = htv(test_params_t, test_type="t", alpha=0.01, tail="one-tailed")
        >>> plt.show()
    """
    if not 0 < alpha < 1:
        raise ValueError("alpha must be strictly between 0 and 1.")

    null_dist, alt_dist = _htv_distributions(
        test_type,
        test_output.get("mu0", 0),
        test_output.get("mu1", 1),
        test_output.get("sigma", 1),
        test_output.get("sample_size", 30),
        test_output.get("df", None),
        test_output.get("df1", None),
        test_output.get("df2", None),
    )

    # Critical values always come from the null distribution.
    two_tailed = tail == "two-tailed"
    if two_tailed:
        critical_values = (
            null_dist.ppf(alpha / 2),
            null_dist.ppf(1 - alpha / 2),
        )
    else:
        critical_values = (null_dist.ppf(1 - alpha),)

    # Build the x grid from the distributions that are actually drawn so the
    # curves and the critical values are always inside the plotted range,
    # whatever scale the chosen test lives on.
    tail_mass = 1e-3
    x_min = min(null_dist.ppf(tail_mass), alt_dist.ppf(tail_mass), *critical_values)
    x_max = max(
        null_dist.ppf(1 - tail_mass), alt_dist.ppf(1 - tail_mass), *critical_values
    )
    x = np.linspace(x_min, x_max, 1000)

    y_null = null_dist.pdf(x)
    y_alt = alt_dist.pdf(x)

    if two_tailed:
        crit_low, crit_high = critical_values
        reject_region = (x <= crit_low) | (x >= crit_high)
        accept_region = (x > crit_low) & (x < crit_high)
        markers = [
            (crit_low, f"Critical Value (Low) = {crit_low:.2f}"),
            (crit_high, f"Critical Value (High) = {crit_high:.2f}"),
        ]
    else:
        (crit,) = critical_values
        reject_region = x >= crit
        accept_region = x < crit
        markers = [(crit, f"Critical Value = {crit:.2f}")]

    # Plot distributions
    fig, ax = plt.subplots(figsize=(12, 6))
    ax.plot(x, y_null, label="Null Distribution (H0)", color="blue")
    ax.plot(x, y_alt, label="Alternative Distribution (H1)", color="red")

    # Type I error: H0 is true but the statistic lands in the rejection region.
    ax.fill_between(x, 0, y_null, where=reject_region, color="orange",
                    alpha=0.5, label="Type I Error (α)")
    # Type II error: H1 is true but the statistic stays out of that region.
    ax.fill_between(x, 0, y_alt, where=accept_region, color="green",
                    alpha=0.5, label="Type II Error (β)")
    for position, label in markers:
        ax.axvline(x=position, color="black", linestyle="--", label=label)

    # Add title, labels, legend and grid
    ax.set_title(f"Type I and Type II Errors for {test_type.upper()} Test", fontsize=16)
    ax.set_xlabel("Test Statistic", fontsize=14)
    ax.set_ylabel("Probability Density", fontsize=14)
    ax.legend(fontsize=12)
    ax.grid(alpha=0.3)

    return fig, ax