This test is based on the probability density function (PDF) of the Gaussian (normal) distribution.
Random values are drawn from this distribution with a mean of 0 and a standard deviation of 1.
A series of these randomly generated values is then tested with a one-sample t-test to determine whether there is sufficient evidence to reject the null hypothesis (mean = 0).
In most cases, at a 95% confidence level (the standard level used in economics and finance), there is insufficient evidence to reject the null hypothesis, i.e., the sample is consistent with a mean of 0.
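However, when the experiment is repeated a large number of times (e.g., 1000 trials) at the same confidence level, there are instances where there is sufficient evidence to reject the null hypothesis, i.e., the test concludes that the mean is not 0 even though the data were generated with a mean of 0. These are Type I errors, which are expected in roughly 5% of trials at a 95% confidence level.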
# For statistics
import random
import numpy as np
from scipy import stats
from scipy.stats import norm
# For plotting
import matplotlib.pyplot as plt
from matplotlib.patches import Polygon
# For table output
import pandas as pd
# Default figure size (width, height) applied to every plot created below
plt.rcParams.update({"figure.figsize": (10, 7)})
# Reference list: documentation and tutorials consulted for this analysis.
# Entries starting with a space are sub-items of the preceding entry.
LinkList = [
    "List of colours: https://matplotlib.org/stable/gallery/color/named_colors.html",
    "Random Normal Distribution (Gausian PDF): https://numpy.org/doc/stable/reference/random/generated/numpy.random.normal.html",
    "Normal Distribution Functions: https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.t.html",
    "T-test: ",
    " - One sample: https://builtin.com/data-science/t-test-python",
    " - Independent: https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.ttest_ind.html",
    " - Confidence Interval:",
    " https://stackoverflow.com/questions/15033511/compute-a-confidence-interval-from-sample-data",
    " https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.t.html",
    "Plot Filling https://matplotlib.org/stable/gallery/lines_bars_and_markers/fill_between_demo.html",
]
# Print one reference per line (no index bookkeeping needed)
print(*LinkList, sep="\n")
# Simulation parameters:
#   n     - number of data points to draw
#   mu    - mean of the generating normal distribution
#   sigma - standard deviation of the generating normal distribution
n, mu, sigma = 1000, 0, 1
# Baseline sample: n values drawn from a normal distribution N(mu, sigma)
x = np.random.normal(loc=mu, scale=sigma, size=n)
# Gaussian Probability Density Function (PDF)
def Gausian_PDF(bins, mu, sigma):
    """Evaluate the Gaussian (normal) probability density function.

    Computes ``1 / (sigma * sqrt(2*pi)) * exp(-(bins - mu)**2 / (2 * sigma**2))``
    element-wise.

    Parameters
    ----------
    bins : scalar or array-like
        Point(s) at which the density is evaluated. Plain lists and tuples
        are accepted; they are converted to a NumPy array first.
    mu : float
        Mean of the distribution.
    sigma : float
        Standard deviation of the distribution.

    Returns
    -------
    numpy scalar or numpy.ndarray
        Density value(s), with the same shape as ``bins``.
    """
    # Coerce so that list/tuple input supports the arithmetic below
    points = np.asarray(bins)
    scale = 1 / (sigma * np.sqrt(2 * np.pi))  # normalising constant
    return scale * np.exp(-(points - mu) ** 2 / (2 * sigma ** 2))
# Plotting function
def Disn_Hist(x, Confidence_Level):
    """Plot a density histogram of ``x`` with the fitted and null-hypothesis curves.

    Draws, on the current matplotlib figure:
      * a 100-bin histogram of ``x`` normalised to a probability density,
      * the Gaussian PDF using the sample mean and standard deviation of ``x``,
        plus a vertical line at the sample mean,
      * the Gaussian PDF of the null hypothesis (mean 0, standard deviation 1),
        plus a vertical line at the null-hypothesis mean,
    then shows the figure.

    Parameters
    ----------
    x : array-like
        Sample data.
    Confidence_Level : float
        Accepted for interface compatibility; not used by the current
        implementation.

    Returns
    -------
    None
    """
    # Histogram normalised to a probability density; only the bin edges are reused
    No_Bins = 100  # number of histogram bars
    _, bins, _ = plt.hist(x, No_Bins, density=True, color="lightgreen", label='Probability Density')

    # Normal curve fitted to the sample (np.std uses its default ddof)
    x_bar = np.mean(x)
    s = np.std(x)
    g_x = Gausian_PDF(bins, x_bar, s)
    plt.plot(bins, g_x, linewidth=2, color='Green', label='Sample Distribution')
    plt.axvline(x=x_bar, label=f'x_bar is {round(x_bar,3)}', color='Green')  # sample mean

    # Normal curve of the null hypothesis
    mu = 0
    sigma = 1
    g_0 = Gausian_PDF(bins, mu, sigma)
    plt.plot(bins, g_0, linewidth=2, color='Grey', label='Null Hypothesis', linestyle='dashed')
    # The reference line belongs at the null-hypothesis mean (mu), not at the
    # sample mean, otherwise it simply overlaps the green x_bar line.
    plt.axvline(x=mu, label=f'mu is {round(mu,3)}', color='Grey')

    # Graph output
    plt.legend(loc="upper left")
    plt.title("Histogram & Mean Comparison")
    plt.show()
# Plot the baseline sample; the confidence level is given in percent
Confidence_Level = 95
Disn_Hist(x, Confidence_Level=Confidence_Level)
# T-test testing the null-hypothesis
def Ttest(x, mu, Confidence_Level, Print):
    """Run a two-sided one-sample t-test of ``x`` against ``mu`` and plot it.

    Draws on the current matplotlib figure: a standard normal reference curve
    over [-3, 3], the two shaded rejection tails for the chosen confidence
    level (bounded by the normal quantiles), a grey line at 0 and a magenta
    line at the observed t-value.

    Parameters
    ----------
    x : array-like
        Sample data.
    mu : float
        Mean under the null hypothesis.
    Confidence_Level : float
        Confidence level in percent (e.g. 95).
    Print : int
        When equal to 1, print the confidence interval of the mean, the
        t-value, the p-value and the test verdict.

    Returns
    -------
    tuple
        ``(t, p)`` as returned by ``scipy.stats.ttest_1samp``.
    """
    x_bar = np.mean(x)
    t, p = stats.ttest_1samp(x, mu, alternative='two-sided')
    SigLevel = 1 - Confidence_Level / 100  # significance level (alpha)

    # Confidence interval of the mean. Degrees of freedom come from the sample
    # actually passed in, not from a module-level sample size.
    std_err = stats.sem(x)
    t_crit = stats.t.ppf((1 + Confidence_Level / 100) / 2., len(x) - 1)
    CI = [x_bar - std_err * t_crit, x_bar + std_err * t_crit]

    # Reference curve on the t-value axis. It is centred at 0 (not at mu)
    # so that it lines up with the shaded tails and the grey reference line,
    # which are both drawn around 0.
    points = np.linspace(-3, 3, 100)
    g_0 = Gausian_PDF(points, 0, 1)
    plt.plot(points, g_0, linewidth=2, color='Grey', label='Sample Distribution')
    plt.axvline(x=0, color='Grey')  # null hypothesis
    plt.title('t-test')
    plt.xlabel('t-value')

    # Lower rejection tail
    LowerT_Pnts = np.linspace(min(points), stats.norm.ppf(SigLevel / 2))
    LowerT_Dsts = Gausian_PDF(LowerT_Pnts, 0, 1)
    plt.fill_between(LowerT_Pnts, LowerT_Dsts, color='darkseagreen')

    # Upper rejection tail
    UpperT_Pnts = np.linspace(stats.norm.ppf(1 - SigLevel / 2), max(points))
    UpperT_Dsts = Gausian_PDF(UpperT_Pnts, 0, 1)
    plt.fill_between(UpperT_Pnts, UpperT_Dsts, color='darkseagreen')

    plt.axvline(x=t, label=f't is {round(t,3)}', color='Magenta')  # observed t-value

    h = ['H0: mu = x_bar ∴ No sufficient evidence to reject the null hypothesis (H0).',
         'H1: mu ≠ x_bar ∴ Sufficient evidence to reject the null hypothesis (H0).']
    if Print == 1:
        print(f'Confidence Interval: {CI}')
        print(f'T-value: {t}, p-value: {p}')
        # Reject H0 exactly when the p-value is below the significance level
        print(h[1] if p < SigLevel else h[0])
    return t, p
# Single demonstration: draw a fresh sample and test it against a mean of 0
print('\n')
print('T-test testing the null-hypothesis')
Confidence_Level = 95    # percent
ShowingTstatDetail = 1   # 1 makes Ttest print its numeric results
x = np.random.normal(loc=mu, scale=sigma, size=n)  # fresh sample of n values
plt.figure()
Ttest(x, 0, Confidence_Level, ShowingTstatDetail)
# Why there is sometimes no significant evidence proving it is zero.
def Type_I_Error_test(Confidence_Level, Number_of_datapoints, Number_of_trial, Print):
    """Repeat the one-sample t-test on fresh samples and collect the rejecting trials.

    Each trial draws ``Number_of_datapoints`` values from a normal distribution
    with mean 0 and standard deviation 1 and tests them against a mean of 0.
    When the p-value falls below the significance level
    (``1 - Confidence_Level / 100``), a new figure is opened, ``Ttest`` is
    called for that sample, and the trial's statistics are recorded.

    NOTE(review): the source lost its indentation; the three ``append`` calls
    are assumed to sit inside the ``if`` so that only rejecting trials are
    recorded - confirm against the original notebook.

    Parameters
    ----------
    Confidence_Level : float
        Confidence level in percent.
    Number_of_datapoints : int
        Sample size of each trial.
    Number_of_trial : int
        Number of trials to run.
    Print : int
        Forwarded to ``Ttest``.

    Returns
    -------
    tuple of lists
        ``(X_bar, T, P)``: sample means, t-values and p-values of the
        recorded trials.
    """
    alpha = 1 - Confidence_Level / 100
    means, t_values, p_values = [], [], []
    for _ in range(Number_of_trial):
        sample = np.random.normal(0, 1, Number_of_datapoints)
        t_stat, p_val = stats.ttest_1samp(sample, 0)
        if not (p_val < alpha):
            continue  # nothing to record for this trial
        plt.figure()
        Ttest(sample, 0, Confidence_Level, Print)
        means.append(np.mean(sample))
        t_values.append(t_stat)
        p_values.append(p_val)
    return means, t_values, p_values
# Repeated-trial experiment: how often a sample generated with mean 0 still
# leads the t-test to reject the null hypothesis.
print('\n')
print('Why there is sometimes no significant evidence proving it is zero.')
Confidence_Level = 95  # percent
Number_of_datapoints = 1000
Number_of_trial = 100
ShowingTstatDetail = 0
X_bar, T, P = Type_I_Error_test(Confidence_Level, Number_of_datapoints, Number_of_trial, ShowingTstatDetail)

# Count rejections explicitly from the returned p-values, so the report is
# correct whether P holds every trial or only the rejecting ones.
sig_level = 1 - Confidence_Level / 100
rejections = sum(1 for p_value in P if p_value < sig_level)
print(f"There are {rejections} times out of {Number_of_trial} trials where the null hypothesis was rejected.")
# Multiply by 100: the value is printed with a percent sign
print(f"The percentage of trials with sufficient evidence to reject the null hypothesis is {round(100 * rejections / Number_of_trial, 2)}%")

d = {'x_bar': X_bar, 't': T, 'p': P}
df = pd.DataFrame(data=d)
try:
    display(df)  # rich table output; the name is only defined under IPython/Jupyter
except NameError:
    print(df)  # plain-text fallback when run as an ordinary script