import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.stats as stats
from statsmodels.stats import stattools

class DiscreteRandomVariable:
    """Discrete uniform random variable over the integers ``a`` .. ``b``.

    Both bounds are inclusive, so ``DiscreteRandomVariable(1, 6)`` can
    produce every value from 1 through 6.
    """

    def __init__(self, a=0, b=1):
        """Store the inclusive lower bound ``a`` and upper bound ``b``."""
        # Distribution tag; left empty for the generic discrete case.
        self.variableType = ""
        self.low = a   # smallest value that can be drawn
        self.high = b  # largest value that can be drawn (inclusive)

    def draw(self, numberOfSamples):
        """Return ``numberOfSamples`` integer draws as a NumPy array."""
        # np.random.randint excludes its upper bound, so extend it by one;
        # otherwise ``high`` itself could never be drawn.
        return np.random.randint(self.low, self.high + 1, numberOfSamples)

# Histogram of a small sample of die rolls. The bin edges 1..7 give one
# unit-wide bin per integer value from 1 to 6.
DieRolls = DiscreteRandomVariable(1, 6)
plt.hist(DieRolls.draw(10), bins=[1,2,3,4,5,6,7], align='mid')
plt.xlabel('Value')
plt.ylabel('Occurrences')
plt.legend(['Die Rolls']);

# Same histogram with 10,000 draws from the same variable.
plt.hist(DieRolls.draw(10000), bins=[1,2,3,4,5,6,7], align='mid')
plt.xlabel('Value')
plt.ylabel('Occurrences')
plt.legend(['Die Rolls']);

class BinomialRandomVariable(DiscreteRandomVariable):
    """Binomial random variable: number of successes in a fixed number of trials.

    ``numberOfTrials`` is the number of trials per draw and
    ``probabilityOfSuccess`` is the success probability of each trial.
    """

    def __init__(self, numberOfTrials=10, probabilityOfSuccess=0.5):
        """Store the trial count and the per-trial success probability."""
        # Run the parent initializer first so the inherited ``low``/``high``
        # attributes exist on every instance, the same way
        # NormalRandomVariable chains to its own base class.
        super().__init__()
        self.variableType = "Binomial"
        self.numberOfTrials = numberOfTrials
        self.probabilityOfSuccess = probabilityOfSuccess

    def draw(self, numberOfSamples):
        """Return ``numberOfSamples`` binomial draws as a NumPy array."""
        return np.random.binomial(
            self.numberOfTrials, self.probabilityOfSuccess, numberOfSamples
        )

# Binomial(5, 0.5): histogram of 50 draws. The bin edges 0..6 with
# align='left' centre one bar on each possible outcome 0..5.
StockProbabilities = BinomialRandomVariable(5, 0.50)
plt.hist(StockProbabilities.draw(50), bins=[0, 1, 2, 3, 4, 5, 6], align='left')
plt.xlabel('Value')
plt.ylabel('Occurrences')
# The data plotted here are binomial draws, not die rolls.
plt.legend(['Stock Probabilities']);

# Same distribution with 10,000 draws.
plt.hist(StockProbabilities.draw(10000), bins=[0, 1, 2, 3, 4, 5, 6], align='left')
plt.xlabel('Value')
plt.ylabel('Occurrences');

# Lower the success probability to 0.25 and draw 10,000 samples again.
StockProbabilities = BinomialRandomVariable(5, 0.25)
plt.hist(StockProbabilities.draw(10000), bins=[0, 1, 2, 3, 4, 5, 6], align='left')
plt.xlabel('Value')
plt.ylabel('Occurrences');

class ContinuousRandomVariable:
    """Continuous uniform random variable with bounds ``a`` and ``b``."""

    def __init__(self, a=0, b=1):
        """Record the two bounds that are handed to ``np.random.uniform``."""
        self.low, self.high = a, b
        # Distribution tag; left empty for the generic continuous case.
        self.variableType = ""

    def draw(self, numberOfSamples):
        """Return ``numberOfSamples`` uniform draws as a NumPy array."""
        lower, upper = self.low, self.high
        return np.random.uniform(lower, upper, numberOfSamples)

# Uniform distribution on [a, b], evaluated on a 100-point grid.
a = 0.0
b = 8.0
x = np.linspace(a, b, 100)

# Density: the constant 1 / (b - a) at every grid point.
y = np.full_like(x, 1 / (b - a))
plt.plot(x, y)
plt.xlabel('Value')
plt.ylabel('Probability');

# Cumulative distribution: (x - a) / (b - a), computed for the whole grid
# in one vectorized expression.
y = (x - a) / (b - a)
plt.plot(x, y)
plt.xlabel('Value')
plt.ylabel('Probability');

class NormalRandomVariable(ContinuousRandomVariable):
    """Normally distributed random variable specified by mean and variance."""

    def __init__(self, mean=0, variance=1):
        """Initialise the base class, then store the normal parameters."""
        super().__init__()
        self.variableType = "Normal"
        self.mean = mean
        # Keep the standard deviation rather than the variance, because that
        # is the scale argument np.random.normal is called with in draw().
        self.standardDeviation = np.sqrt(variance)

    def draw(self, numberOfSamples):
        """Return ``numberOfSamples`` normal draws as a NumPy array."""
        return np.random.normal(
            loc=self.mean, scale=self.standardDeviation, size=numberOfSamples
        )

# Two normal densities with the same mean and different standard deviations.
mu_1 = 0
mu_2 = 0
sigma_1 = 1
sigma_2 = 2
x = np.linspace(-8, 8, 200)
# Normal PDF: exp(-(x - mu)^2 / (2 sigma^2)) / (sigma * sqrt(2 pi)).
# np.pi replaces the truncated literal 3.14159.
y = (1/(sigma_1 * np.sqrt(2 * np.pi))) * np.exp(-(x - mu_1)*(x - mu_1) / (2 * sigma_1 * sigma_1))
z = (1/(sigma_2 * np.sqrt(2 * np.pi))) * np.exp(-(x - mu_2)*(x - mu_2) / (2 * sigma_2 * sigma_2))
plt.plot(x, y, x, z)
plt.xlabel('Value')
plt.ylabel('Probability');

# Draw 10,000 samples from Binomial(n, p) and standardize them by
# subtracting n*p and dividing by sqrt(n*p*(1-p)).
n = 50
p = 0.25
X = BinomialRandomVariable(n, p)
X_samples = X.draw(10000)
binomial_mean = n * p
binomial_std = np.sqrt(n * p * (1 - p))
Z_samples = (X_samples - binomial_mean) / binomial_std

# Raw draws: one unit-wide bin per integer from 0 through n.
plt.hist(X_samples, bins=range(n + 2), align='left')
plt.xlabel('Value')
plt.ylabel('Probability');

# Standardized draws.
plt.hist(Z_samples, bins=20)
plt.xlabel('Value')
plt.ylabel('Probability');

# Series Y: starts at 100 and accumulates 100 standard-normal draws.
Y_initial = 100
X = NormalRandomVariable(0, 1)
Y_returns = X.draw(100) # generate 100 daily returns
Y = pd.Series(Y_initial + Y_returns.cumsum(), name='Y')
Y.plot()
plt.xlabel('Time')
plt.ylabel('Value');

# Series Z: starts at 50 and accumulates a fresh set of 100 draws.
Z_initial = 50
Z_returns = X.draw(100)
Z = pd.Series(Z_initial + Z_returns.cumsum(), name='Z')
Z.plot()
plt.xlabel('Time')
plt.ylabel('Value');

# Weights are each holding's share of the total quantity held.
Y_quantity = 20
Z_quantity = 50
total_quantity = Y_quantity + Z_quantity
Y_weight = Y_quantity / total_quantity
Z_weight = 1 - Y_weight

# Portfolio W: weighted combination of the two starting values and of the
# two return streams.
W_initial = Y_weight * Y_initial + Z_weight * Z_initial
W_returns = Y_weight * Y_returns + Z_weight * Z_returns
W = pd.Series(W_initial + W_returns.cumsum(), name='Portfolio')
W.plot()
plt.xlabel('Time')
plt.ylabel('Value');

# All three series on one chart.
combined = pd.concat([Y, Z, W], axis=1)
combined.plot()
plt.xlabel('Time')
plt.ylabel('Value');

# Distribution of the portfolio's per-step returns.
plt.hist(W_returns)
plt.xlabel('Return')
plt.ylabel('Occurrences');

# Download the free data-fetching helper module (data.py)
!curl -fsSLO https://raw.githubusercontent.com/algo-stocks/data/master/data.py

from data import get_prices

def sharpe_ratio(asset, riskfree=0):
    """Return the mean of the excess returns divided by their standard deviation.

    ``asset`` is an array-like of returns that supports subtraction, and
    ``riskfree`` is subtracted from it before both statistics are taken.
    The standard deviation is whatever ``np.std`` computes by default.
    """
    excess = asset - riskfree
    return np.mean(excess) / np.std(excess)

# Date window for the price download
start = '2022-01-01'
end = '2025-01-01'

# Significance level for the normality test
cutoff = 0.01

# Price series for BID over the window
prices = get_prices('BID', start_date=start, end_date=end).BID

# Daily returns; the first entry is dropped because it has no prior price
returns = prices.pct_change().iloc[1:]

# Jarque-Bera test: keep the p-value, skewness and kurtosis, discard the statistic
_, p_value, skewness, kurtosis = stattools.jarque_bera(returns)
reject_normality = p_value < cutoff
print("The JB test p-value is:", p_value)
print("We reject the hypothesis that the data are normally distributed:", reject_normality)
print("The skewness of the returns is:", skewness)
print("The kurtosis of the returns is:", kurtosis)

# Histogram of the daily returns
plt.hist(returns, bins=20)
plt.xlabel('Value')
plt.ylabel('Occurrences');

# Sample output of the cell above:
# The JB test p-value is: 2.5322403145608086e-37
# We reject the hypothesis that the data are normally distributed: True
# The skewness of the returns is: 0.01929429687924076
# The kurtosis of the returns is: 5.326634610141621

# Take the sample mean and standard deviation of the returns
sample_mean = np.mean(returns)
sample_std_dev = np.std(returns)

# Evaluation grid spanning four standard deviations either side of the sample
# mean. The lower bound is mean - 4*std; negating (mean + 4*std) would shift
# the grid off-centre whenever the mean is non-zero.
x = np.linspace(sample_mean - 4 * sample_std_dev, sample_mean + 4 * sample_std_dev, len(returns))
# Normal density with the sample mean and standard deviation
sample_distribution = ((1/np.sqrt(sample_std_dev * sample_std_dev * 2 * np.pi)) *
                       np.exp(-(x - sample_mean)*(x - sample_mean) / (2 * sample_std_dev * sample_std_dev)))
# density=True rescales the histogram so the fitted curve can be drawn over it
plt.hist(returns, bins=20, density=True);
plt.plot(x, sample_distribution)
plt.xlabel('Value')
plt.ylabel('Occurrences');

# Notebook section headings:

# Biến ngẫu nhiên rời rạc và liên tục

# Biến Ngẫu Nhiên Rời Rạc

# Phân phối Đồng đều

# Phân phối Nhị thức

# Biến ngẫu nhiên liên tục

# Phân phối đồng nhất

# Phân phối chuẩn

# Tìm phân phối phù hợp