Tuesday, December 12, 2023

SSA attempt functioning: The main SSA codes copied from StackExchange

  

The synthetic data imitating the business cycle is my original although the functions of generating the complex time series refers to my supervisor's work.

The set of Python codes implementing Singular Spectrum Analysis is directly copied from "Singular spectrum analysis and their "eigentriplets" (Stack Exchange). This SSA method is almost identical to the one based on MATLAB "Singular Spectrum Analysis - Beginners guide" (MathWorks) which I attempted to duplicate with Python. 

This set of Python codes succeeds in the reconstruction component (RC) as shown in the graph.


# importing necessary tools
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import seaborn as sns
import networkx as nx
import numpy as np
import pandas as pd
import random
import statistics
import scipy.linalg
from numpy import linalg
from scipy import stats
import statsmodels.formula.api as sm
import math
import cmath
from scipy.linalg import hankel

# Showing the plots bigger
import matplotlib.pyplot as plt
plt.rcParams["figure.figsize"] = (20,10)


## Creating a synthetic time series imitating the business cycle

# Defining the function for the synthetic complex exponential  
def syn_exp(amp,freq,x):
    t=len(amp)
    y=[]
    for omg in range(len(x)):
        yi=0
        for ap in range(len(amp)):
            yi+=amp[ap]*cmath.exp(freq[ap]*x[omg])
        y.append(yi)
    return y

# Defining complex number
i=complex(0,1)
i


# Creating the synthetic data imitating the business cycle
phi = [0.03, 3*i, 10*i, -1+30*i, -1-20*i, 0.01+5*i];
alpha = [4, 3, 2, 2, 2, 1];

# sampling rate
S = 100

# number of exponentials in the signal
n = 600

# f_syn: samples used by Prony's methhod
_2n_1=(2*n-1)+1
omega = [2*math.pi/S*(_0__2n_1) for _0__2n_1 in range(_2n_1)]

# Defininig the synthetic time series
f_syn = syn_exp(alpha, phi, omega)

# Defining the noise
noise=[np.random.normal(0) for k in range(len(f_syn))]
noise=noise/linalg.norm(noise)

# eps: noise level, also try 10^(-0)
eps =  10^(-1);
#eps = 0

# f = f_syn added with random noise
f = f_syn + eps*noise;
f

# Re-defining the time series and its length
s=f;
N=len(f)


## The SSA starts
print("The following refers to https://stats.stackexchange.com/questions/544105/singular-spectrum-analysis-and-their-eigentriplets")
print("The SSA method of this set of Python codes is identical to the embedded covariance method of https://uk.mathworks.com/matlabcentral/fileexchange/58967-singular-spectrum-analysis-beginners-guide")

# Getting window length L and lagged length K
L = 500-1
K = N - L + 1

# Constructing the time lagged Hankel matrix
X=np.zeros((K,L))
for m in range (0, L):
    X[:,m] = s[m:K+m]
   
# Trajectory matrix
Cemb = np.dot(X.T,X)/K

# Eigen decomposition
eigenValues, eigenVectors = np.linalg.eig(Cemb)
idx = eigenValues.argsort()[::-1]  
eigenValues = eigenValues[idx]
eigenVectors = eigenVectors[:,idx]

# Vectors of Principal Components
PC = np.dot(X,eigenVectors)

# Pre-allocating Reconstructed Component Matrix
RC = np.zeros((N, L))
# Reconstruct the elementary matrices without storing them
for k in range(L):
    myBuf = np.outer(PC[:,k], eigenVectors[:,k].T)
    myBuf = myBuf[::-1]
    RC[:,k] = [myBuf.diagonal(j).mean()\
               for j in range(-myBuf.shape[0]+1, myBuf.shape[1])]
       
# # First 6 RC
# fig, ax = plt.subplots(3,2)
# ax = ax.flatten()
# for k in range (0, 6):
#     ax[k].plot(RC[:,k])
#     ax[k].set_title(str(i))
# plt.tight_layout()


# Plotting a graph comparison
RawSSA=RC[:,0]
for k in range(L-1):
    RawSSA=np.add(RawSSA,RC[:,k+1])
SCs=10 # Smoothed cycle components
SmoothedSSA=RC[:,0]
for k in range(SCs-1):
    SmoothedSSA=np.add(SmoothedSSA,RC[:,k+1])
plt.subplot(2,1,1)
plt.title("Original vs. Reconstructed time series")
plt.plot(s[:])
plt.plot(RawSSA)
plt.subplot(2,1,2)
plt.title("Reconstructed raw vs. smoothed time series")
plt.plot(RawSSA)
plt.plot(SmoothedSSA)