# coding: utf-8 ## import the packages import numpy as np import matplotlib.pyplot as plt from scipy.stats import special_ortho_group from sklearn.preprocessing import scale ## a function to decompose a square matrix into the three peices ## a1 is the trace term, a2 is the traceless symmetric term, a3 is the anti-symmetric term def decompose(a): dim1 , dim = a.shape if dim1 != dim: return None a_asym = ( a - a.transpose() )/2. a_sym = ( a + a.transpose() )/2. a1 = np.identity(dim)*a.trace()/float(dim) a2 = a_sym - a1 a3 = a_asym return a1, a2, a3 ## a function that takes a and b matrices a return the trace of the dot product of a and transpose of b def trace_dot_trans(a,b): return np.trace( np.dot( a, b.transpose() ) ) ## a function that calculates the RV of X and Y matrices def RV(X,Y): A = X.dot(X.transpose()) B = Y.dot(Y.transpose()) RV = np.trace(A.dot(B))/np.sqrt( np.trace(A.dot(A.transpose()))*np.trace(B.dot(B.transpose())) ) return RV ## RV Modified of X and Y matrices def RV_I(X,Y): A = X.dot(X.transpose()) B = Y.dot(Y.transpose()) A_tild = A - np.diag(np.diag(A)) B_tild = B - np.diag(np.diag(B)) V1 = A_tild.flatten() V1p = A_tild.transpose().flatten() V2 = B_tild.flatten() V2p = B_tild.transpose().flatten() RV_I = np.dot(V1p,V2)/np.sqrt( np.dot(V1,V1)*np.dot(V2,V2) ) return RV_I ## this function calculates and returns the three families of RV_1, RV_2, and RV_3 that are defined in the paper def RV_II(X,Y): A = np.dot(X.transpose(),X) B = np.dot(Y.transpose(),Y) C = np.dot(X.transpose(),Y) ## a = decompose(A) b = decompose(B) c = decompose(C) ## denom1 = np.sqrt( trace_dot_trans(a[0],a[0])*trace_dot_trans(b[0],b[0]) ) denom2 = np.sqrt( trace_dot_trans(a[1],a[1])*trace_dot_trans(b[1],b[1]) ) ## RV1 rv1 = trace_dot_trans(c[0],c[0])/denom1 rv1 = np.sqrt(rv1) ## RV2 rv2 = (trace_dot_trans(c[1],c[1])/denom2 - 0.5)*2. ## RV3 rv3 = 1. - 2.*trace_dot_trans(c[2],c[2])/denom2 ## return np.array([rv1, rv2, rv3]) ################################################################# ## Simulations ################################################################# ## number of features #J = 130 J = 100 ## number of samples I_low = 2 I_up = 52 ## some containers for later I_vec = [] means = [[],[],[],[],[]] stds = [[],[],[],[],[]] ## repeat it for different sample numbers for I in range(I_low,I_up,2): RV_vec = [] RV_I_vec = [] RV_II0_vec = [] RV_II1_vec = [] RV_II2_vec = [] ## repeat the simulation for 100 times for _ in range(100): ## generate X and Y matrices X = np.random.normal(size=(I,J)) Y = np.random.normal(size=(I,J)) ##----------------------------------------------------------## ## uncommenting this leads to introduction of some correlation ## number of rows to be copied #n_copy_rows = 2 #n_copy_rows = int(I/2) #for row in range(n_copy_rows): # Y[row,:] = X[row,:] ##----------------------------------------------------------## ## calculate the correlation coefficients rv = RV(X,Y) rv_r = RV_I(X,Y) rv_arr = RV_II(X,Y) ## store the correlations. Later a mean and std of them is calculated RV_vec.append(rv) RV_I_vec.append(rv_r) RV_II0_vec.append(rv_arr[0]) RV_II1_vec.append(rv_arr[1]) RV_II2_vec.append(rv_arr[2]) I_vec.append(I) means[0].append(np.mean(RV_vec)) means[1].append(np.mean(RV_I_vec)) means[2].append(np.mean(RV_II0_vec)) means[3].append(np.mean(RV_II1_vec)) means[4].append(np.mean(RV_II2_vec)) stds[0].append(np.std(RV_vec)) stds[1].append(np.std(RV_I_vec)) stds[2].append(np.std(RV_II0_vec)) stds[3].append(np.std(RV_II1_vec)) stds[4].append(np.std(RV_II2_vec)) ################################################################# ## End of Simulations ################################################################# ## plot the results font = { 'family':'serif', 'size':14, 'style':'normal', 'weight':'medium', 'fontname':'Arial' } fig, ax = plt.subplots(1) ax.errorbar(x=I_vec,y=means[0],yerr=np.array(stds[0])/2.,fmt='.k',capsize=3,label='RV',linewidth=0.3) ax.errorbar(x=I_vec,y=means[1],yerr=np.array(stds[1])/2.,fmt='+b',capsize=3,label=r'RV$_{Mod}$',linewidth=0.3) ax.errorbar(x=I_vec,y=means[2],yerr=np.array(stds[2])/2.,fmt='xy',capsize=3,label=r'RV$_1$',linewidth=0.3) ax.errorbar(x=I_vec,y=means[3],yerr=np.array(stds[3])/2.,fmt='*r',capsize=3,label=r'RV$_2$',linewidth=0.3) ax.errorbar(x=I_vec,y=means[4],yerr=np.array(stds[4])/2.,fmt='^g',capsize=3,label=r'RV$_3$',linewidth=0.3) ax.set_xlabel('sample size',fontdict=font) ax.legend(loc='upper right') ax.xaxis.set_major_locator(plt.MaxNLocator(15)) plt.tight_layout() plt.show()