# Boulder Workshop March 2016 - Conor Dolan & Abdel Abdellaoui
# R script - EFA & CFA on Neurotocism & Extraversion
############################################################################

	# Part 1: read the data 

# clear the memory
rm(list=ls(all=TRUE))

# load OpenMx
library(OpenMx)

# set workingdirectory
setwd("/home/abdel/Factor_Analysis_Big2/")

# read the data
datb5=read.table('rdataf')  	# read the female data

# assign variable names
varlabs=c('sex', 
 'n1', 'n2', 'n3', 'n4', 'n5', 'n6',
 'e1', 'e2', 'e3', 'e4', 'e5', 'e6',
 'o1', 'o2', 'o3', 'o4', 'o5', 'o6',
 'a1', 'a2', 'a3', 'a4', 'a5', 'a6',
 'c1', 'c2', 'c3', 'c4', 'c5', 'c6')
colnames(datb5)=varlabs

# select the variables of interest
isel=c(2:13)   # selection of variables n1-n6, e1-e6
datb2=datb5[,isel] # the data frame that we'll use below. 

# End of part 1.

#############################################################################

	# Part 2: summary statistics 

Ss1=cov(datb2[,1:12])	          # calculate the covariance matrix in females
print(round(Ss1,1))

Rs1=cov2cor(Ss1)                # convert to correlation matrix
print(round(Rs1,2))

Ms1=apply(datb2[,1:12],2,mean)	# females means
print(round(Ms1,2))

# End of part 2

#############################################################################

	# Part 3: Exploratory Factor analysis

# make a scree plot 
evals1=eigen(Rs1)$values	# eigenvalues females
print(round(evals1,2))
plot(1:12,evals1,type='b',ylab='eigenvalues',main=c('SCREEPLOT'))
lines(1:12,rep(1,12),type='l')

nfact=2 # number of factors... we expect 2 factors N & E

# use factanal to do three exploratory factor analyses.
fa1s1un=factanal(datb2[,1:12],factors=nfact,rotation='none') # unrotated efa
fa1s1va=factanal(datb2[,1:12],factors=nfact,rotation='varimax') # efa varimax factors uncorrelated
fa1s1pr=factanal(datb2[,1:12],factors=nfact,rotation='promax') # efa promax factor correlated

# print output
print(fa1s1un)
print(fa1s1va)
print(fa1s1pr)

# plot unrotated factor loadings
plot(fa1s1un$loadings)
text(fa1s1un$loadings,row.names(fa1s1un$loadings), cex=0.75, pos=4)
# plot rotated factor loadings
plot(fa1s1pr$loadings)
text(fa1s1pr$loadings,row.names(fa1s1pr$loadings), cex=0.75, pos=4)

# End EFA part 3  

#############################################################################

	# Part 4: CFA in OpenMx, confirmatory factor model

# Part 4A: saturated model 
ny=12	# number of indicators
ne=2	# expected number of common factors
varnames=colnames(datb2) # var names

### fit the saturated model ###

# define the means and covariance matrix in OpenMx to obtain the saturated model loglikelihood

Rs1=mxMatrix(type='Stand',nrow=ny,ncol=ny,free=TRUE,value=.05,
                          lbound=-.9,ubound=.9,name='cor')   			# 12x12 correlation matrix
Sds1=mxMatrix(type='Diag',nrow=ny,ncol=ny,free=TRUE,value=5,name='sds')  	# 12x12 diagonal matrix (st devs)
Mean1=mxMatrix(type='Full',nrow=1,ncol=ny,free=TRUE,value=25,name='mean1') 	# 1x12 vector means
MkS1=mxAlgebra(expression=sds%*%cor%*%sds,name='Ssat1')  			# expected covariance matrix

satmodels1=mxModel('part1', Rs1, Sds1, Mean1, MkS1) # assemble the model

# data + estimation function 
satdats1=mxModel("part2",
   mxData( observed=datb2, type="raw"),   # specify the data
   mxExpectationNormal(covariance="part1.Ssat1", means="part1.mean1",
   dimnames=varnames),                    # specify the fit function
   mxFitFunctionML()
                                    )     # data & expected cov/means
# fit the saturated model....
Models1 <-  mxModel("models1", satmodels1, satdats1,
                 mxAlgebra(part2.objective, name="minus2loglikelihood"),
                 mxFitFunctionAlgebra("minus2loglikelihood"))                                               
Models1_out <- mxRun(Models1)
#
round(mxEval(part1.cor,Models1_out),2)	# print correlation matrix
round(mxEval(diag(part1.sds),Models1_out),2)  # print stdevs
round(mxEval(part1.mean1,Models1_out),2)  # print means

summary(Models1_out)

# End part 4A

#############################################################################

	# Part4B: CFA 

# specify CFA model 1  
CFAm1=mxModel('CFAmodel1',

# factor loading matrix
Ly=mxMatrix(type='Full',nrow=ny,ncol=ne,
 free=matrix(c(
     T,F,
     T,F,
     T,F,
     T,F,
     T,F,  	# T,T
     T,F,
     F,T,
     F,T,
     F,T,   # T,T
     F,T,
     F,T,
     F,T),ny,ne,byrow=T),
 values=c(4,4,4,4,4,4,0,0,0,0,0,0,
             0,0,0,0,0,0,4,4,4,4,4,4),   # read colunm-wise
 labels=matrix(c(
    'f1_1','f1_2',
    'f2_1','f2_2',
    'f3_1','f3_2',
    'f4_1','f4_2',
    'f5_1','f5_2',  	#  T,T
    'f6_1','f6_2',
    'f7_1','f7_2',
    'f8_1','f8_2',
    'f9_1','f9_2',	# F,T
    'f10_1','f10_2',
    'f11_1','f11_2',
    'f12_1','f12_2'),ny,ne,byrow=T),name='Ly'),
## diagonal cov matrix of residuals
Te=mxMatrix(type='Diag',nrow=ny,ncol=ny,
              labels=c('rn1','rn2','rn3','rn4','rn5','rn6',
                       're1','re2','re3','re4','re5','re6'),
              free=TRUE,value=10,name='Te'), 
## latent correlation matrix 
Ps=mxMatrix(type='Symm',nrow=ne,ncol=ne,
                 free=c(FALSE,TRUE,FALSE),
                 labels=c('v1_0','r12_0', 'v2_0'),values=c(1,.0,1),name='Ps'),

# means
Tau=mxMatrix(type='Full',nrow=1,ncol=ny,free=TRUE,value=25,
                        labels=c('mn1','mn2','mn3','mn4','mn5','mn6',
                                 'me1','me2','me3','me4','me5','me6'),
                        name='Means'),

# define means and expected covariance matrix & 
MKS=mxAlgebra(expression=Ly%*%(Ps)%*%t(Ly)+Te%*%t(Te),name='Sigma'),
MKM=mxAlgebra(expression=Means,name='means')
                                           )

# the data
mxdat=mxData(observed=datb2, type="raw")

# specify how model expectations are calculated and fitted
estfun=mxModel("estfun",
   mxExpectationNormal( covariance="CFAmodel1.Sigma", means="CFAmodel1.means",dimnames=varnames),
   mxFitF=mxFitFunctionML())  

#  assemble the model, the data and define the estimation function 
cfamodel1=mxModel("CFM1",CFAm1, mxdat, estfun, 
                 mxAlgebra(estfun.objective, name="minus2loglikelihood"),
                 mxFitFunctionAlgebra("minus2loglikelihood"))
# fit the model 
CFM1_out = mxRun(cfamodel1)
mxCompare(Models1_out,CFM1_out)

# free extraversion loading on n5, and neuroticism loading on e3
cfamodel2=omxSetParameters(cfamodel1,labels=c("f5_2","f9_1"),free=c(T,T),values=c(0,0))
CFM2_out = mxRun(cfamodel2)
mxCompare(CFM2_out,CFM1_out)
est_Ly=mxEval(CFAmodel1.Ly,CFM2_out) # factor loadings
est_Ps=mxEval(CFAmodel1.Ps,CFM2_out) # correlation matrix latent factor
print(round(est_Ly,2))
print(round(est_Ps,3))

# check explained variance per item
est_Ly=mxEval(CFAmodel1.Ly,CFM2_out) # factor loadings
est_Ps=mxEval(CFAmodel1.Ps,CFM2_out) # correlation matrix latent factors
est_Te=mxEval(CFAmodel1.Te,CFM2_out) # residuals
est_Te=est_Te^2
Sfit1=est_Ly%*%est_Ps%*%t(est_Ly)    
Sfit=Sfit1+est_Te                    # expected covariance matrix
rel=diag(Sfit1)/diag(Sfit) # variance due to factors / total variance = proportion variance explained by factors
print(round(rel,3)) # print reliabilities

print(round(Sfit1,2))
print(round(Sfit,2))