# Boulder Workshop March 2016 - Conor Dolan & Abdel Abdellaoui # R script - EFA & CFA on Neurotocism & Extraversion ############################################################################ # Part 1: read the data # clear the memory rm(list=ls(all=TRUE)) # load OpenMx library(OpenMx) # set workingdirectory setwd("/home/abdel/Factor_Analysis_Big2/") # read the data datb5=read.table('rdataf') # read the female data # assign variable names varlabs=c('sex', 'n1', 'n2', 'n3', 'n4', 'n5', 'n6', 'e1', 'e2', 'e3', 'e4', 'e5', 'e6', 'o1', 'o2', 'o3', 'o4', 'o5', 'o6', 'a1', 'a2', 'a3', 'a4', 'a5', 'a6', 'c1', 'c2', 'c3', 'c4', 'c5', 'c6') colnames(datb5)=varlabs # select the variables of interest isel=c(2:13) # selection of variables n1-n6, e1-e6 datb2=datb5[,isel] # the data frame that we'll use below. # End of part 1. ############################################################################# # Part 2: summary statistics Ss1=cov(datb2[,1:12]) # calculate the covariance matrix in females print(round(Ss1,1)) Rs1=cov2cor(Ss1) # convert to correlation matrix print(round(Rs1,2)) Ms1=apply(datb2[,1:12],2,mean) # females means print(round(Ms1,2)) # End of part 2 ############################################################################# # Part 3: Exploratory Factor analysis # make a scree plot evals1=eigen(Rs1)$values # eigenvalues females print(round(evals1,2)) plot(1:12,evals1,type='b',ylab='eigenvalues',main=c('SCREEPLOT')) lines(1:12,rep(1,12),type='l') nfact=2 # number of factors... we expect 2 factors N & E # use factanal to do three exploratory factor analyses. fa1s1un=factanal(datb2[,1:12],factors=nfact,rotation='none') # unrotated efa fa1s1va=factanal(datb2[,1:12],factors=nfact,rotation='varimax') # efa varimax factors uncorrelated fa1s1pr=factanal(datb2[,1:12],factors=nfact,rotation='promax') # efa promax factor correlated # print output print(fa1s1un) print(fa1s1va) print(fa1s1pr) # plot unrotated factor loadings plot(fa1s1un$loadings) text(fa1s1un$loadings,row.names(fa1s1un$loadings), cex=0.75, pos=4) # plot rotated factor loadings plot(fa1s1pr$loadings) text(fa1s1pr$loadings,row.names(fa1s1pr$loadings), cex=0.75, pos=4) # End EFA part 3 ############################################################################# # Part 4: CFA in OpenMx, confirmatory factor model # Part 4A: saturated model ny=12 # number of indicators ne=2 # expected number of common factors varnames=colnames(datb2) # var names ### fit the saturated model ### # define the means and covariance matrix in OpenMx to obtain the saturated model loglikelihood Rs1=mxMatrix(type='Stand',nrow=ny,ncol=ny,free=TRUE,value=.05, lbound=-.9,ubound=.9,name='cor') # 12x12 correlation matrix Sds1=mxMatrix(type='Diag',nrow=ny,ncol=ny,free=TRUE,value=5,name='sds') # 12x12 diagonal matrix (st devs) Mean1=mxMatrix(type='Full',nrow=1,ncol=ny,free=TRUE,value=25,name='mean1') # 1x12 vector means MkS1=mxAlgebra(expression=sds%*%cor%*%sds,name='Ssat1') # expected covariance matrix satmodels1=mxModel('part1', Rs1, Sds1, Mean1, MkS1) # assemble the model # data + estimation function satdats1=mxModel("part2", mxData( observed=datb2, type="raw"), # specify the data mxExpectationNormal(covariance="part1.Ssat1", means="part1.mean1", dimnames=varnames), # specify the fit function mxFitFunctionML() ) # data & expected cov/means # fit the saturated model.... Models1 <- mxModel("models1", satmodels1, satdats1, mxAlgebra(part2.objective, name="minus2loglikelihood"), mxFitFunctionAlgebra("minus2loglikelihood")) Models1_out <- mxRun(Models1) # round(mxEval(part1.cor,Models1_out),2) # print correlation matrix round(mxEval(diag(part1.sds),Models1_out),2) # print stdevs round(mxEval(part1.mean1,Models1_out),2) # print means summary(Models1_out) # End part 4A ############################################################################# # Part4B: CFA # specify CFA model 1 CFAm1=mxModel('CFAmodel1', # factor loading matrix Ly=mxMatrix(type='Full',nrow=ny,ncol=ne, free=matrix(c( T,F, T,F, T,F, T,F, T,F, # T,T T,F, F,T, F,T, F,T, # T,T F,T, F,T, F,T),ny,ne,byrow=T), values=c(4,4,4,4,4,4,0,0,0,0,0,0, 0,0,0,0,0,0,4,4,4,4,4,4), # read colunm-wise labels=matrix(c( 'f1_1','f1_2', 'f2_1','f2_2', 'f3_1','f3_2', 'f4_1','f4_2', 'f5_1','f5_2', # T,T 'f6_1','f6_2', 'f7_1','f7_2', 'f8_1','f8_2', 'f9_1','f9_2', # F,T 'f10_1','f10_2', 'f11_1','f11_2', 'f12_1','f12_2'),ny,ne,byrow=T),name='Ly'), ## diagonal cov matrix of residuals Te=mxMatrix(type='Diag',nrow=ny,ncol=ny, labels=c('rn1','rn2','rn3','rn4','rn5','rn6', 're1','re2','re3','re4','re5','re6'), free=TRUE,value=10,name='Te'), ## latent correlation matrix Ps=mxMatrix(type='Symm',nrow=ne,ncol=ne, free=c(FALSE,TRUE,FALSE), labels=c('v1_0','r12_0', 'v2_0'),values=c(1,.0,1),name='Ps'), # means Tau=mxMatrix(type='Full',nrow=1,ncol=ny,free=TRUE,value=25, labels=c('mn1','mn2','mn3','mn4','mn5','mn6', 'me1','me2','me3','me4','me5','me6'), name='Means'), # define means and expected covariance matrix & MKS=mxAlgebra(expression=Ly%*%(Ps)%*%t(Ly)+Te%*%t(Te),name='Sigma'), MKM=mxAlgebra(expression=Means,name='means') ) # the data mxdat=mxData(observed=datb2, type="raw") # specify how model expectations are calculated and fitted estfun=mxModel("estfun", mxExpectationNormal( covariance="CFAmodel1.Sigma", means="CFAmodel1.means",dimnames=varnames), mxFitF=mxFitFunctionML()) # assemble the model, the data and define the estimation function cfamodel1=mxModel("CFM1",CFAm1, mxdat, estfun, mxAlgebra(estfun.objective, name="minus2loglikelihood"), mxFitFunctionAlgebra("minus2loglikelihood")) # fit the model CFM1_out = mxRun(cfamodel1) mxCompare(Models1_out,CFM1_out) # free extraversion loading on n5, and neuroticism loading on e3 cfamodel2=omxSetParameters(cfamodel1,labels=c("f5_2","f9_1"),free=c(T,T),values=c(0,0)) CFM2_out = mxRun(cfamodel2) mxCompare(CFM2_out,CFM1_out) est_Ly=mxEval(CFAmodel1.Ly,CFM2_out) # factor loadings est_Ps=mxEval(CFAmodel1.Ps,CFM2_out) # correlation matrix latent factor print(round(est_Ly,2)) print(round(est_Ps,3)) # check explained variance per item est_Ly=mxEval(CFAmodel1.Ly,CFM2_out) # factor loadings est_Ps=mxEval(CFAmodel1.Ps,CFM2_out) # correlation matrix latent factors est_Te=mxEval(CFAmodel1.Te,CFM2_out) # residuals est_Te=est_Te^2 Sfit1=est_Ly%*%est_Ps%*%t(est_Ly) Sfit=Sfit1+est_Te # expected covariance matrix rel=diag(Sfit1)/diag(Sfit) # variance due to factors / total variance = proportion variance explained by factors print(round(rel,3)) # print reliabilities print(round(Sfit1,2)) print(round(Sfit,2))