#  practical phenotypic factor analysis 
#  --------------------------------------
#
#  rm(list=ls(all=TRUE))			# clear the memory
library(umx)

# rename the function 
EFA=factanal  			
#
#   the data are in a dataframe called GFF - built in to umx  
#   data acknowledgement: NTR data. Thanks Meike Bartels, Dorret Boomsma
#
data(GFF) # the data

# get the data of the female twins
#
GFFfem=GFF[(GFF$zyg_6grp == "MZFF" | GFF$zyg_6grp == "DZFF"),]
baseNames <- c("gff", "hap", "sat", "AD", "SOMA", "SOC")
#
#
# gff_T1 General family functioning  
# hap hap_T1 General happiness 
# sat_T1 Satisfaction with life 
# AD_T1 Anxiety and Depression  
# SOMA_T1 Somatic complaints  
# SOC_T1 Social problems 
#
# assign the _T1 and _T2 addons#
var_names1=paste(baseNames,"_T1",sep="")
var_names2=paste(baseNames,"_T2",sep="")
#
fadat1=GFFfem[,var_names1]        # get the data we want to factor analyze twin 1
fadat2=GFFfem[,var_names2]        # get the data we want to factor analyze twin 2
#
fadat1=na.omit(fadat1)   # get rid of missing 
fadat2=na.omit(fadat2)   # get rid of missing 
#   
nv=dim(fadat1)[2] 	# number of variables = 6
#
# starting point --------------------------------------------- 
#
fadat=fadat1
#
colnames(fadat)=baseNames
# r
# always look at the data
#
N1=dim(fadat)[1]  	# get the sample size
#
S1=cov(fadat)  		# get the covariance matrix 
R1=cov2cor(S1) 		# get the correlation matrix
#
# figures

hist(fadat[,1],10,main=colnames(fadat)[1])
hist(fadat[,2],10,main=colnames(fadat)[2])
hist(fadat[,3],10,main=colnames(fadat)[3])
hist(fadat[,4],10,main=colnames(fadat)[4])
hist(fadat[,5],10,main=colnames(fadat)[5])
hist(fadat[,6],10,main=colnames(fadat)[6])
#
print(round(R1,3))
#

# reset the plot layout:
layo=matrix(1,1,1)
layout(layo)

evs=eigen(R1)$values  	# get the eigenvalues of the correlation matrix
# plot eigenvalues
#
cl=rep(2,nv); cl[evs<1]=3  # cosmetics
plot(1:nv,evs,type='b',col=cl,lwd=3)
lines(1:nv,rep(1,nv),type='l')
#
# "gff", "hap", "sat", "AD", "SOMA", "SOC"
no_m1_fa = EFA(fadat,factors=1)
no_m2_fa = EFA(~gff+hap+sat+AD+SOMA+SOC ,factors=2,rotation='none',data=fadat)
pro_m2_fa = EFA(~gff+hap+sat+AD+SOMA+SOC,factors=2,rotation='promax',data=fadat)
vari_m2_fa = EFA(~gff+hap+sat+AD+SOMA+SOC,factors=2,rotation='varimax',data=fadat)

# Lets look at the single factor model:
print(no_m1_fa, digits = 3, cutoff = .0, sort = FALSE) 

# poor fit! LEts lok at the 2 factor model:
print(no_m2_fa, digits = 3, cutoff = .0, sort = FALSE) 


##
print(vari_m2_fa, digits = 3, cutoff = .0, sort = FALSE) 
print(pro_m2_fa, digits = 3, cutoff = .0, sort = FALSE) 
#
#
#how to do it in umx
#pro_m2_umx = umxEFA(baseNames, factors = 2, data = fadat, rotation = "promax")
#vari_m2_umx = umxEFA(baseNames, factors = 2, data = fadat, rotation = "varimax")