# ----------------------------------------------------------------------- # Program: CreateSimData.R # Author: Matt Keller # Date: March 10, 2016 # ----------------------------------------------------------------------- setwd("/Users/matthewkeller/Documents/Teaching/TwinWorkshops/Hackathon2016") # ----------------------------------------------------------------------- # BEGIN HACKATHON! # ----------------------------------------------------------------------- # Starting things up # ----------------------------------------------------------------------- # Where is the working directory? getwd() #If it's not where you want it to be, change it (Under File) # or just use setwd("/home/xxx/xxx") function where the '/home/xxx/xxx' is the full path # of where your script is # Get the two packages we'll use #source('http://openmx.psyc.virginia.edu/getOpenMx.R') #no need to run this line unless # you don't have OpenMx library installed yet require(OpenMx) #and make sure we're using the NPSOL optimizer mxOption(NULL,"Default optimizer","NPSOL") #Get our dataset from the web. #This is a cool R trick: your file might exist on your local machine #OR it might exist on the web! con <- "http://www.matthewckeller.com/public/all.dat2.txt" all.data <- read.table(con,header=TRUE) nms <- names(all.data) <- c("twintype","famid","tw1chol","tw2chol","tw1age","tw2age","tw1cholR","tw2cholR") # ----------------------------------------------------------------------- # Prepare Data # ----------------------------------------------------------------------- head(all.data) str(all.data) #get summary of each column in all.data summary(all.data) #make sure no weird outliers (e.g., -999) #define variables nv <- 1 #number of variables; allows us to make these models multivariate with ease mzData <- all.data[all.data$twintype %in% c('mzm','mzf'),3:6] dzData <- all.data[all.data$twintype %in% c('dzm','dzf','dzos'),3:6] dim(mzData) dim(dzData) # ----------------------------------------------------------------------- # Print Descriptive Statistics & look at data # ----------------------------------------------------------------------- #Always look at your data ! plot(mzData$tw1chol,mzData$tw2chol,main='MZ scatterplot') plot(dzData$tw1chol,dzData$tw2chol,main='DZ scatterplot') #Look at covariances round(cov(mzData[,1:2],use="complete"),3) #MZ Var/Covar 2x2 matrix round(cov(dzData[,1:2],use="complete"),3) #DZ Var/Covar 2x2 matrix #create object of MZ vs DZ cov's (cov.MZ <- cov(mzData,use="complete") [1,2]) (cov.DZ <- cov(dzData,use="complete") [1,2]) # -----------------------------------------------------------------------