# Learn more about umx here:  http://tbates.github.io
# And read the preprint: https://www.dropbox.com/s/sntl3lmifey4tox/umx%20preprint.pdf?dl=0

# =========
# = Setup =
# =========
# If you have a workshop laptop, just load umx
library(umx)

# = If you are using your own laptop, install devtools and grab a new copy of umx =
install.packages("devtools")
devtools::install_github("tbates/umx")
library(umx)
packageVersion("umx") # (should be 1.1.5)

# =======================
# = BMI analysis in umx =
# =======================
data(twinData);
tmpTwin <- twinData
# Set zygosity to a factor (not needed for OpenMx 2.5.2 and better)
labList = c("MZFF", "MZMM", "DZFF", "DZMM", "DZOS")
tmpTwin$zyg = factor(tmpTwin$zyg, levels = 1:5, labels = labList)

# Pick the variables
selDVs = c("bmi1", "bmi2")
# nb: if you set suffix, selDVs can be just base names and the varnames
# for each twin are made automagically for example. "Var_T1" "Var_T2"
mzData <- tmpTwin[tmpTwin$zyg %in% "MZFF", selDVs]
dzData <- tmpTwin[tmpTwin$zyg %in% "DZFF", selDVs]
m1 = umxACE(selDVs = selDVs, dzData = dzData, mzData = mzData)
umxSummary(m1, showStd = TRUE)
plot(m1)

# ADE model (DZ correlation set to .25)
m2 = umxACE("ADE", selDVs = selDVs, dzData = dzData, mzData = mzData, dzCr = .25)
umxCompare(m2, m1) # ADE is better
umxSummary(m2, compare = m1) # nb: though this is ADE, columns are labeled ACE


# ===================
# = Ordinal example =
# ===================

# Cut bmi colum to form ordinal obesity variables
selDVs = c("obese")
ordDVs = c("obese1", "obese2")
obesityLevels = c('normal', 'overweight', 'obese')
cutPoints <- quantile(tmpTwin[, "bmi1"], probs = c(.5, .2), na.rm = TRUE)
tmpTwin$obese1 <- cut(tmpTwin$bmi1, breaks = c(-Inf, cutPoints, Inf), labels = obesityLevels) 
tmpTwin$obese2 <- cut(tmpTwin$bmi2, breaks = c(-Inf, cutPoints, Inf), labels = obesityLevels) 
# Make the ordinal variables into mxFactors (ensure ordered is TRUE, and require levels)
tmpTwin[, ordDVs] <- mxFactor(tmpTwin[, ordDVs], levels = obesityLevels)

mzData <- tmpTwin[tmpTwin$zyg %in% "MZFF", umx_paste_names(selDVs, "", 1:2)]
dzData <- tmpTwin[tmpTwin$zyg %in% "DZFF", umx_paste_names(selDVs, "", 1:2)]
str(mzData)
# 'data.frame':	569 obs. of  2 variables:
#  $ obese1: Ord.factor w/ 3 levels "normal"<"overweight"<..: 2 2 2 3 1 3 2 1 1 3 ...
#  $ obese2: Ord.factor w/ 3 levels "normal"<"overweight"<..: 1 1 1 3 1 3 2 1 1 2 ...
m1 = umxACE(selDVs = selDVs, dzData = dzData, mzData = mzData, suffix = '')

# I'm going to turn on plotting by default from summary
umx_set_auto_plot("name")
umxSummary(m1)

# ============================================
# = Bivariate continuous and ordinal example =
# ============================================
selDVs = c("wt", "obese")
mzData <- tmpTwin[tmpTwin$zyg %in% "MZFF", umx_paste_names(selDVs, "", 1:2)]
dzData <- tmpTwin[tmpTwin$zyg %in% "DZFF", umx_paste_names(selDVs, "", 1:2)]
m1 = umxACE(selDVs = selDVs, dzData = dzData, mzData = mzData, suffix = '')
umxSummary(m1)

# =================================
# = See how umx_paste_names works =
# =================================
umx_paste_names(selDVs, "", 1:2)

# =======================================
# = Mixed continuous and binary example =
# =======================================
# Cut to form category of 20% obese subjects
cutPoints <- quantile(tmpTwin[, "bmi1"], probs = .2, na.rm = TRUE)
obesityLevels = c('normal', 'obese')
tmpTwin$obese1 <- cut(tmpTwin$bmi1, breaks = c(-Inf, cutPoints, Inf), labels = obesityLevels) 
tmpTwin$obese2 <- cut(tmpTwin$bmi2, breaks = c(-Inf, cutPoints, Inf), labels = obesityLevels) 
# Make the ordinal variables into mxFactors (ensure ordered is TRUE, and require levels)
ordDVs = c("obese1", "obese2")
tmpTwin[, ordDVs] <- mxFactor(tmpTwin[, ordDVs], levels = obesityLevels)
selDVs = c("wt", "obese")
mzData <- tmpTwin[tmpTwin$zyg == "MZFF", umx_paste_names(selDVs, "", 1:2)]
dzData <- tmpTwin[tmpTwin$zyg == "DZFF", umx_paste_names(selDVs, "", 1:2)]
m1 = umxACE(selDVs = selDVs, dzData = dzData, mzData = mzData, suffix = '')
umxSummary(m1)

# ===================================
# Example with covariance data only =
# ===================================
selDVs = c("wt1", "wt2")
mz = cov(tmpTwin[tmpTwin$zyg %in% "MZFF", selDVs], use = "complete")
dz = cov(tmpTwin[tmpTwin$zyg %in% "DZFF", selDVs], use = "complete")
m1 = umxACE(selDVs= selDVs, dzData=dz, mzData=mz, numObsDZ=nrow(dzData), numObsMZ=nrow(mzData))
umxSummary(m1)
plot(m1)


# If there's more time, or you're exploring. Let's try umxCP
# Learn more about umx here:  http://tbates.github.io
# And read the preprint: https://www.dropbox.com/s/sntl3lmifey4tox/umx%20preprint.pdf?dl=0

require(umx)
data(twinData)
selDVs = c("ht", "wt")
mzData <- subset(tmpTwin, zyg == "MZFF", umx_paste_names(selDVs, "", 1:2))
dzData <- subset(tmpTwin, zyg == "DZFF", umx_paste_names(selDVs, "", 1:2))
m1 = umxCP(selDVs = selDVs, dzData = dzData, mzData = mzData, suffix = "")
umxSummary(m1)
umxGetParameters(m1, "^c", free = TRUE)
umxGetParameters(m1, free = TRUE)

# ===========================================
# = Let's try dropping C from this analysis =
# ===========================================
# cs   matrix = specific c paths
# c_cp matrix = common pathway c paths
m2 = umxModify(m1, update = "c_cp_r1c1", name = "drop_cp")
m3 = umxModify(m2, update = c("cs_r1c1", "cs_r2c2"), name = "drop_cp_and_cs")
umxSummaryCP(m2, comparison = m1, dotFilename = NA)
umxCompare(m1, c(m2,m3))
umxCompare(m2, m3)
plot(m2)


# ===============================
# = Homework: Adding covariates =
# ===============================
# Try umxACEcov() and covary for age

require(umx)
data(twinData)
tmpTwin <- twinData
# add age 1 and age 2 columns
tmpTwin$age1 = tmpTwin$age2 = tmpTwin$age
# Pick the variables. We will use base names (i.e., "bmi") and set suffix.
selDVs  = c("bmi")
selCovs = c("age")
selVars = umx_paste_names(c(selDVs, selCovs), textConstant = "", suffixes= 1:2)
# just top 200 so example runs in a couple of secs
mzData = subset(tmpTwin, zyg == 1, selVars)[1:200, ]
dzData = subset(tmpTwin, zyg == 3, selVars)[1:200, ]
# TODO update for new dataset variable zygosity
# mzData = subset(tmpTwin, zygosity == "MZFF", selVars)[1:200, ]
# dzData = subset(tmpTwin, zygosity == "DZFF", selVars)[1:200, ]
m_cov = umxACEcov(selDVs = selDVs, selCovs = selCovs, dzData = dzData, mzData = mzData, suffix = "")
umxSummary(m_cov)
# |    |    a1|   c1|   e1|
# |:---|-----:|----:|----:|
# |bmi | -0.94| 0.03| 0.35|
m_ace = umxACE(selDVs = selDVs, dzData = dzData, mzData = mzData, suffix = "")
umxSummary(m_ace)
# |     |   a1|c1 |  e1|
# |:----|----:|:--|---:|
# |bmi1 | 0.86|.  | 0.5|


# ======================================================
# = umxRAM to see how to solve general SEM path models =
# ======================================================

# 1. For convenience, list up the manifests you will be using
selVars = c("mpg", "wt", "disp")
# 2. Create an mxData object
myCov = mxData(cov(mtcars[,selVars]), type = "cov", numObs = nrow(mtcars) )

# 3. Create the model (see ?umxPath for more nifty options)
m1 = umxRAM("tim", data = myCov,
	umxPath(c("wt", "disp"), to = "mpg"),
	umxPath("wt", with = "disp"),
	umxPath(var = c("wt", "disp", "mpg"))
)

# 4. Print a nice summary 
umxSummary(m1, show = "std")
plot(m1)


# Learn more about umx here:  http://tbates.github.io
# And read the preprint: https://www.dropbox.com/s/sntl3lmifey4tox/umx%20preprint.pdf?dl=0