# Simulate bivariate data r=.5

library("MASS")
r <- 0.5	# Correlation (r)
N <- 1000	# Number of pairs of scores to simulate

#Data generation follows: 
Sigma	<- matrix(c(1,r,r,1),2,2)
Mu 		<- c(0,0)
pairs 	<- mvrnorm(N, Mu, Sigma)
varnames <- c("X","Y")
dimnames(pairs) <- list(NULL,varnames)

# Make a copy of the data for later
pairsOriginal <- pairs

# Take a look at complete data
cov(pairs)
colMeans(pairs, na.rm=T)

# Now to make some of the **Y scores missing due to value of X**
# This is known as Missing At Random (MAR) in Little & Rubin (1987) sense
#########  CASE 1: Y is missing iff X > 0 "SelonX" #########
pairs[pairs[,1]>0, 2] <- NA

# Nifty, that, now let's look at the attritted data
head(pairs)
colMeans(pairs, na.rm=T)
cov(pairs,use='complete.obs')
cor(pairs,use='complete.obs')
# So complete data only have funny summary statistics

# Set up OpenMx model
require(OpenMx)
sigma 	<- mxMatrix("Symm", 2, 2, values=diag(2), free=T, name="sigma")
mu 		<- mxMatrix("Full", 1, 2, free=T, name="mu")
obj 	<- mxFIMLObjective(covariance="sigma", mean="mu", dimnames=varnames)
data 	<- mxData(pairs, type='raw')
missMod	<- mxModel("MAR SelonX Missing on Y due to X values", sigma, mu, obj, data)

# Fit model and look at output
summary(missModRun <- mxRun(missMod))
mxEval(cov2cor(sigma),missModRun)

# Should see that despite being given data with sample means away from zero,
# 	and sample variances away from 1.  How do the ML estimates look?

#########  CASE 2: Y is missing iff X > 0 but analyze complete pairs #########
# Now what would happen if we foolishly analyzed only the complete pairs?
data2 <- mxData(pairs[!is.na(pairs[,2]),], type='raw')
missMod2 <- mxModel("Missing on Y due to X selonX but Complete Pairs Only", sigma, mu, obj, data2)
summary(missModRun2 <- mxRun(missMod2))
mxEval(cov2cor(sigma),missModRun2)


#########  CASE 3: Y is missing iff **Y** > 0  "SelonY" #########
# Now to make some of the **Y scores missing due to value of Y**
pairsMissOnY <- pairsOriginal
pairsMissOnY[pairsMissOnY[,2]>0,2] <- NA
data3 <- mxData(pairsMissOnY, type='raw')
missMod3 <- mxModel("NMAR Missing on Y due to Y values selonY", sigma, mu, obj, data3)
summary(missModRun3 <- mxRun(missMod3))
mxEval(cov2cor(sigma),missModRun3)

# What do you notice about these ML estimates?  

# Let's plot the data

library(rgl)
library(fMultivar)
pairsNA3 <- pairs
pairsNA3[is.na(pairsNA3[,2]),2]<--4
heights <- dnorm2d(pairsOriginal[,1], pairsOriginal[,2], rho = cor(pairsOriginal)[2,1])
scatter3D(pairsNA3[,1],pairsNA3[,2], heights, phi = 40, theta = 90)

# And plot it so we can manipulate it
plot3d(pairsNA3[,1],pairsNA3[,2], heights, rho = cor(pairsOriginal)[2,1]),phi = 40, theta = 90)

#########  CASE 4: Y is missing iff X > 0  but FIX x mean to 0 #########
missModFixedMean <- missMod
missModFixedMean$mu@values[1,1] <- 0
missModFixedMean$mu@free[1,1] 	<- F
missModFixedMean
summary(missModFixedMeanFit <- mxRun(missModFixedMean))

# Conclusion: incorrect estimate of mean of X causes bias in other parameter estimates