# Simulate bivariate data r=.5 library("MASS") r <- 0.5 # Correlation (r) N <- 1000 # Number of pairs of scores to simulate #Data generation follows: Sigma <- matrix(c(1,r,r,1),2,2) Mu <- c(0,0) pairs <- mvrnorm(N, Mu, Sigma) varnames <- c("X","Y") dimnames(pairs) <- list(NULL,varnames) # Make a copy of the data for later pairsOriginal <- pairs # Take a look at complete data cov(pairs) colMeans(pairs, na.rm=T) # Now to make some of the **Y scores missing due to value of X** # This is known as Missing At Random (MAR) in Little & Rubin (1987) sense ######### CASE 1: Y is missing iff X > 0 "SelonX" ######### pairs[pairs[,1]>0, 2] <- NA # Nifty, that, now let's look at the attritted data head(pairs) colMeans(pairs, na.rm=T) cov(pairs,use='complete.obs') cor(pairs,use='complete.obs') # So complete data only have funny summary statistics # Set up OpenMx model require(OpenMx) sigma <- mxMatrix("Symm", 2, 2, values=diag(2), free=T, name="sigma") mu <- mxMatrix("Full", 1, 2, free=T, name="mu") obj <- mxFIMLObjective(covariance="sigma", mean="mu", dimnames=varnames) data <- mxData(pairs, type='raw') missMod <- mxModel("MAR SelonX Missing on Y due to X values", sigma, mu, obj, data) # Fit model and look at output summary(missModRun <- mxRun(missMod)) mxEval(cov2cor(sigma),missModRun) # Should see that despite being given data with sample means away from zero, # and sample variances away from 1. How do the ML estimates look? ######### CASE 2: Y is missing iff X > 0 but analyze complete pairs ######### # Now what would happen if we foolishly analyzed only the complete pairs? data2 <- mxData(pairs[!is.na(pairs[,2]),], type='raw') missMod2 <- mxModel("Missing on Y due to X selonX but Complete Pairs Only", sigma, mu, obj, data2) summary(missModRun2 <- mxRun(missMod2)) mxEval(cov2cor(sigma),missModRun2) ######### CASE 3: Y is missing iff **Y** > 0 "SelonY" ######### # Now to make some of the **Y scores missing due to value of Y** pairsMissOnY <- pairsOriginal pairsMissOnY[pairsMissOnY[,2]>0,2] <- NA data3 <- mxData(pairsMissOnY, type='raw') missMod3 <- mxModel("NMAR Missing on Y due to Y values selonY", sigma, mu, obj, data3) summary(missModRun3 <- mxRun(missMod3)) mxEval(cov2cor(sigma),missModRun3) # What do you notice about these ML estimates? # Let's plot the data library(rgl) library(fMultivar) pairsNA3 <- pairs pairsNA3[is.na(pairsNA3[,2]),2]<--4 heights <- dnorm2d(pairsOriginal[,1], pairsOriginal[,2], rho = cor(pairsOriginal)[2,1]) scatter3D(pairsNA3[,1],pairsNA3[,2], heights, phi = 40, theta = 90) # And plot it so we can manipulate it plot3d(pairsNA3[,1],pairsNA3[,2], heights, rho = cor(pairsOriginal)[2,1]),phi = 40, theta = 90) ######### CASE 4: Y is missing iff X > 0 but FIX x mean to 0 ######### missModFixedMean <- missMod missModFixedMean$mu@values[1,1] <- 0 missModFixedMean$mu@free[1,1] <- F missModFixedMean summary(missModFixedMeanFit <- mxRun(missModFixedMean)) # Conclusion: incorrect estimate of mean of X causes bias in other parameter estimates