# # File selonxOpenMx.R # # Simulate data to illustrate that maximum likelihood is robust to # certain types of missing data, particularly where missingness is # predicted by measures in the dataset that are not missing. # Here we will explore what happens to the estimates # when the second variable (Y) is missing whenever the first variable # (X) is greater than zero # # M C Neale March 6 2020 # # Load Libraries library("MASS") library(OpenMx) library(rgl) library(fMultivar) library(car) library(plot3D) # Simulate bivariate data r=.5 set.seed(5692148) r <- 0.5 # Correlation (r) N <- 1000 # Number of pairs of scores to simulate Sigma <- matrix(c(1,r,r,1),2,2) Mu <- c(0,0) pairs <- mvrnorm(N, Mu, Sigma) varnames <- c("X","Y") dimnames(pairs) <- list(NULL,varnames) # Make a copy of the data for later pairsOriginal <- pairs # Take a look at complete data cov(pairsOriginal) colMeans(pairsOriginal, na.rm=T) scatter.smooth(pairsOriginal) # Now to make some of the **Y scores missing due to value of X** # This is known as Missing At Random (MAR) in Little & Rubin (1987) sense ######### CASE 1: Y is missing iff X > 0 "SelonX" ######### pairs[pairs[,1]>0, 2] <- NA scatter.smooth(pairs) # The plot doesn't show the unmatched X observations # Nifty, that, now let's look at some summary statistics of the observed data head(pairs) colMeans(pairs, na.rm=T) cov(pairs,use='complete.obs') cor(pairs,use='complete.obs') # So complete data only have funny summary statistics # Set up OpenMx model to estimate variances, covariance and means (it's a saturated model) sigma <- mxMatrix("Symm", 2, 2, values=diag(2), free=T, name="sigma") mu <- mxMatrix("Full", 1, 2, free=T, name="mu") obj <- mxFitFunctionML(); expec <- mxExpectationNormal(covariance="sigma", mean="mu", dimnames=varnames) data <- mxData(pairs, type='raw') missMod <- mxModel("MAR SelonX Missing on Y due to X values", sigma, mu, obj, expec, data) # Fit model and look at output summary(missModRun <- mxRun(missMod), refModels=mxRefModels(missMod, run = TRUE)) mxEval(cov2cor(sigma),missModRun) # Should see that despite being given data with sample means away from zero, # and sample variances away from 1. How do the ML estimates look? # Here we compare the results of the simulation-recover exercise # to the values used for the simulation (subModel) and look at the chi-squared # test. To evaluate the performance of a model, this step could # be looped and compared to the chi-sq with 5 df repeatedly subModel <- mxRename(missModRun,"missModSub") subModel$sigma$values[]<-matrix(c(1,r,r,1),2,2) subModel$sigma$free[]<-F subModel$mu$values[]<-0 subModel$mu$free[]<-F subModelFit <- mxRun(subModel) mxCompare(missModRun, subModelFit) ######### CASE 2: Y is missing iff X > 0 but analyze complete pairs ######### # Now what would happen if we foolishly analyzed only the complete pairs? data2 <- mxData(pairs[!is.na(pairs[,2]),], type='raw') missMod2 <- mxModel("Missing on Y due to X selonX but Complete Pairs Only", sigma, mu, obj, expec, data2) summary(missModRun2 <- mxRun(missMod2), refModels=mxRefModels(missMod2, run = TRUE)) mxEval(cov2cor(sigma),missModRun2) ######### CASE 3: Y is missing iff **Y** > 0 "SelonY" ######### # Now to make some of the **Y scores missing due to value of Y** pairsMissOnY <- pairsOriginal pairsMissOnY[pairsMissOnY[,2]>0,2] <- NA data3 <- mxData(pairsMissOnY, type='raw') missMod3 <- mxModel("NMAR Missing on Y due to Y values selonY", sigma, mu, obj, expec, data3) summary(missModRun3 <- mxRun(missMod3), refModels=mxRefModels(missMod3, run = TRUE)) mxEval(cov2cor(sigma),missModRun3) # What do you notice about these ML estimates? # Let's plot the data pairsNA3 <- pairs pairsNA3[is.na(pairsNA3[,2]),2]<--4 heights <- dnorm2d(pairsOriginal[,1], pairsOriginal[,2], rho = cor(pairsOriginal)[2,1]) scatter3D(pairsNA3[,1],pairsNA3[,2], heights, phi = 40, theta = 90) # And plot it so we can manipulate it plot3d(pairsNA3[,1],pairsNA3[,2], heights, rho = cor(pairsOriginal)[2,1],phi = 40, theta = 90) ######### CASE 4: Y is missing iff X > 0 but FIX x mean to 0 ######### missModFixedMean <- missMod missModFixedMean$mu@values[1,1] <- 0 missModFixedMean$mu@free[1,1] <- F missModFixedMean summary(missModFixedMeanFit <- mxRun(missModFixedMean)) mxEval(cov2cor(sigma),missModFixedMeanFit) # Conclusion: incorrect estimate of mean of X causes bias in other parameter estimates