# Title: Binary Data Warm Up # Author: Chelsea Sawyers # Date: 2024 03 06 ##################################################################### # Set up ---- # Load Library require(OpenMx) require(ggplot2) require(gmodels) require(ggfx) require(psych) ################ #### PART 1 #### ################ # In the first part of the practical we will look at the area under the univariate normal distribution. # The qnorm() function provides the z statistic for a specified probability # Note: This approach is used on raw data that has been standardized to a mean of zero # What is the threshold where 5% of the sample will be to the left of the threshold (below threshold) # and 95% will be to the right (above threshold)? qnorm(.05) # How about 97.5% below the threshold? qnorm(.975) ######################################### # Ok now let's visualize this: ######################################### # Set seed for reproducibility set.seed(123) # Generate random data from a normal distribution data <- rnorm(1000000) # Create a data frame df <- data.frame(x = data) # Now we can take the z score found from qnorm(.975) and plot the threshold against a normal distribution # Plot the normal distribution and threshold ggplot(df, aes(x = x)) + geom_density(fill = "white", alpha = 0.5) + geom_vline(xintercept = 1.96, linetype = "dashed", color = "red", linewidth = .5) + as_reference(geom_density(fill = "white", alpha=0.5), id = "density") + with_blend(annotate("rect", xmin = -5, xmax = 1.96, ymin = -Inf, ymax = Inf, fill = "skyblue"), bg_layer = "density", blend_type = "atop")+ annotate("text", x = 0.8, y = 0.1, label = "Z = 1.96", color = "red", vjust = -1) + labs(title = "Normal Distribution with Z Score Marked", x = "X", y = "Density") + theme_classic() ##################### #### Questions #### ##################### # Take 2 minutes and answer the questions below # What is the z statistic for a probability of .5? qnorm(.5) # What is the z statistic for a probability of .90? qnorm(.9) # What is the z statistic for a probability of .99? qnorm(.99) # Bonus: If you get done early, feel free to copy the plotting code from above # and modify to visualize for .5, .9 or .99 thresholds #Note: Change the 2 instances of 1.96 in the script above to the z-score of your choice # example of qnorm(.9) plotted: ggplot(df, aes(x = x)) + geom_density(fill = "white", alpha = 0.5) + geom_vline(xintercept = 1.28, linetype = "dashed", color = "red", linewidth = .5) + as_reference(geom_density(fill = "white", alpha=0.5), id = "density") + with_blend(annotate("rect", xmin = -5, xmax = 1.28, ymin = -Inf, ymax = Inf, fill = "skyblue"), bg_layer = "density", blend_type = "atop")+ annotate("text", x = 0, y = 0.1, label = "Z = 1.28", color = "red", vjust = -1) + labs(title = "Normal Distribution with Z Score Marked", x = "X", y = "Density") + theme_classic() ######################################### # Part 2: Let's try this with data now! ######################################### # Load Data data(twinData) describe(twinData, skew=F) # Select Variables for Analysis Vars <- 'bmiB' nv <- 1 # number of variables ntv <- nv*2 # number of total variables selVars <- paste(Vars,c(rep(1,nv),rep(2,nv)),sep="") #c('bmi1','bmi2') # Create binary data and make it an mxFactor twinData$bmiB1 <- ifelse(twinData$bmi1<22,1.00,0.00) # BMI less than 22 is 1, "affected with low BMI" twinData$bmiB2 <- ifelse(twinData$bmi2<22,1.00,0.00) # BMI less than 22 is 1, "affected with low BMI" # Select Data for Analysis mzDataBin <- data.frame(subset(twinData, zyg==1, selVars)) dzDataBin <- data.frame(subset(twinData, zyg==3, selVars)) ######################################### # Developing a Sense for the Thresholds ######################################### # Inverse of Normal Cummulative Distribution CrossTable(twinData$bmiB1) # all twin 1 CrossTable(twinData$bmiB2) # all twin 2 # Given the MZ twin 1 contingency table: CrossTable(mzDataBin$bmiB1) # MZ twins, with percentage of un/affected listed below raw value # What is the z-score for the threshold for MZ twin 1? qnorm(0.816) #have %, want to know z statistic pnorm(.900) #have z statistic, want to know % # Your turn now, Given the MZ twin 2 contingency table: CrossTable(mzDataBin$bmiB2) # What is the z-score for the threshold for MZ twin 2? qnorm(0.825) #hint: the percentage of affected/unaffected is listed below the raw value # Take a look at DZ data CrossTable(dzDataBin$bmiB1) CrossTable(dzDataBin$bmiB2) # Do MZ and DZ twins have similar proportions of unaffected/affected? # Has slightly higher prevalence rates (.816 -.825) vs DZs (.788 - .796)