# ---------------------------------------------------------------------
# Program: TW-IntroToR-20100210-Answers.R
#  Author: Matt Keller & Steve Boker
#    Date: Mon Mar 1 15:16:40 EST 2010
#
# Here are the answers to the workshop exercises.
#
# ---------------------------------------------------------------------
# Revision History
# -- Wed Feb 10 12:02:11 EST 2010
#    Created TW-IntroToR-20100210.R.
# -- Mon Mar 1 15:17:32 EST 2010
#    Created TW-IntroToR-20100210-Answers.R.
# ---------------------------------------------------------------------

# ---------------------------------------------------------------------
# PROBLEM SET 1
# Put your work directly into this script, below the q's
# ---------------------------------------------------------------------
#
# a) Create a vector of 100 normally distributed random variables
#    (mean = 0 & sd = 1). Assign it to "Y"
#
Y <- rnorm(100, mean = 0, sd = 1)

# b) Create another vector, "Z", of 100 normally distributed random
#    numbers with mean = 100 and the sd = 15.
#    HINT: See the help function if you get stuck!
#
Z <- rnorm(100, mean = 100, sd = 15)

# c) Create another variable, "Sum.dist", that is the sum of Y and Z
#
# `+` is vectorized: element i of Sum.dist is Y[i] + Z[i].
Sum.dist <- Y + Z

# d) Put the vector "Sum.dist" into a matrix with 20 rows and
#    5 columns.
#    Do so such that the numbers are put in BY ROW.
#    Call the matrix "My.Mat"
#
# byrow = TRUE fills the matrix one row at a time, so the first row
# holds Sum.dist[1:5]; the default would fill column by column.
My.Mat <- matrix(Sum.dist, nrow = 20, ncol = 5, byrow = TRUE)

# e) Get a new matrix that only has the rows of My.Mat where the
#    first column of My.Mat is less than 100. (Note: everyone's
#    matrix will be different, but should have ~ 10 rows).
#
# Logical mask: TRUE for each row of My.Mat whose first-column value
# is below 100.
lessThan100 <- My.Mat[, 1] < 100

# drop = FALSE keeps the result a matrix even when exactly one row
# matches; without it that single row would collapse to a plain vector.
New.Mat <- My.Mat[lessThan100, , drop = FALSE]
New.Mat

# ---------------------------------------------------------------------

# ---------------------------------------------------------------------
# PROBLEM SET 2
# Put your work directly into this script, below the q's
# ---------------------------------------------------------------------
#
# a) Read in the data in ExampleData2.csv
#
# library() stops right here if psych is not installed, instead of
# failing later at the describe() call.
library(psych)

my.DataFrame <- read.csv("ExampleData2.csv", header = TRUE)
describe(my.DataFrame)
summary(my.DataFrame)

# cor() accepts only numeric input, and Zygosity holds labels such as
# "MZ", so correlation matrices below are restricted to these columns.
numericCols <- vapply(my.DataFrame, is.numeric, logical(1))

# b) Select all rows with TwinID equal to 1 and Zygosity equal to "MZ"
#    (the twin identifier is read from the TwinNum column)
#
# `&` combines the two row-wise conditions element by element.
twinOneMzSelect <- my.DataFrame$TwinNum == 1 & my.DataFrame$Zygosity == "MZ"
twinOneMzData <- my.DataFrame[twinOneMzSelect, ]

# c) Calculate a correlation matrix for those twins.
#
# Indexing with a single logical vector selects columns and always
# returns a data frame. A numeric column that is constant within the
# subset produces NA correlations and a warning.
cor(twinOneMzData[numericCols])

# d) Now calculate a correlation matrix for TwinID equals 2 and
#    Zygosity equal to "MZ".
#
twinTwoMzSelect <- my.DataFrame$TwinNum == 2 & my.DataFrame$Zygosity == "MZ"
twinTwoMzData <- my.DataFrame[twinTwoMzSelect, ]
cor(twinTwoMzData[numericCols])

# The single X-Y correlation for the same subset, rounded to 3 decimals.
round(cor(twinTwoMzData$X, twinTwoMzData$Y), 3)

# e) Try running a linear model with Y being predicted by X for
#    only rows with TwinID 2 and Zygosity "DZ".
#
twinTwoDzSelect <- my.DataFrame$TwinNum == 2 & my.DataFrame$Zygosity == "DZ"
twinTwoDzData <- my.DataFrame[twinTwoDzSelect, ]

# Regress Y on X using only the selected rows.
lmOut <- lm(Y ~ X, data = twinTwoDzData)
summary(lmOut)

# ---------------------------------------------------------------------