# ---------------------------------------------------------------------
# Program: TW-IntroToR-20100210-Answers.R
#  Author: Matt Keller & Steve Boker
#    Date: Mon Mar 1 15:16:40 EST 2010
#
#  Here are the answers to the workshop exercises.
#
# ---------------------------------------------------------------------
# Revision History
#    -- Wed Feb 10 12:02:11 EST 2010
#      Created TW-IntroToR-20100210.R.
#    -- Mon Mar 1 15:17:32 EST 2010
#      Created TW-IntroToR-20100210-Answers.R.
# ---------------------------------------------------------------------


# ---------------------------------------------------------------------
# PROBLEM SET 1
#   Put your work directly into this script, below the q's
# ---------------------------------------------------------------------
#
# a) Create a vector of 100 normally distributed random variables 
#    (mean = 0 & sd = 1). Assign it to "Y"
#

Y <- rnorm(100, mean = 0, sd = 1)

# b) Create another vector, "Z", of 100 normally distributed random
#    numbers with mean = 100 and sd = 15.
#    HINT: See the help function if you get stuck!
#

Z <- rnorm(100, mean = 100, sd = 15)

# c) Create another variable, "Sum.dist", that is the elementwise sum
#    of Y and Z.
#

Sum.dist <- Y + Z

# d) Put the vector "Sum.dist" into a matrix with 20 rows and
#    5 columns.
#    Do so such that the numbers are put in BY ROW.
#    Call the matrix "My.Mat"
#

My.Mat <- matrix(Sum.dist, nrow = 20, ncol = 5, byrow = TRUE)

# e) Get a new matrix that only has the rows of My.Mat where the
#    first column of My.Mat is less than 100. (Note: everyone's
#    matrix will be different, but should have ~ 10 rows).

# Logical mask with one entry per row of My.Mat.
lessThan100 <- My.Mat[, 1] < 100
# drop = FALSE keeps the result a 5-column matrix even if only one row
# (or no row) qualifies; the default would collapse a single matching
# row into a plain vector.
New.Mat <- My.Mat[lessThan100, , drop = FALSE]
New.Mat

# ---------------------------------------------------------------------


# ---------------------------------------------------------------------
# PROBLEM SET 2
#   Put your work directly into this script, below the q's
# ---------------------------------------------------------------------
#
# a) Read in the data in ExampleData2.csv
#

# psych provides describe(). library() stops immediately if the package
# is not installed, whereas require() would only warn and return FALSE,
# deferring the failure to the describe() call below.
library(psych)
# Read the example data; the first line of the file holds column names.
my.DataFrame <- read.csv("ExampleData2.csv", header = TRUE)
# Two overviews of every column: psych's describe() and base summary().
describe(my.DataFrame)
summary(my.DataFrame)

# b) Select all rows with TwinID (the TwinNum column in the data)
#    equal to 1 and Zygosity equal to "MZ"
#

# Logical mask over the rows of my.DataFrame: twin number 1, zygosity MZ.
twinOneMzSelect <- my.DataFrame$TwinNum == 1 & my.DataFrame$Zygosity == "MZ"
twinOneMzData <- my.DataFrame[twinOneMzSelect, ]

# c) Calculate a correlation matrix for those twins.
#

# cor() stops with "'x' must be numeric" when handed a data frame that
# contains a non-numeric column, and Zygosity is one (it is compared with
# the string "MZ" above), so correlate the numeric columns only.
# NOTE(review): TwinNum is constant within this subset, so if it is a
# numeric column its correlations come out as NA with a warning.
twinOneMzNumeric <- vapply(twinOneMzData, is.numeric, logical(1))
cor(twinOneMzData[, twinOneMzNumeric, drop = FALSE])

# d) Now calculate a correlation matrix for TwinID (the TwinNum column
#    in the data) equal to 2 and Zygosity equal to "MZ".
#

# Logical mask over the rows of my.DataFrame: twin number 2, zygosity MZ.
twinTwoMzSelect <- my.DataFrame$TwinNum == 2 & my.DataFrame$Zygosity == "MZ"
twinTwoMzData <- my.DataFrame[twinTwoMzSelect, ]

# cor() stops with "'x' must be numeric" when handed a data frame that
# contains a non-numeric column, and Zygosity is one (it is compared with
# the string "MZ" above), so correlate the numeric columns only.
# NOTE(review): TwinNum is constant within this subset, so if it is a
# numeric column its correlations come out as NA with a warning.
twinTwoMzNumeric <- vapply(twinTwoMzData, is.numeric, logical(1))
cor(twinTwoMzData[, twinTwoMzNumeric, drop = FALSE])

# The single X-Y correlation for these twins, rounded to 3 decimals.
round(cor(twinTwoMzData$X, twinTwoMzData$Y), 3)

# e) Try running a linear model with Y being predicted by X for
#    only rows with TwinID (the TwinNum column in the data) equal to 2
#    and Zygosity "DZ".
#

# Build the row mask in two steps: second twin of a pair, and DZ zygosity.
isSecondTwin <- my.DataFrame$TwinNum == 2
isDzPair <- my.DataFrame$Zygosity == "DZ"
twinTwoDzSelect <- isSecondTwin & isDzPair

# Keep every column for the selected rows.
twinTwoDzData <- my.DataFrame[twinTwoDzSelect, ]

# Regress Y on X within that subset and show the fitted-model summary.
lmOut <- lm(Y ~ X, data = twinTwoDzData)
summary(lmOut)

# ---------------------------------------------------------------------