R version 2.10.1 (2009-12-14)
Copyright (C) 2009 The R Foundation for Statistical Computing
ISBN 3-900051-07-0

R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.

  Natural language support but running in an English locale

R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.

Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.

> # ---------------------------------------------------------------------
> # Program: TW-IntroToR-20100210-Answers.R
> #  Author: Matt Keller & Steve Boker
> #    Date: Mon Mar 1 15:16:40 EST 2010
> #
> #  Here are the answers to the workshop exercises.
> #
> # ---------------------------------------------------------------------
> # Revision History
> #    -- Wed Feb 10 12:02:11 EST 2010
> #      Created TW-IntroToR-20100210.R.
> #    -- Mon Mar 1 15:17:32 EST 2010
> #      Created TW-IntroToR-20100210-Answers.R.
> # ---------------------------------------------------------------------
> 
> 
> 
> # ---------------------------------------------------------------------
> # PROBLEM SET 1
> #   Put your work directly into this script, below the q's
> # ---------------------------------------------------------------------
> #
> # a) Create a vector of 100 normally distributed random numbers
> #    (mean = 0 & sd = 1). Assign it to "Y"
> #
> 
> Y <- rnorm(100, mean=0, sd=1)
> 
> # b) Create another vector, "Z", of 100 normally distributed random
> #    numbers with mean = 100 and the sd = 15. 
> #    HINT: See the help function if you get stuck!
> #
> 
> Z <- rnorm(100, mean=100, sd=15)
> 
> # c) Create another variable, "Sum.dist", that is the sum of Y and Z
> #
> 
> Sum.dist <- Y + Z
> 
> # d) Put the vector "Sum.dist" into a matrix with 20 rows and 
> #    5 columns.
> #    Do so such that the numbers are put in BY ROW. 
> #    Call the matrix "My.Mat"
> #
> 
> My.Mat <- matrix(Sum.dist, nrow=20, ncol=5, byrow=TRUE)
> 
> # e) Get a new matrix that only has the rows of My.Mat where the 
> #    first column of My.Mat is less than 100. (Note: everyone's
> #    matrix will be different, but should have ~ 10 rows).
> 
> lessThan100 <- My.Mat[,1] < 100
> New.Mat <- My.Mat[lessThan100, ]
> New.Mat
          [,1]      [,2]      [,3]      [,4]      [,5]
 [1,] 89.24776 106.21913 108.88924  83.00376 108.61926
 [2,] 73.03124  98.53947 117.32064 107.64436  87.10820
 [3,] 97.85314  99.95734  83.34317 120.25056  88.43612
 [4,] 93.63363  97.27824 104.50908 109.44640 109.62647
 [5,] 96.23976  77.62451  73.93948  84.44467 112.06190
 [6,] 75.71240  99.76795  98.46386 103.00063 111.06102
 [7,] 74.15800  99.83449  90.99369  53.06807  77.71956
 [8,] 96.93662 130.74284  80.33114  81.79560  97.46776
 [9,] 81.29765 120.95378 116.32105  98.57531  74.83224
> 
> # ---------------------------------------------------------------------
> 
> 
> # ---------------------------------------------------------------------
> # PROBLEM SET 2
> #   Put your work directly into this script, below the q's
> # ---------------------------------------------------------------------
> #
> # a) Read in the data in ExampleData2.csv
> #
> 
> require(psych)
> my.DataFrame <- read.csv("ExampleData2.csv", header=TRUE)
> describe(my.DataFrame)
          var   n    mean     sd  median trimmed    mad     min     max  range
IDNum       1 800 1200.50 115.54 1200.50 1200.50 148.26 1001.00 1400.00 399.00
Zygosity*   2 800    1.50   0.50    1.50    1.50   0.74    1.00    2.00   1.00
TwinNum     3 800    1.50   0.50    1.50    1.50   0.74    1.00    2.00   1.00
X           4 800    0.11   1.77    0.06    0.11   1.73   -5.34    5.45  10.80
Y           5 800    0.02   1.09    0.06    0.02   1.01   -3.40    3.77   7.17
          skew kurtosis   se
IDNum     0.00    -1.20 4.09
Zygosity* 0.00    -2.00 0.02
TwinNum   0.00    -2.00 0.02
X         0.01    -0.06 0.06
Y         0.01     0.32 0.04
> summary(my.DataFrame)
     IDNum      Zygosity    TwinNum          X                 Y           
 Min.   :1001   DZ:400   Min.   :1.0   Min.   :-5.3418   Min.   :-3.40020  
 1st Qu.:1101   MZ:400   1st Qu.:1.0   1st Qu.:-1.0378   1st Qu.:-0.69775  
 Median :1200            Median :1.5   Median : 0.0577   Median : 0.05965  
 Mean   :1200            Mean   :1.5   Mean   : 0.1085   Mean   : 0.02188  
 3rd Qu.:1300            3rd Qu.:2.0   3rd Qu.: 1.2899   3rd Qu.: 0.69105  
 Max.   :1400            Max.   :2.0   Max.   : 5.4542   Max.   : 3.77010  
> 
> # b) Select all rows with TwinNum equal to 1 and Zygosity equal to "MZ"
> #
> 
> twinOneMzSelect <- my.DataFrame$TwinNum==1 & my.DataFrame$Zygosity=="MZ"
> twinOneMzData <- my.DataFrame[twinOneMzSelect,]
> 
> # c) Calculate a correlation matrix for those twins.
> #
> 
> cor(twinOneMzData)
               IDNum Zygosity TwinNum           X          Y
IDNum     1.00000000       NA      NA -0.08762454 -0.0560598
Zygosity          NA        1      NA          NA         NA
TwinNum           NA       NA       1          NA         NA
X        -0.08762454       NA      NA  1.00000000  0.9487119
Y        -0.05605979       NA      NA  0.94871186  1.0000000
> 
> # d) Now calculate a correlation matrix for TwinNum equal to 2 and
> #    Zygosity equal to "MZ".
> #
> 
> twinTwoMzSelect <- my.DataFrame$TwinNum==2 & my.DataFrame$Zygosity=="MZ"
> twinTwoMzData <- my.DataFrame[twinTwoMzSelect,]
> cor(twinTwoMzData)
              IDNum Zygosity TwinNum          X          Y
IDNum     1.0000000       NA      NA -0.1465920 -0.1277538
Zygosity         NA        1      NA         NA         NA
TwinNum          NA       NA       1         NA         NA
X        -0.1465920       NA      NA  1.0000000  0.9433597
Y        -0.1277538       NA      NA  0.9433597  1.0000000
> 
> round(cor(twinTwoMzData$X, twinTwoMzData$Y), 3)
[1] 0.943
> 
> # e) Try running a linear model with Y being predicted by X for
> #    only rows with TwinNum equal to 2 and Zygosity equal to "DZ".
> #
> 
> twinTwoDzSelect <- my.DataFrame$TwinNum==2 & my.DataFrame$Zygosity=="DZ"
> twinTwoDzData <- my.DataFrame[twinTwoDzSelect,]
> lmOut <- lm(Y~X, data=twinTwoDzData)
> summary(lmOut)

Call:
lm(formula = Y ~ X, data = twinTwoDzData)

Residuals:
     Min       1Q   Median       3Q      Max 
-2.01326 -0.51742 -0.06778  0.47728  2.46861 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) -0.03655    0.05315  -0.688    0.492    
X            0.25225    0.02830   8.913 3.28e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 

Residual standard error: 0.7467 on 198 degrees of freedom
Multiple R-squared: 0.2863,	Adjusted R-squared: 0.2827 
F-statistic: 79.44 on 1 and 198 DF,  p-value: 3.276e-16 

> 
> # ---------------------------------------------------------------------
> 
> 
> 
> 
> 
> 
> 
> 
>