#### "Power on the back of an envelope" ## Some useful bits of R ## functions based on normal tests ## Written mostly by Lindon Eaves ##Always always always check the help of new functions ?dnorm #dnorm stands for density of normal distribution # normal probability integral P<-pnorm(1.96,0,1,lower.tail=FALSE) # threshold to give required integral (area, probability) q<-qnorm(0.05,0,1,lower.tail=FALSE) # generate random sample of 5 values from N[0,1] x<-rnorm(5,0,1) # generate set (vector (aka array), of sequential numbers) z<-seq(-5,5,by=0.02) # generate ordinates of normal curve for given z values y<-dnorm(z,0,1) # plot curve plot(z,y,type="l",col="black",main="Standardized Normal Distribution") # add another curve 1 unit to the right lines(z+1,y,col="red") ### Set up a twin study ### true correlations rmz<-0.2 rdz<-0.1 # initial sample sizes Nmz<-100 Ndz<-100 # variances of correlations (approx. assuming normal and no correlation) # Note that a correction for r can be applied - such that (1/n)*(1-r^2)^2 = variance Vmz<-1/Nmz Vdz<-1/Ndz # difference between correlations delta<-rmz-rdz # variance of difference as Var(X-Y) = Var(x)+var(y) - 2*cov(X,Y) # Note that 2*cov(X,Y) = 0 as we assume our MZs and DZs are independent Vdelta<-Vmz+Vdz # standard error of delta sdelta<-sqrt(Vdelta) # expected value of t for given test zdelta<-delta/sdelta # probability that t>zdelta, invoking the t distribution P<-pt(zdelta,Nmz+Ndz-2,0,lower.tail=FALSE) # Plot distribution of z around expected value # generate set (vector, array, of sequential numbers) z<-seq(-5,5,by=0.02) # generate ordinates of normal curve for given z values y<-dnorm(z,0,1) # plot curve plot(z,y,type="l",col="black",main="Distibution of t under NH and alternative hypothesis") # add another curve zdelta units to the right lines(z+zdelta,y,col="red") # How often would t be significant with these sample sizes (i.e. what is power of current study)? # Supply alpha alpha=0.05 # t value needed to give alpha under NH zcrit<-qt(alpha,Nmz+Ndz-2,0,lower.tail=FALSE) #what is zcrit? zcrit #Note that this critical value is for a one-tailed test # add a vertical line to figure at zcrit # abline is a great little command for adding a single line... abline(v=zcrit) # Power of test with current sample sizes beta<-pnorm(zcrit,zdelta,1,lower.tail=FALSE) # zcrit is the threshold of significance under the NH; zdelta is the mean t value with the current sample # Now figure out sample sizes needed for desired power #supply desired power (beta) beta<-0.8 # How far does the curve need to move to the right to give beta=0.8 zadd<-qt(beta,Nmz+Ndz-2,0,lower.tail=TRUE) zwant<-zcrit+zadd #lines is a great command too for plotting - it adds a line to the plot lines(z+zwant,y,col="green") # our initial samples sizes, Nmz and Ndz, gave us a t of zdelta # but we want samples to give us a expected t of zwant to give us power beta # compute the multiplication factor to get the desired sample sizes factor<-(zwant/zdelta)^2 # so our desired sample sizes are NNmz<-Nmz*factor NNdz<-Ndz*factor