####  "Power on the back of an envelope"  
##  Some useful bits of R
##  functions based on normal tests
##  Written mostly by Lindon Eaves 

##Always always always check the help of new functions
?dnorm     #dnorm stands for density of normal distribution

#  normal probability integral
P<-pnorm(1.96,0,1,lower.tail=FALSE)

#  threshold to give required integral (area, probability)
q<-qnorm(0.05,0,1,lower.tail=FALSE)

#  generate random sample of 5 values from N[0,1]
x<-rnorm(5,0,1)


#  generate set (vector (aka array), of sequential numbers)
z<-seq(-5,5,by=0.02)

#  generate ordinates of normal curve for given z values
y<-dnorm(z,0,1)
#  plot curve
plot(z,y,type="l",col="black",main="Standardized Normal Distribution")


#  add another curve 1 unit to the right
lines(z+1,y,col="red")

###  Set up a twin study
###  true correlations
rmz<-0.2
rdz<-0.1
#  initial sample sizes
Nmz<-100
Ndz<-100
# variances of correlations (approx. assuming normal and no correlation)
# Note that a correction for r can be applied - such that (1/n)*(1-r^2)^2 = variance
Vmz<-1/Nmz
Vdz<-1/Ndz
# difference between correlations
delta<-rmz-rdz

# variance of difference as Var(X-Y) = Var(x)+var(y) - 2*cov(X,Y)
# Note that 2*cov(X,Y) = 0 as we assume our MZs and DZs are independent

Vdelta<-Vmz+Vdz

# standard error of delta
sdelta<-sqrt(Vdelta)


# expected value of t for given test
zdelta<-delta/sdelta


# probability that t>zdelta, invoking the t distribution
P<-pt(zdelta,Nmz+Ndz-2,0,lower.tail=FALSE)

#  Plot distribution of z around expected value
#  generate set (vector, array, of sequential numbers)
z<-seq(-5,5,by=0.02)
#  generate ordinates of normal curve for given z values
y<-dnorm(z,0,1)
#  plot curve
plot(z,y,type="l",col="black",main="Distibution of t under NH and alternative hypothesis")
#  add another curve zdelta units to the right
lines(z+zdelta,y,col="red")


#  How often would t be significant with these sample sizes (i.e. what is power of current study)?
#  Supply alpha
alpha=0.05
#  t value needed to give alpha under NH
zcrit<-qt(alpha,Nmz+Ndz-2,0,lower.tail=FALSE)
#what is zcrit?
zcrit

#Note that this critical value is for a one-tailed test

# add a vertical line to figure at zcrit
# abline is a great little command for adding a single line...

abline(v=zcrit)

# Power of test with current sample sizes
beta<-pnorm(zcrit,zdelta,1,lower.tail=FALSE)

# zcrit is the threshold of significance under the NH;  zdelta is the mean t value with the current sample

# Now figure out sample sizes needed for desired power
#supply desired power (beta)
beta<-0.8
# How far does the curve need to move to the right to give beta=0.8
zadd<-qt(beta,Nmz+Ndz-2,0,lower.tail=TRUE)
zwant<-zcrit+zadd

#lines is a great command too for plotting - it adds a line to the plot
lines(z+zwant,y,col="green")


# our initial samples sizes, Nmz and Ndz, gave us a t of zdelta
#  but we want samples to give us a expected t of zwant to give us power beta
# compute the multiplication factor to get the desired sample sizes
factor<-(zwant/zdelta)^2

# so our desired sample sizes are
NNmz<-Nmz*factor
NNdz<-Ndz*factor