#! /usr/bin/Rscript
# Creates effect sizes from N(0,sqrt(1/2pq)), and creates genetic phenotypes from that and raw 0,1,2 scores
# It then normalizes the genetic phenotypes to N(0,1), then adds in error variance for the specified h2

#  Draws in plink-generated .raw format files (0/1/2 coding), from --recode A


M<-3363 #number of individuals
h2=0.5 #simulated narrow-sense heritability
P<-0.2 # simulated prevalance for dichotomous phenotype

  	data<-read.table("data.raw"),header=T)
    CG<-data[,7:ncol(data)]   # in 012 additive format with the first 6 rows like a ped file from plink 
	II<-data[,1] # individual 
	pi<-apply(CG,2,sum)/(2*M)
	Bg<-rnorm(ncol(CG),0,sqrt(1/(2*pi*(1-pi)))) # within MAF classes, this allows rarer variants to have larger effect sizes. Within these classes, this results in Bg^2*(2pq) [the additive Vg] to be uniform
    gpheno<-Bg%*%t(CG) # creating phenotypes as functions of genetic effects and genotypes        
    ngpheno<-(gpheno-mean(gpheno))/sd(gpheno)  # normalizing the genetic phenos to N(0,1), also not really necessary, but makes the math easy for Vg,Vp, Ve
    hist(ngpheno)
    Vg<-var(as.vector(ngpheno))
    pheno<-ngpheno+rnorm(M,0,sqrt((1-h2_3)*Vg/h2)) 
   	Vp<-var(as.vector(pheno))

    h2_hat<-Vg/
    Spheno<-cbind(as.character(II),as.character(II),t(pheno))
    write.table(Spheno,"phenotypes.txt",row.names=F,col.names=F,quote=F,sep="\t")
	print (c("h2_sample: ",h2_hat))