#! /usr/bin/Rscript # Creates effect sizes from N(0,sqrt(1/2pq)), and creates genetic phenotypes from that and raw 0,1,2 scores # It then normalizes the genetic phenotypes to N(0,1), then adds in error variance for the specified h2 # Draws in plink-generated .raw format files (0/1/2 coding), from --recode A M<-3363 #number of individuals h2=0.5 #simulated narrow-sense heritability P<-0.2 # simulated prevalance for dichotomous phenotype data<-read.table("data.raw"),header=T) CG<-data[,7:ncol(data)] # in 012 additive format with the first 6 rows like a ped file from plink II<-data[,1] # individual pi<-apply(CG,2,sum)/(2*M) Bg<-rnorm(ncol(CG),0,sqrt(1/(2*pi*(1-pi)))) # within MAF classes, this allows rarer variants to have larger effect sizes. Within these classes, this results in Bg^2*(2pq) [the additive Vg] to be uniform gpheno<-Bg%*%t(CG) # creating phenotypes as functions of genetic effects and genotypes ngpheno<-(gpheno-mean(gpheno))/sd(gpheno) # normalizing the genetic phenos to N(0,1), also not really necessary, but makes the math easy for Vg,Vp, Ve hist(ngpheno) Vg<-var(as.vector(ngpheno)) pheno<-ngpheno+rnorm(M,0,sqrt((1-h2_3)*Vg/h2)) Vp<-var(as.vector(pheno)) h2_hat<-Vg/ Spheno<-cbind(as.character(II),as.character(II),t(pheno)) write.table(Spheno,"phenotypes.txt",row.names=F,col.names=F,quote=F,sep="\t") print (c("h2_sample: ",h2_hat))