#!/usr/bin/Rscript ############################################################################################ ### Genotype = two draws from a binomial distribution with an allele frequency rbinom(1,1,.5) ### 10 individuals, same allele frequency rbinom(10,2,.5) ### HWE expectations = p^2, 2pq, q^2 hweprop = c(0.5^2, 2*.5*.5,0.5^2) 100*hweprop table(rbinom(100,2,.5)) ############################################################################################ ### Creating many genotypes from a range of allele frequencies and for many individuals N<-2000 ### Number of individuals M<-500 ### Number of loci to create genotypes for af<-runif(M,0.05,0.45) ### Allele frequencies of a population hist(af,breaks=50) ggen<-function(rep){rbinom(M,2,af)} ### Function to make genotypes for individuals from the allele frequency gt<-sapply(rep(0,N),ggen) ### Creating the individual genotypes af_samp<-apply(gt,1,sum)/(N*2) ### The allele frequencies in the SAMPLE hist(af_samp,breaks=20) plot(af,af_samp) ### Why don't the allele frequencies in the simulated sample equal the true (also simulated) population allele frequencies? ############################################################################################ ### Making phenotypes from a simple polygenic model for the N individuals from M loci ### first make allelic effects: 1XM matrix b<-matrix(rnorm(M,0,1/sqrt(2*af*(1-af))),1,M) #why use afs rather than p? Impact of the assumed variance of allelic effects? #b<-rnorm(M,0,1) ### use a different variance for the allelic effects par(mfrow=c(3,1),mar=c(3.5,3.5,1,1),mgp=c(2,1,0)) hist(af);hist(b);plot(af,b) ### What is the impact of the different assumptions of variance of allelic effects? (see Falconer & Mackay) h2_persnp_1<-2*af*(1-af)*rnorm(M,0,1)^2 h2_persnp_scaled<-2*af*(1-af)*rnorm(M,0,1/sqrt(2*af*(1-af)))^2 par(mfrow=c(2,1),mar=c(3.25,3.25,1,1),mgp=c(2,1,0)) plot(af,h2_persnp_1);plot(af,h2_persnp_scaled) ### Simulating the phenotypes gi<-as.vector(b%*%gt) ### Making the individuals' genotypic values Vg<-var(gi) hist(gi,breaks=50) ### Add in error ei<-rnorm(N,0,sqrt(Vg)) #Note this should have h2=0.5. Could vary this as well? Make a difference? Ve <-var(ei) hist(ei) ### Make phenotype: p <- gi + ei hist(p) Vp<-var(as.vector(p)) ############################################################################################ ### Adjusting heritability: altering the impact of genetic and environmental variance ### Could normalize first, which makes the math a bit easier to think about heritability: h2<-.8 ### Narrow-sense heritability to simulate ngi<-(gi-mean(gi))/sd(gi) ngi_scale<-scale(gi) #Built-in function to normalize the dat plot(ngi,ngi_scale) Vgn<-var(ngi) print(Vgn) nei<-rnorm(N,0,sqrt((1-h2)*Vgn/h2)) Ven<-var(nei) pn<-ngi+nei hist(pn) var(pn)==Vgn+Ven+2*cov(ngi,nei) h2_hat<-Vgn/(var(pn)) #### The heritability from the simulated sample. Why doesn't this equal h2=0.8 exactly? h2_hat ############################################################################################ ###### Very simple association analysis for binary trait k<-.3 ### Population prevalence bp<-rep(0,N) bp[which(pn>quantile(pn,1-k))]<-1 ### Assuming that the sample is a random sample wrt disease sum(bp)/N library(foreach) # setting up environment to use multiple cores if possible library(doMC) registerDoMC(cores=4) fitted<-foreach(nn=1:M,.combine=rbind)%dopar%{ Atmp<-N*cor(bp,gt[nn,])^2 #### Armitage trend test c(Atmp,1-pchisq(Atmp,1)) } ### Otherwise: fitted<-NULL for(nn in 1:M){ Atmp<-N*cor(bp,gt[nn,])^2 #### Armitage trend test fitted<-rbind(fitted,c(Atmp,1-pchisq(Atmp,1))) } par(mfrow=c(3,1),mar=c(3.25,3.25,1,1),mgp=c(2,1,0)) plot(fitted[,1],ylab="association statistic (X2)",xlab="Locus index") plot(-log10(fitted[,2]),ylab = "-log10 (p-value)",xlab="Locus index") plot(fitted[,1],-log10(fitted[,2]),ylab = "-log10 (p-value)",xlab="association statistic (X2)")