########################################################################################################### ##### Simulate and analyze digenic epistasis for univariate SNP data ##### The notation is that developed by Robson (see Mather and Jinks, Biometrical Genetics, 1982, London: Chapman Hall, Ch. 5). ##### Different values of h, i, j and l generate different patterns of epistasis. ##### For example: da=db=ha=hb=iab=jab=jba=lab yields the 9:7 F2 segregation ratio of "complementary" interaction (circuits in series); ##### da=db=ha=hb=-iab=-jab=-jba=-lab yields the 15:1 ratio of "duplicate gene" interaction (circuits in parallel) ########################################################################################################### ## Supply constants set.seed(62438) # Seed for random number generation (setting seed allows simulations to be repeated from same starting point) NSNP<-2 # of loci N<-1600 # of subjects K<-1 # of variables VE<- 0.001 # Residual (e.g. "environmental") variance se<- sqrt(VE) # Residual standard error # Supply freqs of "increasing" SNPs PSNP<-c(0.5,0.5) PSNP # Supply homozygous, heterozygous and epistatic effects for digenic model #Supply homozygous (d[locus]) effects at each locus affecting phenotype d<-c(1,1) h<-c(1,1) # Heterozygous effects i<- -1 # d x d epistasis l<- -1 # h x h epistasis jab<- -1 # d x h epistasis jba<- -1 # h x d epistasis # Set up matrix for homologues of SNP genotypes (assume independence of SNPs) G<-array(dim=c(N,2,NSNP)) # Set up matrices for coefficients of homozygous, heterozygous and epistatic genotypic effects Dtype<-matrix(nrow=N,ncol=NSNP,byrow=TRUE) # Array for coeffients of additive effects Htype<-matrix(nrow=N,ncol=NSNP,byrow=TRUE) # Array for coeffients of heterozygous effects Itype<-matrix(nrow=N,ncol=1,byrow=TRUE) # Array for coeffients of additive x additive epistatic effects Ltype<-matrix(nrow=N,ncol=1,byrow=TRUE) # Array for coeffients of dominance x dominance epistatic effects Jabtype<-matrix(nrow=N,ncol=1,byrow=TRUE) # Array for coeffients of additive x dominance epistatic effects Jbatype<-matrix(nrow=N,ncol=1,byrow=TRUE) # Array for coeffients of dominance x additive epistatic effects # The isub loop is done for each subject. The isnp loop is done for each locus. These loops are done for both homologues # Simulate halotypes at two loci for(ichrom in 1:2){ for(isub in 1:N){ for(isnp in 1:NSNP){ pp<-PSNP[isnp] G[isub,ichrom,isnp]<-rbinom(1,1,pp) # rbinom samples a binomial outcome (1 or 0) where the probability of success ("1" allele) is pp. }} } # So, G[i,j,k] is the allele (0 or 1) of the ith subject (1:N) on the jth chromosome (1:2) at the kth locus (1:NSNP) # Generate coefficients of genotypic effects from 0/1 alleles # Code additive (homozygous) effects of genotypes as -1,0,1 Dtype[,]<-G[,1,]+G[,2,]-1 # Compute coefficients of heterozygous ("dominance" deviations) Htype[,]<-1-Dtype[,]*Dtype[,] # Compute coefficients of additive x additive interactions Itype[ ]<- Dtype[,1]*Dtype[,2] # Coefficients of dominance x dominance interactions Ltype[ ]<- Htype[,1]*Htype[,2] # Coefficients of additive x dominance interactions Jabtype[ ]<-Dtype[,1]*Htype[,2] # Coeficients of dominance x additive interactions Jbatype[ ]<-Htype[,1]*Dtype[,2] Dtype[1:10, 1:2] # Generate Genetic Effects Ey<-matrix(nrow=N,ncol=1,byrow=TRUE) Ey[ ]<-Dtype[,1]*d[1]+Dtype[,2]*d[2]+Htype[,1]*h[1]+Htype[,2]*h[2] # Additive and heterozygous effects # Add in epistatic effects Ey[ ]<- Ey[ ] + Itype[ ]*i[ ] + Jabtype[ ]*jab[ ] + Jbatype[ ]*jba[ ] + Ltype[ ]*l[ ] # Simulate random environmental effects E<-rnorm(N,sd=se) Y<-matrix(nrow=N,ncol=1,byrow=TRUE) # Vector of phenotypic values Y[ ]<-Ey[ ]+ E[ ] # Plot Phenotypes hist(Y) # If phenotypes are categorical (i.e. VE=0) then can also tabulate phenotype frequencies (e.g. segregation ratios) # table(Y) ################################################################# # Bind coefficients of genetic model and environment into matrix (Not needed here!) #X<-matrix(nrow=N,ncol=9,byrow=TRUE) # Vector of phenotypic values #X[,]<-cbind(Dtype[,1],Dtype[,2],Htype[,1],Htype[,2],Itype[ ],Jabtype[ ],Jbatype[ ],Ltype[ ],E[ ]) ################################################################# # Estimate parameters of some linear models for effects of genes and environment lm.E<-lm(Y~E) # Fit environment lm.D<-lm(Y~Dtype) # Fit additive genetic effects lm.DH<-lm(Y~Dtype+Htype) # Fit additive and dominance effects lm.G<-lm(Y~Dtype+Htype+Itype+Jabtype+Jbatype+Ltype) # Fit additive, dominant and epistatic effects # Now you can summarize and play with the model-fitting results, e.g.: summary(lm.G) anova(lm.G)