library(MASS) library(gwsem) source("setup.R") set.seed(12345) # ------- Generate data for a 3 indicator factor model loadings <- as.matrix(c(.7, .7, .7)) res <- c(1-loadings^2) sigma <- loadings %*% t(loadings) + diag(res) phenoData <- mvrnorm(numPeople, mu=rep(0,3), Sigma = sigma, empirical=TRUE) colnames(phenoData) <- indicatorNames phenoData <- as.data.frame(phenoData) # ------- Add hits for the latent factor hits <- list() hits[['substanceUse']] <- sample.int(numSNP, 2) m1 <- buildItem(phenoData, depVar = "tobacco") snp1 <- GWAS(m1, paste0(dir, 'plink2.pgen'), out = 'result.log', SNP = hits[['substanceUse']][1]) snp2 <- GWAS(m1, paste0(dir, 'plink2.pgen'), out = 'result.log', SNP = hits[['substanceUse']][2]) phenoData <- (phenoData + .1 * snp1$data$observed$snp + .08 * snp2$data$observed$snp) # ------- Add hits specific to each indicator for (ex in 1:length(indicatorNames)) { hits[[ indicatorNames[ex] ]] <- sample.int(numSNP, 2) myhit <- hits[[ indicatorNames[ex] ]] snp3 <- GWAS(m1, paste0(dir, 'plink2.pgen'), out = 'result.log', SNP = myhit[1]) snp4 <- GWAS(m1, paste0(dir, 'plink2.pgen'), out = 'result.log', SNP = myhit[2]) phenoData[[ indicatorNames[ex] ]] <- (phenoData[[ indicatorNames[ex] ]] + .1 * snp3$data$observed$snp + .075 * snp4$data$observed$snp) } # ------- Add principle components (covariates) for (ix in 1:numPCs) { phenoData[[ paste0('pc',ix) ]] <- rnorm(nrow(phenoData)) } # ------- write to disk write.table(phenoData, file=paste0(dir,"plink2.psam"), sep='\t', row.names = FALSE) write.table(unlist(hits), file=paste0(dir,"hits.csv"), col.names = FALSE)