library(MASS)
library(gwsem)

source("setup.R")
set.seed(12345)

# ------- Generate data for a 3 indicator factor model
loadings <- as.matrix(c(.7, .7, .7))
res <- c(1-loadings^2)
sigma <- loadings %*% t(loadings) + diag(res)
phenoData <- mvrnorm(numPeople, mu=rep(0,3), Sigma = sigma, empirical=TRUE)
colnames(phenoData) <- indicatorNames
phenoData <- as.data.frame(phenoData)

# ------- Add hits for the latent factor

hits <- list()
hits[['substanceUse']] <- sample.int(numSNP, 2)

m1 <- buildItem(phenoData, depVar = "tobacco")
snp1 <- GWAS(m1, paste0(dir, 'plink2.pgen'),
     out = 'result.log', SNP = hits[['substanceUse']][1])
snp2 <- GWAS(m1, paste0(dir, 'plink2.pgen'),
             out = 'result.log', SNP = hits[['substanceUse']][2])
phenoData <- (phenoData +
     .1 * snp1$data$observed$snp +
     .08 * snp2$data$observed$snp)

# ------- Add hits specific to each indicator

for (ex in 1:length(indicatorNames)) {
  hits[[ indicatorNames[ex] ]] <- sample.int(numSNP, 2)
  myhit <- hits[[ indicatorNames[ex] ]]
  snp3 <- GWAS(m1, paste0(dir, 'plink2.pgen'),
               out = 'result.log', SNP = myhit[1])
  snp4 <- GWAS(m1, paste0(dir, 'plink2.pgen'),
               out = 'result.log', SNP = myhit[2])
  
  phenoData[[ indicatorNames[ex] ]] <-
    (phenoData[[ indicatorNames[ex] ]] +
      .1 * snp3$data$observed$snp +
      .075 * snp4$data$observed$snp)
}

# ------- Add principle components (covariates)

for (ix in 1:numPCs) {
  phenoData[[ paste0('pc',ix) ]] <- rnorm(nrow(phenoData))
}

# ------- write to disk

write.table(phenoData, file=paste0(dir,"plink2.psam"),
            sep='\t', row.names = FALSE)

write.table(unlist(hits), file=paste0(dir,"hits.csv"),
            col.names = FALSE)