User Tools

Site Tools


keller_and_evans_lab:gscan_db_ga_p

Differences

This shows you the differences between two versions of the page.

Link to this comparison view

Both sides previous revision Previous revision
Next revision
Previous revision
keller_and_evans_lab:gscan_db_ga_p [2017/02/14 21:14]
scott /* Phenotypes */
keller_and_evans_lab:gscan_db_ga_p [2019/10/31 12:28] (current)
66.249.87.23 ↷ Links adapted because of a move operation
Line 576: Line 576:
  
 ====== ARIC ====== ====== ARIC ======
- 
-(Hannah/Joyce to update this section following Framingham as a guide) 
- 
-===== ID Mapping ===== 
  
  
Line 864: Line 860:
 ### sort ARIC.AFR.covariates.ped | uniq > ARIC.AFR.covariates.ped ### sort ARIC.AFR.covariates.ped | uniq > ARIC.AFR.covariates.ped
  
- 
- 
- 
-===== Genotypes ===== 
  
  
Line 878: Line 870:
 ===== Phenotypes ===== ===== Phenotypes =====
  
-Description of phenotypes can be found here: {{file_mesa_phenotypes_-_final.pdf}}+Description of phenotypes can be found here: {{:file_mesa_phenotypes_-_final.pdf}}
  
  
 ====== eMERGE ====== ====== eMERGE ======
  
-(Hannah/Joyce to update following Framingham as a guide) 
  
  
 ===== Phenotypes ===== ===== Phenotypes =====
  
-Description of phenotypes can be found here{{file_emerge.pdf}}+ 
 + 
 +options(stringsAsFactors=F) 
 + 
 + 
 +### eMERGE is broken into different consent classes. We can conduct analyses on hmb, hmb-gso-nic, and  
 + 
 + 
 +emerge.hmb <- read.table("/work/KellerLab/dbGaP/eMERGE-MergedSet/PhenoGenotypeFiles/RootStudyConsentSet_phs000360.eMERGE_MergedSet.v3.p1.c1.HMB/PhenotypeFiles/phs000360.v3.pht003255.v2.p1.c1.MergedSet_Subject_Phenotypes.HMB.txt.gz", header=TRUE, sep="\t", stringsAsFactors=F) 
 +emerge.hmb.genos <- read.table("/work/KellerLab/dbGaP/eMERGE-MergedSet/PhenoGenotypeFiles/RootStudyConsentSet_phs000360.eMERGE_MergedSet.v3.p1.c1.HMB/GenotypeFiles/matrix/c1.HMB/eMerge_660_11212012_c1.fam", header=FALSE, sep="\t", stringsAsFactors=F) 
 + 
 + 
 +emerge.hmb.gso.nic <- read.table("/work/KellerLab/dbGaP/eMERGE-MergedSet/PhenoGenotypeFiles/RootStudyConsentSet_phs000360.eMERGE_MergedSet.v3.p1.c3.HM-B-GSO-NIC/PhenotypeFiles/phs000360.v3.pht003255.v2.p1.c3.MergedSet_Subject_Phenotypes.HM-B-GSO-NIC.txt.gz", header=TRUE, sep="\t", stringsAsFactors=F) 
 +emerge.hmb.gso.nic.genos <- read.table("/work/KellerLab/dbGaP/eMERGE-MergedSet/PhenoGenotypeFiles/RootStudyConsentSet_phs000360.eMERGE_MergedSet.v3.p1.c3.HM-B-GSO-NIC/GenotypeFiles/matrix/c3.HM-B-GSO-NIC/eMerge_660_11212012_c3.fam", header=FALSE, sep="\t", stringsAsFactors=F) 
 + 
 + 
 +emerge.hmb.gso <- read.table("/work/KellerLab/dbGaP/eMERGE-MergedSet/PhenoGenotypeFiles/RootStudyConsentSet_phs000360.eMERGE_MergedSet.v3.p1.c4.HMB-GSO/PhenotypeFiles/phs000360.v3.pht003255.v2.p1.c4.MergedSet_Subject_Phenotypes.HMB-GSO.txt.gz", header=TRUE, sep="\t", stringsAsFactors=F) 
 +emerge.hmb.gso.genos <- read.table("/work/KellerLab/dbGaP/eMERGE-MergedSet/PhenoGenotypeFiles/RootStudyConsentSet_phs000360.eMERGE_MergedSet.v3.p1.c4.HMB-GSO/GenotypeFiles/matrix/c4.HMB-GSO/eMerge_660_11212012_c4.fam", header=FALSE, sep="\t", stringsAsFactors=F) 
 + 
 + 
 +### Merge all files above according to SUBJID, which is used in the 
 +### genotype files. 
 + 
 +emerge <- merge(emerge.hmb, emerge.hmb.gso, all=T) 
 +emerge <- merge(d, emerge.hmb.gso.nic, all=T) 
 + 
 +### SMOKING INITIATION 
 +### 
 +### The eMERGE variable name is SMOKING_STATUS 
 +###      C65108 = never smoker 
 +###      C67147 = current smoker 
 +###      C67148 = past smoker 
 +###      C67151 = Unknown if ever smoked 
 +### 
 +### Descriptives: 
 +### 
 +### table(emerge$SMOKING_STATUS) 
 +### 
 +### C65108 C67147 C67148 C67151  
 +###   2217   1736   3457   9635  
 + 
 +si <- emerge$SMOKING_STATUS 
 +si[si == "C67147" | si == "C67148"] <- 2 
 +si[si == "C65108"] <- 1 
 +si[si != 1 & si != 2] <- NA 
 + 
 +### SMOKING Cessation 
 +### 
 +### Current == 2 & Former == 1 in GSCAN. This is already the case for these data. 
 + 
 +sc <- emerge$SMOKING_STATUS 
 +sc[sc == "C67147"] <- 2 
 +sc[sc == "C67148"] <- 1 
 +sc[sc != 1 & sc != 2] <- NA 
 + 
 + 
 +### eMERGE age variable is tricky because there is no obvious age at 
 +### assessment. We will use their "DECADE_BIRTH" as a terrible 
 +### approximation. 
 +### 1=1900-1919; 2=1920-1929, 3=1930-1939; 4=1940-1949; 5=1950-1959; 6=Unknown 
 +### 
 +### Descriptives: 
 +### 
 +### table(emerge$DECADE_BIRTH) 
 +### 
 +###      1    2    3    4    5    6    7    8    9   99  
 +###    612 2667 3533 4439 3127 1291  761  490   10  109  
 +birthyear <- emerge$DECADE_BIRTH 
 +birthyear[birthyear == "99"] <- NA 
 +birthyear[birthyear == "."] <- NA 
 + 
 + 
 +### SEX 
 +sex <- emerge$SEX 
 +sex[sex == "C46109"] <- 1 
 +sex[sex == "C46110"] <- 2 
 + 
 + 
 +### Scott decided not to correct for additional case-control variables 
 +### given what appears to be a highly complex sample and uncertainty 
 +### about the best course of action to account for disease status in 
 +### conducting smoking analyses. 
 + 
 +phenotypes <- data.frame(fid = emerge$SUBJID, 
 +                         iid = emerge$SUBJID, 
 +                         patid = "x", 
 +                         matid = "x", 
 +                         sex = sex, 
 +                         si = si, 
 +                         sc = sc) 
 + 
 +phenotypes[is.na(phenotypes)] <- "x" 
 + 
 +write.table(phenotypes, 
 +            "/work/KellerLab/vrieze/GSCAN/GWAS/summary_stats_generated_internally/eMERGE/GSCAN_eMERGE_phenotypes.ped", 
 +            row.names=F, 
 +            quote = F, 
 +            sep="\t"
 + 
 + 
 +covariates  <- data.frame(fid = emerge$SUBJID, 
 +                          iid = emerge$SUBJID, 
 +                          patid = "x", 
 +                          matid = "x", 
 +                          sex = sex, 
 +                          birthyear = birthyear)                          
 + 
 +covariates[is.na(covariates)] <- "x" 
 + 
 +write.table(covariates, 
 +            "/work/KellerLab/vrieze/GSCAN/GWAS/summary_stats_generated_internally/eMERGE/GSCAN_eMERGE_covariates.ped", 
 +            row.names=F, 
 +            quote = F, 
 +            sep="\t"
  
  
keller_and_evans_lab/gscan_db_ga_p.1487132064.txt.gz · Last modified: 2017/02/14 21:14 by scott