#!/usr/bin/env Rscript # Read in PGI df_PGI <- read.table("PGI_height_1kG_ph3_EUR_SBayesR.sscore", header=T) # PGI histogram png('PGI_histogram.png', height=6, width=6, units='in', res=300) hist(df_PGI$SCORE1_AVG, breaks=30) dev.off() # Standardize PGI df_PGI$PGI <- scale(df_PGI$SCORE1_AVG)[, 1] # Read in phenotype data HEIGHT <- read.table("/home/aysu/2025/PGIpractical/data/HEIGHT.pheno", header=T) # Merge with PGI df <- merge(HEIGHT, df_PGI, by=c("IID")) # Linear model w/o PCs model_linear <- lm(height ~ PGI + sex, data=df) summary(model_linear) # Incremental-R^2 (Q5.3) model_linear_baseline <- lm(height ~ sex, data=df) summary(model_linear)$r.squared - summary(model_linear_baseline)$r.squared # Add PCs to the models PC <- paste0("PC", 1:10) PC <- paste0(PC, collapse="+") model_linear_baseline <- lm(paste0("height ~ sex +", PC), data=df) model_linear <- lm(paste0("height ~ sex + PGI + ", PC), data=df) # Confidence intervals (Q5.5) bs_func <- function(s,PGI){ #set seed number set.seed(s) # draw indexes for resampling (with replacement) BOOT_IN = sample(1:nrow(df), nrow(df), replace=TRUE) # compute incremental R2 given the drawn indexes and PGI. summary(lm(paste0("height ~ sex +", PGI, "+", PC), df[BOOT_IN,]))$r.squared - summary(lm(paste0("height ~ sex + ", PC), df[BOOT_IN,]))$r.squared } rep = 1000 BOOT = sapply(1:rep, bs_func, "PGI") BOOT = sort(BOOT) BOOT[25] BOOT[975] # Create binary pheno df$heightBin <- ifelse(df$height < median(df$height), 0, 1) # Logistic models model_log_baseline <- glm(paste0("heightBin ~ sex +", PC), data=df, family=binomial(link="logit")) model_log <- glm(paste0("heightBin ~ sex + PGI + ",PC), data=df, family=binomial(link="logit")) summary(model_log) # Nagelkerke pseudo-R^2 (Q5.6) library(rcompanion) nagelkerke(model_log, null = model_log_baseline, restrictNobs = FALSE)