#
# First R session 2018
#
# Walk-through of basic R usage: importing and exporting a data set,
# inspecting it, creating/recoding variables, descriptive statistics and
# simple plots. Everything operates on the data frame `D`.
#

# Importing data ----
# First make sure your data is in an easy-to-read format such as space-,
# tab- or comma-delimited (CSV) text.
# The calls below are alternative ways of loading the data; each assignment
# replaces whatever an earlier one stored in D.

# Whitespace-delimited text with a header row
D <- read.table("ozbmi2.txt", header = TRUE)
# Same file, additionally reading the string "-99" as missing (NA)
D <- read.table("ozbmi2.txt", na.strings = "-99", header = TRUE)

# CSV
D <- read.table("ozbmi2.csv", sep = ",", header = TRUE)
D <- read.csv("ozbmi2.csv", header = TRUE)

# Exporting data ----
# Tab delimited
write.table(D, "newdata.txt", sep = "\t")
# CSV
write.csv(D, "newdata.csv")

# Checking data ----
# List the variables in D
names(D)
# Dimensions of D
dim(D)
# Print the first 10 rows of D
head(D, n = 10)

# Referring to variables in D ----
# Format is Object$variable
head(D$age, n = 10)

# Basic manipulation ----
# You can make new variables within an existing object
D$newage <- D$age * 100
# Or overwrite a variable
D$age <- D$age * 100
# Or recode a variable
# NOTE(review): age was multiplied by 100 on the line above, so the cut-off
# of 30 is applied to the rescaled values -- confirm this is intended.
D$catage <- ifelse(D$age > 30, "older", "younger")

# Describing data ----
# Mean and variance (missing values are dropped)
mean(D$age, na.rm = TRUE)
var(D$age, na.rm = TRUE)

# A bit more info
summary(D$age)
# NOTE(review): the recode above creates `catage`, not `agecat`; the lines
# below assume an `agecat` column already exists in the data -- confirm.
summary(D$age[which(D$agecat == 1)])

# What about a categorical variable
table(D$agecat)
table(D$agecat, D$zyg)

# Correlations (computed on complete cases only)
cor(D$wt1, D$bmi1, use = "complete.obs")
cor(D$ht1, D$bmi1, use = "complete.obs")

# Basic plots ----
# Histogram
hist(D$age) # basic
hist(D$age, breaks = 12, col = "red")
# Add labels
hist(
  D$age,
  breaks = 12,
  col = "red",
  xlab = "age in years",
  main = "Histogram of age"
)

# Kernel density estimate
density(D$age, na.rm = TRUE) # returns the density data

## Kernel density plot by zygosity - don't run this
# library(sm)
# attach(D)
## create value labels
# zyg.f <- factor(zyg, levels= seq(1,5),labels = c("MZF", "MZM", "DZF", "DZM", "DZOS"))
## plot densities
# sm.density.compare(age, zyg, xlab="Years")
# title(main="Years by ZYG")
## add legend
# colfill<-c(2:(2+length(levels(zyg.f))))
# legend(.8,3, levels(zyg.f), fill=colfill)