#
# First R session 2018
#

# Importing data
# First make sure your data is in an easy-to-read plain-text format:
# space-delimited, tab-delimited, or CSV.
# Each call below assigns to D, so each one replaces the result of the
# previous call; they are alternative ways of reading the data.
# Whitespace-delimited text file whose first row holds the column names
D <- read.table("ozbmi2.txt", header = TRUE)
# Same file, with the string -99 used as the missing-value (NA) marker
D <- read.table("ozbmi2.txt", na.strings = "-99", header = TRUE)
# CSV, via read.table() with an explicit comma separator
D <- read.table("ozbmi2.csv", sep = ",", header = TRUE)
# CSV, via the read.csv() convenience wrapper
# NOTE(review): the two CSV reads do not pass na.strings = "-99"; if the CSV
# uses the same missing-value code as the text file, -99 is kept as a real
# value here - confirm.
D <- read.csv("ozbmi2.csv", header = TRUE)

# Exporting data
# Tab-delimited text file
write.table(D, "newdata.txt", sep = "\t")
# Comma-separated (CSV) file
write.csv(D, "newdata.csv")

# Checking data
# Column (variable) names of D
colnames(D)
# Number of rows and number of columns of D
dim(D)
# Show the first 10 rows of D
head(D, 10)
# A single variable is referred to as Object$variable;
# show the first 10 values of age
head(D$age, 10)

# Basic manipulation
# Assigning to a name that does not exist yet adds a new variable to the object
D[["newage"]] <- 100 * D[["age"]]
# Assigning to an existing name replaces that variable
D[["age"]] <- 100 * D[["age"]]
# Recode a numeric variable into two labelled categories
# NOTE(review): age was multiplied by 100 on the line above, so the cutoff of
# 30 is compared against the rescaled values - confirm that this is intended.
D[["catage"]] <- ifelse(D[["age"]] > 30, "older", "younger")

# Describing data
# Mean and variance, ignoring missing values
mean(D$age, na.rm = TRUE)
var(D$age, na.rm = TRUE)
# A bit more info: quartiles, mean, range and the number of NAs
summary(D$age)
# The same summary restricted to one age category. The recoded variable
# created earlier in this script is named catage and holds the labels
# "older" / "younger"; which() drops rows where the comparison is NA.
summary(D$age[which(D$catage == "older")])
# What about a categorical variable? Use frequency tables
table(D$catage)
# Cross-tabulation of age category by zyg
table(D$catage, D$zyg)
# Correlations, using only the rows where both variables are observed
# (the option name is spelled out rather than relying on partial matching)
cor(D$wt1, D$bmi1, use = "complete.obs")
cor(D$ht1, D$bmi1, use = "complete.obs")

# Basic plots
# Histogram with default settings
hist(D$age)
# Suggest about 12 bins and fill the bars in red
hist(D$age, breaks = 12, col = "red")
# Add an x-axis label and a title
hist(D$age, breaks = 12, col = "red", xlab = "age in years",
     main = "Histogram of age")
# Kernel density plot
# density() only computes the estimate; na.rm must be the logical TRUE
density(D$age, na.rm = TRUE) # returns the density data
# plot() draws the estimated density curve
plot(density(D$age, na.rm = TRUE))

## Kernel density plot by zygosity - don't run this
#library(sm)
#attach(D)
## create value labels
#zyg.f <- factor(zyg, levels= seq(1,5),labels = c("MZF", "MZM", "DZF", "DZM", "DZOS"))
## plot densities
#sm.density.compare(age, zyg, xlab="Years")
#title(main="Years by ZYG")
## add legend 
#colfill<-c(2:(2+length(levels(zyg.f))))
#legend(.8,3, levels(zyg.f), fill=colfill)