R version 3.1.2 (2014-10-31) -- "Pumpkin Helmet"
Copyright (C) 2014 The R Foundation for Statistical Computing
Platform: x86_64-pc-linux-gnu (64-bit)

R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.

R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.

Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.

> PCs <- read.delim(file="dutch_1kG.R.evec",skip=1,head=FALSE,sep="")
> 
> 	## 1. plot ##
> 
> # choose colors for plot
> cls=as.character(PCs[,13])
> cls[cls=="3"]="#FFC125"	# Dutch individuals
> cls[cls=="4"]="#98F5FF"	# CEPH individuals
> cls[cls=="5"]="#6495ED"	# British individuals from England and Scotland
> cls[cls=="6"]="#B9D3EE"	# HapMap Finnish individuals from Finland
> cls[cls=="7"]="#EEEE00"	# Iberian populations in Spain
> cls[cls=="8"]="#8B7500"	# Toscan individuals
> cls[cls=="9"]="#CD3333"	# Han Chinese in Beijing
> cls[cls=="10"]="#8B2323"	# Han Chinese South
> cls[cls=="11"]="#FF7F50"	# Japanese individuals
> cls[cls=="12"]="#6B8E23"	# Luhya individuals
> cls[cls=="13"]="#BCEE68"	# Yoruba individuals
> 
> # plot PC1 & PC2
> pdf(file="1000 Genomes_PC1_PC2.pdf",width=9.5,height=6)
> par(xpd=T, mar=par()$mar+c(0,0,0,20))
> plot(PCs[,4],PCs[,3],col=cls,type="p",pch=20,bty="l",xlab="PC 2 (from 1000 Genomes)",ylab="PC 1 (from 1000 Genomes)")
> legend(.06,.03,bty="n",legend=c(
+ "Dutch individuals",
+ "CEPH individuals",
+ "British individuals from England and Scotland",
+ "HapMap Finnish individuals from Finland",
+ "Iberian populations in Spain",
+ "Toscan individuals",
+ "Han Chinese in Beijing",
+ "Han Chinese South",
+ "Japanese individuals",
+ "Yoruba individuals",
+ "Luhya individuals"),
+ col=c(
+ "#FFC125",
+ "#98F5FF",
+ "#6495ED",
+ "#B9D3EE",
+ "#EEEE00",
+ "#8B7500",
+ "#CD3333",
+ "#8B2323",
+ "#FF7F50",
+ "#BCEE68",
+ "#6B8E23"),
+ lty = c(-1), pch = c(20),lwd=c(-1))
> par(mar=c(5, 4, 4, 2) + 0.1)
> dev.off()
null device 
          1 
> 
> 	## 2. filter out Dutch individuals scoring higher or lower than maximum or minimum European values respectively ##
> 
> # first, we export all Europen individuals from 1000 Genomes into a matrix called EUR:
> EUR <- PCs[which(PCs$V13 < 9 & PCs$V13 > 3),]
> 
> # then we make variables with the highest and lowest values of European individuals (for PC1 and PC2): 
> PC1max <- max(EUR$V3)
> PC1min <- min(EUR$V3)
> PC2max <- max(EUR$V4)
> PC2min <- min(EUR$V4)
> 
> # than we make a matrix called outliers with all Dutch individuals scoring higher or lower than those European maximum and minimum values:
> outliers <- PCs[which((PCs$V13 == 3) & (PCs$V3 > PC1max | PCs$V3 < PC1min | PCs$V4 > PC2max | PCs$V4 < PC2min)),]
> 
> # and finally we export the IDs of those Dutch individuals with non-European ancestry to a file outliers.txt
> write.table(outliers[,1:2],file="outliers.txt",quote=FALSE,sep="\t",row.names=F,col.names=FALSE)
> 
> proc.time()
   user  system elapsed 
  0.300   0.152   0.293