args <- commandArgs(trailingOnly = TRUE) intab <- args[1] outfile <- args[2] k <- args[3] inTable=read.table(intab, sep="\t", row.names=4, header=TRUE) pdf(outfile, width=9, height=7) library(gplots) #dnaseSig=as.matrix(sapply(inTable[,4:15], as.numeric)) #c(21,27) dnaseSig=as.matrix(sapply(inTable[,c(21,27)], as.numeric)) #d=as.matrix(sapply(data, as.numeric)) #only label rows if there are only a few # gaggleUtil.R: general purpose (primarily convenience) R functions developed for microarray # data in the gaggle ##------------------------------------------------------------------------------------------------ normalize <- function (m) # return a matrix which in which each row is normalized { result <- m for (r in 1:dim (m)[1]) { avg <- mean (m [r,], na.rm=TRUE) variance <- var (m [r,], na.rm=TRUE) result [r,] <- result [r,] - avg #center #if (result[r,] != 0) { #result [r,] <- result [r,] / sqrt (variance) #scale #} } invisible (result) } #row normalize dnaseSigRnorm=round(normalize(dnaseSig), digits=5) dnaseSigRnorm[1:4,] #try pam/clara, warnings plus doesn't finish #library(cluster) #w=clara(dnaseSigRnorm, 4, samples=50, sampsize=10000) #or = order(w$clusters) #determine k to use for kmeans??? # Determine number of clusters from (http://www.statmethods.net/advstats/cluster.html) #wss <- (nrow(dnaseSigRnorm)-1)*sum(apply(dnaseSigRnorm,2,var)) #for (i in 2:15) wss[i] <- sum(kmeans(dnaseSigRnorm, centers=i, iter.max=40)$withinss) #plot(1:15, wss, type="b", xlab="Number of Clusters", ylab="Within groups sum of squares") #look for bend in plot # K-Means Cluster Analysis #w = kmeans(dnaseSigRnorm, k, iter.max = 40) #warnings() #or = order(w$cluster) #convert to numbers first so can control order? peak=inTable[,28] peak=gsub("G1E", 1, peak) peak=gsub("Shared", 2, peak) peak=gsub("ER4", 3, peak) class(peak)<-"numeric" type=inTable[,93] type=gsub("Other", 1, type, perl=TRUE) type=gsub("Insulator", 2, type, perl=TRUE) type=gsub("Enhancer", 3, type, perl=TRUE) type=gsub("Promoter", 4, type, perl=TRUE) class(type)<-"numeric" or=order(type, peak) #clus=list() #for (i in 1:max(w$cluster)) { #c=(length(w$cluster[w$cluster<=i])) #clus=c(clus, c) #} #heatmap.2 overrides mfrow, need image to combine different maps and colors #par(mfrow=c(1,2)) layout(matrix(c(1,2,3,4,5),1,5,byrow=T), widths=c(.25,.25,.75,.75,1.5), heights=c(4,1), respect=T) par(mar=c(4,.5,2,.1), xpd=TRUE) #bottom, left, top, right # 43448 Enhancer 20313 Insulator 48399 Other 14249 Promoter d=inTable[,93] d=gsub("Other", 1, d, perl=TRUE) d=gsub("Insulator", 2, d, perl=TRUE) d=gsub("Enhancer", 3, d, perl=TRUE) d=gsub("Promoter", 4, d, perl=TRUE) typeCol=c("grey", "blue", "orange", "darkred") breaks=c(0,1.1,2.1,3.1,4.1) class(d)<-"numeric" image(1:2,1:length(d),t(d[or]), col=typeCol, xlab="", ylab="", axes=F, main="Type", ylim=c(1,length(d)), breaks=breaks) par(mar=c(4,.1,2,.1), xpd=TRUE) #G1E=Blue, shared=purple, ER4=red peakCol=c(rgb(0,0,155,maxColorValue=255), rgb(178,102,255,maxColorValue=255), rgb(155,0,0,maxColorValue=255)) d=inTable[,28] d=gsub("G1E", 1, d) d=gsub("Shared", 2, d) d=gsub("ER4", 3, d) class(d)<-"numeric" breaks=c(0,1.1,2.1,3.1) image(1:2,1:length(d),t(d[or]), col=peakCol, xlab="", ylab="", axes=F, main="Peaks", ylim=c(1,length(d)), breaks=breaks) #plot centered signal of pools #set breaks for colors s=quantile(dnaseSigRnorm, c(.05)) e=quantile(dnaseSigRnorm, c(.95)) cnt=40 step = (e - s)/cnt breaks = c(seq(s,e,by=step)) dim(breaks) breaks[1]=min(dnaseSigRnorm) breaks[cnt+1]=max(dnaseSigRnorm) sigBreaks=breaks d=cbind(dnaseSigRnorm[,1],dnaseSigRnorm[,2]) t=dnaseSigRnorm image(1:ncol(d),1:nrow(d),t(d[or,]), col=colorpanel(40, low = "grey", mid = "yellow", high = "red"), xlab="", ylab="", axes=F, breaks=breaks, main = "DNase signal", ylim=c(1,nrow(d))) axis(side=1, at=c(1:2), labels=c("G1E","ER4"), tick=FALSE) #for (i in num) #{ #a=length(w$cluster[w$cluster<=i]) #abline(h=(a+0.5),col='black') #} #num=1:max(w$cluster) stateCol=c(rgb(0,153,102,maxColorValue=255),rgb(153,204,0,maxColorValue=255),rgb(255,255,0,maxColorValue=255),rgb(255,153,0,maxColorValue=255), rgb(255,0,0,maxColorValue=255), rgb(102,255,255,maxColorValue=255), rgb(155,155, 155,maxColorValue=255), rgb(0,0,255,maxColorValue=255), rgb(153,0,255,maxColorValue=255)) #create matrix of states d=cbind(colnames(inTable[,66:74])[max.col(inTable[,66:74])], colnames(inTable[,75:83])[max.col(inTable[,75:83])]) d=gsub(".*Seg(\\d)", "\\1", d, perl=TRUE) class(d)<-"numeric" breaks=c(0,1.1,2.1,3.1,4.1,5.1,6.1,7.1,8.1,9.1) image(1:ncol(d),1:nrow(d),t(d[or,]), col=stateCol, xlab="", ylab="", axes=F, main="ChromHMM states", ylim=c(1,nrow(d)), breaks=breaks) axis(side=1, at=c(1:2), labels=c("G1E","ER4"), tick=FALSE) par(mar=c(4,.1,2,10), xpd=TRUE) #GATA1/2=47,40 TAL1=49,42 PolII=48,41 CTCF=43,36 d=cbind(inTable[,47], inTable[,40], inTable[,49], inTable[,42], inTable[,43], inTable[,36]) tfCol=c("white", "black"); breaks=c(0,.5,1); labels=c("GATA2 G1E","GATA1 ER4","TAL1 G1E","TAL1 ER4","CTCF G1E","CTCF ER4") colnames(d)<-labels image(1:ncol(d),1:nrow(d),t(d[or,]), col=tfCol, ylab="", xlab="", axes=F, main="TF binding", ylim=c(1,nrow(d)), breaks=breaks) axis(side=1, at=c(1:6), srt=180, labels=FALSE, tick=FALSE) text(1:6, par("usr")[3] - 0.2, labels = labels, srt = 90, adj = 1, xpd = TRUE, cex=.8) legend("topright",inset=c(-1,0),title="ChromHMM States",fill=stateCol,legend=c('1','2','3','4','5','6','7','8','9')) legend("topright",inset=c(-1,.24),title="Type",fill=typeCol,legend=c("Other","Insulator","Enhancer","Promoter")) legend("topright",inset=c(-1,.38), title="Peaks",fill=peakCol,legend=c("G1E","Shared","ER4")) #color.bar <- function(lut, min, max=-min, nticks=11, ticks=seq(min, max, len=nticks), title='') { #scale = (length(lut)-1)/(max-min) #plot(c(0,10), c(min,max), type='n', bty='n', xaxt='n', xlab='', yaxt='n', ylab='', main=title) #axis(2, ticks, las=1) #for (i in 1:(length(lut)-1)) { #y = (i-1)/scale + min #rect(0,y,10,y+1/scale, col=lut[i], border=NA) #} #} #legend_image <- as.raster(matrix(colfunc(20), ncol=1)) #plot(c(0,2),c(0,1),type = 'n', axes = F,xlab = '', ylab = '', main = 'legend title') #text(x=1.5, y = seq(0,1,l=5), labels = seq(0,1,l=5)) #rasterImage(legend_image, 0, 0, 1,1) #levelplot()? #draw.colorkey(key, draw=FALSE, vp=NULL) library(lattice) library(grid) #viewport is 0,0 bottom left: 1,1 topright draw.colorkey(list(col=colorpanel(40, low = "grey", mid = "yellow", high = "red"), at=sigBreaks, height=.2, width=.8), draw=TRUE, vp=viewport(x=.8, y=.4)) dev.off() q()