args <- commandArgs(trailingOnly = TRUE) intab <- args[1] outfile <- args[2] cnames <- args[3] k <- args[4] data=read.table(intab, sep="\t", row.names=1) pdf(outfile) cn=strsplit(cnames, ',') library(gplots) d=as.matrix(sapply(data, as.numeric)) #d=round(log2(d + 1.1), digits=2) colnames(d)=unlist(cn) #only label rows if there are only a few labels=FALSE if (nrow(d)<50) { labels=rownames(data) } # gaggleUtil.R: general purpose (primarily convenience) R functions developed for microarray # data in the gaggle ##------------------------------------------------------------------------------------------------ normalize <- function (m) # return a matrix which in which each row is normalized { result <- m for (r in 1:dim (m)[1]) { avg <- mean (m [r,], na.rm=TRUE) variance <- var (m [r,], na.rm=TRUE) result [r,] <- result [r,] - avg #center result [r,] <- result [r,] / sqrt (variance) #scale } invisible (result) } #row normalize nd = normalize(d) #determine k to use for kmeans??? # Determine number of clusters from (http://www.statmethods.net/advstats/cluster.html) wss <- (nrow(nd)-1)*sum(apply(nd,2,var)) for (i in 2:15) wss[i] <- sum(kmeans(nd, centers=i, iter.max=20)$withinss) plot(1:15, wss, type="b", xlab="Number of Clusters", ylab="Within groups sum of squares") #look for bend in plot # K-Means Cluster Analysis w = kmeans(nd, k, iter.max = 20) or = order(w$cluster) clus=list() #clusn=list() for (i in 1:max(w$cluster)) { c=(length(w$cluster[w$cluster<=i])) #n=(length(w$cluster[w$cluster==i])) #p=(length(w$cluster[w$cluster