hotpal<-c(rgb(   0   ,   0   ,   0   ),
rgb(   0.058823529   ,   0   ,   0   ),
rgb(   0.117647059   ,   0   ,   0   ),
rgb(   0.176470588   ,   0   ,   0   ),
rgb(   0.235294118   ,   0   ,   0   ),
rgb(   0.294117647   ,   0   ,   0   ),
rgb(   0.352941176   ,   0   ,   0   ),
rgb(   0.411764706   ,   0   ,   0   ),
rgb(   0.470588235   ,   0   ,   0   ),
rgb(   0.529411765   ,   0.058823529   ,   0   ),
rgb(   0.588235294   ,   0.117647059   ,   0   ),
rgb(   0.647058824   ,   0.176470588   ,   0   ),
rgb(   0.705882353   ,   0.235294118   ,   0   ),
rgb(   0.764705882   ,   0.294117647   ,   0   ),
rgb(   0.823529412   ,   0.352941176   ,   0   ),
rgb(   0.882352941   ,   0.411764706   ,   0   ),
rgb(   0.941176471   ,   0.470588235   ,   0   ),
rgb(   1   ,   0.529411765   ,   0   ),
rgb(   1   ,   0.588235294   ,   0   ),
rgb(   1   ,   0.647058824   ,   0   ),
rgb(   1   ,   0.705882353   ,   0   ),
rgb(   1   ,   0.764705882   ,   0   ),
rgb(   1   ,   0.823529412   ,   0   ),
rgb(   1   ,   0.882352941   ,   0   ),
rgb(   1   ,   0.941176471   ,   0   ),
rgb(   1   ,   1   ,   0.141176471   ),
rgb(   1   ,   1   ,   0.28627451   ),
rgb(   1   ,   1   ,   0.42745098   ),
rgb(   1   ,   1   ,   0.57254902   ),
rgb(   1   ,   1   ,   0.71372549   ),
rgb(   1   ,   1   ,   0.858823529   ),
rgb(   1   ,   1   ,   1   ));

greypal<-c(
rgb(0,0,0),
rgb(0.1,0.1,0.1),
rgb(0.2,0.2,0.2),
rgb(0.3,0.3,0.3),
rgb(0.4,0.4,0.4),
rgb(0.5,0.5,0.5),
rgb(0.6,0.6,0.6),
rgb(0.7,0.7,0.7),
rgb(0.8,0.8,0.8),
rgb(0.9,0.9,0.9),
rgb(1,1,1));

# same as rs, but for probe comparisons within a data set for the same gene.
cross.corr.within<-function(data, array, method="pearson", minpresent=1) {
  rprobes<-vector(length=25000);
  sprobes<-vector(length=25000);
  gene.names<-vector(length=25000);
  values<-vector(length=25000);
  morevalues<-vector(length=25000);
  explev.s<-vector(length=25000);
  explev.r<-vector(length=25000);

  te<-function(x,y){cor.test(x,y, method=method, alternative="greater", na.action="omit")$p.value; };
  tm<-function(x,y){cor(x,y, use="pairwise.complete.obs", method=method)[1,1]};
  j<-1;
  genes<-as.vector(as.character(array[,"gene"]));
  unique.genes<-unique(genes);
  for (gene in unique.genes) { # find probes for this gene.
    if (gene == "---" || gene == "") {
      next;
  }
    probes<-row.names(array)[which(genes == gene )];
    if (length(probes) < 2) {
      next;
    }

    for(i in 1:length(probes)) {
      dat.a<-as.vector(data[probes[i],]);
      for (k in (i+1):length(probes)) {
        if (is.na(probes[k])) {
          break;
        }
         print(paste(gene, " ", probes[i], " ", probes[k]))
        dat.b<-as.vector(data[probes[k],]);
        try(silent=T,
            values[j]<-te(t(dat.a), t(dat.b)));
        morevalues[j]<-tm(t(dat.a), t(dat.b));
        explev.s[j]<-median(t(dat.a), na.rm=TRUE);
        explev.r[j]<-median(t(dat.b), na.rm=TRUE);
        rprobes[j]<-probes[i];
        sprobes[j]<-probes[k];
        gene.names[j]<-gene;
        j<-j+1;
        
      }
    }
  }
  values<-values[1:j-1];
  explev.s<-explev.s[1:j-1];
  explev.r<-explev.r[1:j-1];
  rprobes<-rprobes[1:j-1];
  sprobes<-sprobes[1:j-1];
  morevalues<-morevalues[1:j-1];
  gene.names<-gene.names[1:j-1];
  dm<-data.frame (cbind(gene.names, rprobes, sprobes), cbind(values, explev.r, explev.s, morevalues));
  names(dm)<-c("gene", "rprobe", "sprobe", "pvalue", "explev.r", "explev.s", "correl");
  dm;
}


                                        # compare two data sets - the expression vectors are compared to each
# other, gene by gene. This obviously only makes sense for special
# cases (such as the same dat aranalyzed more than once, or the nci60
# data)

rs<-function(rdataset, sdataset, rarray, sarray, method="pearson" , unroll=FALSE, test=FALSE, minpresent=1, f=NULL){
  # initialize the data structure. Must be a better way...
  rprobes<-vector(length=25000);
  sprobes<-vector(length=25000);
  gene.names<-vector(length=25000);
  values<-vector(length=25000);
  morevalues<-vector(length=25000);
  explev.s<-vector(length=25000);
  explev.r<-vector(length=25000);
  if ( unroll) {
    values<-matrix(nrow=100000, ncol=2);
  } 
  
  j<-1;
  te<-NULL;
  tm<-NULL;
  if (!is.null(f)) {
    te<-f;
  } else  if (test ){
    print("In pvalue mode");
    te<-function(x,y){cor.test(x,y, method=method, alternative="greater", na.action="omit")$p.value; };
    tm<-function(x,y){cor(x,y, use="pairwise.complete.obs", method=method)[1,1]};
  } else {
    te<-function(x,y){cor(x,y, use="pairwise.complete.obs", method=method)[1,1]};
  }
  totalpoints<-0;
  # we use the rarray genes as the "gold standard" list, arbitrarily.
  rgenes<-as.vector(as.character(rarray[,"gene"]));
  unique.genes<-unique(rgenes);
  sgenes<-as.vector(as.character(sarray[,"gene"]));
  ngenes.tested<-0
  for (gene in unique.genes) { # find probes for this gene.
    if (gene == "---" || gene == "") {
      next;
    }
    r.probes<-row.names(rarray)[which(rgenes == gene )]
    s.probes<-row.names(sarray)[which(sgenes == gene )]
    if (length(s.probes) == 0 || length(r.probes) == 0) {
      next;
    }

    for (rname in r.probes) {
      if (!is.null(dim(rdataset))) {
        rdat<-as.vector(rdataset[rname,]); # data for this probe.
      } else {
        rdat<-rdataset[rname];
      }


      if (length(which(!is.na(rdat) )) < minpresent) { # exclude data that is missing too much
#        print("skipping r");
        next;
      }
      for (sname in s.probes) {
        if (!is.null(dim(sdataset))) {
          sdat<-as.vector(sdataset[sname,]); # data for this probe.
        } else {
          sdat<-sdataset[sname];
        }


      if (length(which(!is.na(sdat) )) < minpresent) { # exclude data that is missing too much
#        print("skipping s");
        next;
      }
        
        
        if ( test) { # pvalue for correlation
          try(silent=T,
             values[j]<-te(t(rdat), t(sdat)));
          morevalues[j]<-tm(t(rdat), t(sdat));
          explev.s[j]<-median(t(sdat), na.rm=TRUE);
          explev.r[j]<-median(t(rdat), na.rm=TRUE);
          rprobes[j]<-rname;
          sprobes[j]<-sname;
          gene.names[j]<-gene;
          j<-j+1;
        } else if (unroll) { # the actual expression values.
          for(u in seq(along=rdat)) {
            if (is.na(rdat[u]) || is.na(sdat[u]) ) {
              next;
            }
            values[j,1] <- as.numeric(sdat[u]);
            values[j,2] <- as.numeric(rdat[u]);
            j<-j+1;
            totalpoints<-totalpoints+1;
          }
        } else { # the default.
          values[j]<-te(t(rdat), t(sdat));
          explev.s[j]<-median(t(sdat), na.rm=TRUE);
          explev.r[j]<-median(t(rdat), na.rm=TRUE);
          rprobes[j]<-rname;
          sprobes[j]<-sname;
          gene.names[j]<-gene;
          j<-j+1;
        }
      }
    }
    ngenes.tested<-ngenes.tested+1;
    if ((ngenes.tested %% 500) == 0 ) {
      write(paste(ngenes.tested, " genes tested"), stderr());
    }
  }

                                        # get rid of empty values.
  if (unroll) {
    values<-values[1:totalpoints,];
  } else  {
    values<-values[1:j-1];
    explev.s<-explev.s[1:j-1];
    explev.r<-explev.r[1:j-1];
    rprobes<-rprobes[1:j-1];
    sprobes<-sprobes[1:j-1];
    morevalues<-morevalues[1:j-1];
    gene.names<-gene.names[1:j-1];
  }

  if (test) {
    print("making data frame with pvalues and correlations");
          dm<-data.frame (cbind(gene.names, rprobes, sprobes), cbind(values, explev.r, explev.s, morevalues));
      names(dm)<-c("gene", "rprobe", "sprobe", "pvalue", "explev.r", "explev.s", "correl");
  } else {
      dm<-data.frame (cbind(gene.names, rprobes, sprobes), cbind(values, explev.r, explev.s));
  names(dm)<-c("gene", "rprobe", "sprobe", "correl", "explev.r", "explev.s");
   }
  
  dm;
}


# make graphs to show some randomly selected examples. Robust line fits using rlm.
#par(mfrow=c(5,10))
library(MASS)
rsplot<-function(rdataset=ross, sdataset, rarray=imr, sarray=u6k){
  m<-vector();
  j<-0;
  random<-round(runif(n=length(unique(as.character(rarray[,1]))), min=1,max=length(unique(as.character(rarray[,1])))), 0)
  for (i in unique(as.character(rarray[,1]))[random]) {
    rn<-row.names(rarray)[which(rarray[,1] == i)]
    sn<-row.names(sarray)[which(sarray[,1]==i)]
    if (length(sn) == 0) {
      next;
    }	
    for (rname in rn) {
      rdat<-rdataset[rname,];
      if (length(which(!is.na(rdat))) < 10) {
        next;
      }
      for (sname in sn) {
        sdat<-sdataset[sname,];
        if (j >= 50) { break; }
        plot(t(rdat), t(sdat), pch=20, cex=1, main=i, xlab="", ylab="");
        abline(rlm(t(sdat) ~ t(rdat)), col="blue")
        j<-j+1;
        
      }	
    }
    if (j >= 50) { break; }
  }
}


plotdi<-function(dataset="rma", probe) {
  gene<-NULL;
  if (dataset == "rma") {
    gene<-affy.annots[probe,1];
  } else {
    gene<-illu.annots[probe,1];
  }
  data<-eval(parse(text=paste(sep='', dataset, "[\"", probe, "\",]")))
  plot(dilution, data, pch=20, xlab="Dilution", ylab="Expression", main=paste(dataset, " ", probe, " : ", gene), cex=2)
  abline(lm(t(data) ~ dilution), col="blue")
}

# for 3' distance analysis
bestdist<-function (dataset) {
  d<-NULL;
  for(probe in as.character(unique(dataset[,"probe"]))) {
    best<-min(dataset[dataset$probe==probe, "threeprimeDist"]);
    d<-rbind(d,list(probe,best))
  }
  d<-data.frame(row.names=unlist(d[,1]), unlist(d[,2]))
  names(d)<-c("threeprimeDist")
  d
}

# Sun Mar 20 20:08:17 EST 2005, updated Mon Apr 11 14:35:14 EDT 2005 to include exon overlap.
# Wed Apr 20 15:53:16 EDT 2005 updated to filter out weak hits and count as "hits" only good hits.
# Mon Apr 25 16:58:20 EDT 2005 Improving performance.
bestgenes<-function(k) {
  d<-NULL;
  v<-0;
  alp<-as.character(k[,"probe"]);
  up<-unique(as.character(k[,"probe"]));
  blatscores<-k[,"Blat.Score"]
  exonoverlaps<-as.vector(k[,"exonoverlap"]);
  probelengths<-as.vector(k[,"Probe.Length"]);
  
  sapply(up, function(i) {
    rrs<-match(i, alp);
    maxscore<-0;
    maxindex<--1;
    maxblatscore<-0;
    num.hits<-0;
    maxoverlap<-0;
    sapply(rrs, function(r) {
      v<<-v+1;
      blatscore<-blatscores[r];
      if (blatscore >= 0.9) {
        overlap<<-exonoverlaps[r]/probelengths[r];
        score <- blatscore * overlap;
        if (score >= maxscore) {
          maxscore<<-score;
          maxblatscore<<-blatscore;
          maxindex<<-r;
          maxoverlap<<-overlap;
        }
        num.hits<<-num.hits+1;
      }
      if (v %% 100 == 0 ) { print(v); }
    });
    if (maxindex >= 0) {
      d<<-rbind(d,list(i, maxblatscore, as.character(k[maxindex,"Gene"]), as.character(k[maxindex,"Transcript"]), num.hits, k[maxindex,"threeprimeDist"], k[maxindex,"exonoverlap"]/k[maxindex,"Probe.Length"]));
    }
  });
#  d<-data.frame(row.names=unlist(d[,1]), unlist(d[,2]), unlist(d[,4]), unlist(d[,5]), unlist(d[,3]), unlist(d[,6]), unlist(d[,7]) )
#  names(d)<-c("Blat.Score", "Gene", "Transcript", "Num.hit.genes.or.transcripts", "three.prime.dist", "exonoverlap.fraction")
  d<-data.frame(row.names=unlist(d[,1]), unlist(d[,2]), unlist(d[,3]), unlist(d[,4]), unlist(d[,5]), unlist(d[,6]) )
  names(d)<-c("Blat.Score", "Gene", "Num.hit.genes.or.transcripts", "three.prime.dist", "exonoverlap.fraction")
  d
}


# Function to create a density plot of data. - a 2d histogram.
hist.2d<-function(x, y, xbreaks=100, ybreaks=100, countmax=-1, col=pal, xlab=NULL, ylab=NULL, main=NULL, ylim=NULL, xlim=NULL) {
  # create a grid
  grid<-matrix(0, nrow=ybreaks, ncol=xbreaks);
  labRow<-NULL;
  labCol<-NULL;

  # set up ranges.
  minx<-min(x, na.rm=T);
  miny<-min(y, na.rm=T);
  maxx<-max(x, na.rm=T);
  maxy<-max(y, na.rm=T);

  if (is.vector(ylim) && length(ylim) > 0) {
    miny<-ylim[1];
    maxy<-ylim[2];
  }
  
  if (is.vector(xlim) && length(xlim) > 0) {
    minx<-xlim[1];
    maxx<-xlim[2];
  }
  
  xbinsize<-(maxx - minx)/xbreaks;
  ybinsize<-(maxy - miny)/ybreaks;
#  names<-seq(minx, max(x)-xbinsize, by=xbinsize);
#  rownames<-seq(miny, max(y)-ybinsize, by=ybinsize);
#  grid<-data.frame(grid, row.names=as.character(rownames));
#  names(grid)<-as.character(names);

    # iterate over the data, filling in the grid
  i<-1;
  for(a in as.vector(x)) {
    if (!is.numeric(a) || is.na(a)) {
      next;
    }
    if (a < minx) {
      a<-minx;
    }
    if (a > maxx) {
      a<-maxx;
    }
    b<-as.vector(y)[i];

    if (!is.numeric(b) || is.na(b)) {
      next;
    }
    if (b < miny) {
      b<-miny;
    }
    if (b > maxy) {
      b<-maxy;
    }
    
    abin<- floor(( a - minx )/xbinsize);
    bbin<- floor(( b - miny)/ybinsize);
    grid[abin,bbin]<-grid[abin,bbin]+1;
    if (countmax >0) {
      grid[abin,bbin] <- min(grid[abin,bbin], countmax);
    }
    i<-i+1;
  }
#  grid <- grid / length(x);
  print(max(grid))
  heatmap(grid, Colv=NA, Rowv=NA, scale="none", col=col, xlab=ylab, ylab=ylab, main=main, labRow=labRow, labCol=labCol);
#  grid;
}