CoXpress Tutorial

# please note that there is are extensive examples both in the coXpress help:
library(coXpress)
?coXpress

# and on the coXpress website: 
# http://coxpress.sf.net/code.R

# Step 1: reading your data in
# Examples are provided here of reading in both a text file (golub.txt) and an
# Excel spreadsheet (golub.xls).  The data should have rows representing genes 
# and columns representing arrays.  There should be one column containing 
# unique gene identifiers.

# If your data is in a text file....
# here we use the read.table command.  This command reads in text files.  See
# ?read.table for help.  Our gene identifiers are in row 1, and so we pass 1
# to the row.names argument
golub.df <- read.table("golub.txt", sep="\t", header=TRUE, row.names=1)

# If your data is in an Excel Spreadsheet
# here we use the RODBC library 
library(RODBC)

# we tell it where our spreadsheet is
xl <- odbcConnectExcel("golub.xls")

# we tell it what the worksheet is called sheet1
# this time we tell it the name of the column containing our gene identifiers
golub.df <- sqlFetch(xl, "golub", rownames="Gene")

# once we have 
odbcCloseAll()

# Step 2: Cluster data based on a subset of experiments.
# The golub data has 27 ALL cases and 11 ALL cases.  We will
# cluster on the 27 ALL cases.  These are in columns 1-27
hc.gene  <- cluster.gene(golub.df[,1:27],s="pearson",m="average")
       
# Step 3: cut the tree at a height of 0.4 (equates to pearson correlation of 
# 0.6).  This choice of cut off is user defined and should be in the range
# 0 < h < 2.  A low value will produce many groups with few genes in each group;
# a large value will produce few groups with many genes
g <- cutree(hc.gene, h=0.4)
       
# Step 4: examine the difference between ALL and AML samples.  The AML samples
# are in columns 28-38
cox <- coXpress(golub.df, g, 1:27, 28:38)
       
# Step 5: the results are a data.frame, with one row for each group of genes.
# see '?coXpress' for a longer explanation of the columns and their meanings
# examine the top 30 results
cox[1:30,]
  
# Step 6: examine groups of interest graphically     
# look at group 21
plot.compare.group(golub.df,g,21,1:27,28:38,
                    scale.center=TRUE,scale.scale=TRUE,
                    ylim=c(-5,5))
                         
inspect.group(golub.df,g,21,1:27,28:38)

# look for groups of genes with >= 10 members
cox[cox$N>=10,]
       
# look for groups of genes with >= 8 members
# that are non-random in group 1 and random 
# in group 2
cox[cox$N>8 & cox$pr.g1<=.05 & cox$pr.g2>=.05, ]