# please note that there is are extensive examples both in the coXpress help: library(coXpress) ?coXpress # and on the coXpress website: # http://coxpress.sf.net/code.R # Step 1: reading your data in # Examples are provided here of reading in both a text file (golub.txt) and an # Excel spreadsheet (golub.xls). The data should have rows representing genes # and columns representing arrays. There should be one column containing # unique gene identifiers. # If your data is in a text file.... # here we use the read.table command. This command reads in text files. See # ?read.table for help. Our gene identifiers are in row 1, and so we pass 1 # to the row.names argument golub.df <- read.table("golub.txt", sep="\t", header=TRUE, row.names=1) # If your data is in an Excel Spreadsheet # here we use the RODBC library library(RODBC) # we tell it where our spreadsheet is xl <- odbcConnectExcel("golub.xls") # we tell it what the worksheet is called sheet1 # this time we tell it the name of the column containing our gene identifiers golub.df <- sqlFetch(xl, "golub", rownames="Gene") # once we have odbcCloseAll() # Step 2: Cluster data based on a subset of experiments. # The golub data has 27 ALL cases and 11 ALL cases. We will # cluster on the 27 ALL cases. These are in columns 1-27 hc.gene <- cluster.gene(golub.df[,1:27],s="pearson",m="average") # Step 3: cut the tree at a height of 0.4 (equates to pearson correlation of # 0.6). This choice of cut off is user defined and should be in the range # 0 < h < 2. A low value will produce many groups with few genes in each group; # a large value will produce few groups with many genes g <- cutree(hc.gene, h=0.4) # Step 4: examine the difference between ALL and AML samples. The AML samples # are in columns 28-38 cox <- coXpress(golub.df, g, 1:27, 28:38) # Step 5: the results are a data.frame, with one row for each group of genes. # see '?coXpress' for a longer explanation of the columns and their meanings # examine the top 30 results cox[1:30,] # Step 6: examine groups of interest graphically # look at group 21 plot.compare.group(golub.df,g,21,1:27,28:38, scale.center=TRUE,scale.scale=TRUE, ylim=c(-5,5)) inspect.group(golub.df,g,21,1:27,28:38) # look for groups of genes with >= 10 members cox[cox$N>=10,] # look for groups of genes with >= 8 members # that are non-random in group 1 and random # in group 2 cox[cox$N>8 & cox$pr.g1<=.05 & cox$pr.g2>=.05, ]