Home > RAVEN > scoreModel.m

scoreModel

PURPOSE ^

scoreRxns

SYNOPSIS ^

function [rxnScores geneScores hpaScores arrayScores]=scoreModel(model,hpaData,arrayData,tissue,celltype,noGeneScore,multipleGeneScoring,multipleCellScoring,hpaLevelScores)

DESCRIPTION ^

 scoreRxns
   Scores the reactions and genes in a model based on expression data
   from HPA and/or gene arrays

   model               a model structure
   hpaData             HPA data structure from parseHPA (opt if arrayData is 
                       supplied, default [])
   arrayData           gene expression data structure (opt if hpaData is
                       supplied, default [])
       genes           cell array with the unique gene names
       tissues         cell array with the tissue names. The list may not be
                       unique, as there can be multiple cell types per tissue
       celltypes       cell array with the cell type names for each tissue
       levels          GENESxTISSUES array with the expression level for
                       each gene in each tissue/celltype. NaN should be
                       used when no measurement was performed
   tissue              tissue to score for. Should exist in either
                       hpaData.tissues or arrayData.tissues
   celltype            cell type to score for. Should exist in either
                       hpaData.celltypes or arrayData.celltypes for this 
                       tissue (opt, default is to use the best values
                       among all the cell types for the tissue. Use [] if
                       you want to supply more arguments)
   noGeneScore         score for reactions without genes (opt, default -2)
   multipleGeneScoring determines how scores are calculated for reactions
                       with several genes ('best' or 'average')
                       (opt, default 'best')
   multipleCellScoring determines how scores are calculated when several
                       cell types are used ('best' or 'average')
                       (opt, default 'best')
   hpaLevelScores      structure with numerical scores for the expression
                       level categories from HPA. The structure should have a
                       "names" and a "scores" field (opt, see code for
                       default scores)

   rxnScores       scores for each of the reactions in model
   geneScores      scores for each of the genes in model. Genes which are
                   not in the dataset(s) have -Inf as scores
   hpaScores       scores for each of the genes in model if only taking hpaData
                   into account. Genes which are not in the dataset(s) 
                   have -Inf as scores
   arrayScores     scores for each of the genes in model if only taking arrayData
                   into account. Genes which are not in the dataset(s) 
                   have -Inf as scores
       
   Usage: [rxnScores geneScores hpaScores arrayScores]=scoreModel(model,...
               hpaData,arrayData,tissue,celltype,noGeneScore,multipleGeneScoring,...
               multipleCellScoring,hpaLevelScores)

   Rasmus Agren, 2012-09-11

CROSS-REFERENCE INFORMATION ^

This function calls: This function is called by:

SUBFUNCTIONS ^

SOURCE CODE ^

0001 function [rxnScores geneScores hpaScores arrayScores]=scoreModel(model,hpaData,arrayData,tissue,celltype,noGeneScore,multipleGeneScoring,multipleCellScoring,hpaLevelScores)
0002 % scoreRxns
0003 %   Scores the reactions and genes in a model based on expression data
0004 %   from HPA and/or gene arrays
0005 %
0006 %   model               a model structure
0007 %   hpaData             HPA data structure from parseHPA (opt if arrayData is
0008 %                       supplied, default [])
0009 %   arrayData           gene expression data structure (opt if hpaData is
0010 %                       supplied, default [])
0011 %       genes           cell array with the unique gene names
0012 %       tissues         cell array with the tissue names. The list may not be
0013 %                       unique, as there can be multiple cell types per tissue
0014 %       celltypes       cell array with the cell type names for each tissue
0015 %       levels          GENESxTISSUES array with the expression level for
0016 %                       each gene in each tissue/celltype. NaN should be
0017 %                       used when no measurement was performed
0018 %   tissue              tissue to score for. Should exist in either
0019 %                       hpaData.tissues or arrayData.tissues
0020 %   celltype            cell type to score for. Should exist in either
0021 %                       hpaData.celltypes or arrayData.celltypes for this
0022 %                       tissue (opt, default is to use the best values
0023 %                       among all the cell types for the tissue. Use [] if
0024 %                       you want to supply more arguments)
0025 %   noGeneScore         score for reactions without genes (opt, default -2)
0026 %   multipleGeneScoring determines how scores are calculated for reactions
0027 %                       with several genes ('best' or 'average')
0028 %                       (opt, default 'best')
0029 %   multipleCellScoring determines how scores are calculated when several
0030 %                       cell types are used ('best' or 'average')
0031 %                       (opt, default 'best')
0032 %   hpaLevelScores      structure with numerical scores for the expression
0033 %                       level categories from HPA. The structure should have a
0034 %                       "names" and a "scores" field (opt, see code for
0035 %                       default scores)
0036 %
0037 %   rxnScores       scores for each of the reactions in model
0038 %   geneScores      scores for each of the genes in model. Genes which are
0039 %                   not in the dataset(s) have -Inf as scores
0040 %   hpaScores       scores for each of the genes in model if only taking hpaData
0041 %                   into account. Genes which are not in the dataset(s)
0042 %                   have -Inf as scores
0043 %   arrayScores     scores for each of the genes in model if only taking arrayData
0044 %                   into account. Genes which are not in the dataset(s)
0045 %                   have -Inf as scores
0046 %
0047 %   Usage: [rxnScores geneScores hpaScores arrayScores]=scoreModel(model,...
0048 %               hpaData,arrayData,tissue,celltype,noGeneScore,multipleGeneScoring,...
0049 %               multipleCellScoring,hpaLevelScores)
0050 %
0051 %   Rasmus Agren, 2012-09-11
0052 %
0053 
0054 if nargin<3
0055     arrayData=[];
0056 end
0057 if nargin<5
0058     celltype=[];
0059 end
0060 if nargin<6
0061     noGeneScore=-2;
0062 end
0063 if nargin<7
0064     multipleGeneScoring='best';
0065 end
0066 if nargin<8
0067     multipleCellScoring='best';
0068 end
0069 if nargin<9
0070     %The first four are for APE, the other ones for staining
0071     hpaLevelScores.names={'High' 'Medium' 'Low' 'None' 'Strong' 'Moderate' 'Weak' 'Negative'};
0072     hpaLevelScores.scores=[20 15 10 -8 20 15 10 -8];
0073 end
0074 
0075 if isempty(hpaData) && isempty(arrayData)
0076     throw(MException('','Must supply hpaData, arrayData or both'));    
0077 end
0078 if ~strcmpi(multipleGeneScoring,'best') && ~strcmpi(multipleGeneScoring,'average')
0079     throw(MException('','Valid options for multipleGeneScoring are "best" or "average"'));    
0080 end
0081 if ~strcmpi(multipleCellScoring,'best') && ~strcmpi(multipleCellScoring,'average')
0082     throw(MException('','Valid options for multipleCellScoring are "best" or "average"'));    
0083 end
0084 
0085 %This is so that the code can ignore which combination of input data that is
0086 %used
0087 if isempty(arrayData)
0088     arrayData.genes={};
0089     arrayData.tissues={};
0090     arrayData.celltypes={};
0091     arrayData.levels=[];
0092 end
0093 if isempty(hpaData)
0094     hpaData.genes={};
0095     hpaData.tissues={};
0096     hpaData.celltypes={};
0097     hpaData.levels={};
0098     hpaData.types={};
0099     hpaData.reliabilities={};
0100     hpaData.gene2Level=[];
0101     hpaData.gene2Type=[];
0102     hpaData.gene2Reliability=[];
0103 end
0104 
0105 %Check that the tissue exists
0106 if ~ismember(upper(tissue),upper(hpaData.tissues)) && ~ismember(upper(tissue),upper(arrayData.tissues))
0107     throw(MException('','The tissue name does not match'));   
0108 end
0109 if any(celltype)
0110     %Check that both data types has cell type defined if that is to be used
0111     if ~isfield(hpaData,'celltypes') || ~isfield(arrayData,'celltypes')
0112         throw(MException('','Both hpaData and arrayData must contain cell type information if cell type is to be used'));   
0113     end
0114     if ~ismember(upper(celltype),upper(hpaData.celltypes)) && ~ismember(upper(celltype),upper(arrayData.celltypes))
0115         throw(MException('','The cell type name does not match'));   
0116     end
0117 end
0118 
0119 %Some preprocessing of the structures to speed up a little
0120 %Remove all tissues that are not the correct one
0121 J=~strcmpi(hpaData.tissues,tissue);
0122 
0123 %If cell type is supplied, then only keep that cell type
0124 if any(celltype)
0125     J=J | ~strcmpi(hpaData.celltypes,celltype);
0126 end
0127 
0128 hpaData.tissues(J)=[];
0129 if isfield(hpaData,'celltypes')
0130     hpaData.celltypes(J)=[];
0131 end
0132 if isfield(hpaData,'gene2Level')
0133     hpaData.gene2Level(:,J)=[];
0134 end
0135 if isfield(hpaData,'gene2Type')
0136     hpaData.gene2Type(:,J)=[];
0137 end
0138 if isfield(hpaData,'gene2Reliability')
0139     hpaData.gene2Reliability(:,J)=[];
0140 end
0141 
0142 %Remove all genes from the structures that are not in model or that aren't
0143 %measured in the tissue
0144 if ~isempty(hpaData.genes) %This should not be necessary, but the summation is a 0x1 matrix and the other is []
0145     I=~ismember(hpaData.genes,model.genes) | sum(hpaData.gene2Level,2)==0;
0146 else
0147     I=[];
0148 end
0149 hpaData.genes(I)=[];
0150 if isfield(hpaData,'gene2Level')
0151     hpaData.gene2Level(I,:)=[];
0152 end
0153 if isfield(hpaData,'gene2Type')
0154     hpaData.gene2Type(I,:)=[];
0155 end
0156 if isfield(hpaData,'gene2Reliability')
0157     hpaData.gene2Reliability(I,:)=[];
0158 end
0159 
0160 I=strcmpi(arrayData.tissues,tissue);
0161 %If cell type is supplied, then only keep that cell type
0162 if any(celltype)
0163     I=I & strcmpi(arrayData.celltypes,celltype);
0164 end
0165 
0166 %Remove all genes from the structures that are not in model or that aren't
0167 %measured in the tissue
0168 J=~ismember(arrayData.genes,model.genes) | myAll(isnan(arrayData.levels(:,I)),2);
0169 arrayData.genes(J)=[];
0170 arrayData.levels(J,:)=[];
0171 
0172 %Calculate the scores for the arrayData. These scores are
0173 %calculated for each genes from its fold change between the tissue/celltype(s) in
0174 %question and all other celltypes. This is a lower quality data than
0175 %protein abundance, since a gene that is equally highly expressed in all cell types
0176 %will have a score of 0.0. These scores are therefore only used for genes
0177 %for which there is no HPA data available. The fold changes are transformed
0178 %as min(log(x),10) for x>1 and max(log(x),-5) for x<1 in order to have
0179 %negative scores for lower expressed genes and to scale the scrores to have
0180 %somewhat lower weights than the HPA scores
0181 tempArrayLevels=arrayData.levels;
0182 tempArrayLevels(isnan(tempArrayLevels))=0;
0183 average=sum(tempArrayLevels,2)./sum(~isnan(arrayData.levels),2);
0184 if strcmpi(multipleCellScoring,'best')
0185     current=max(tempArrayLevels(:,I),[],2);
0186 else
0187     current=sum(tempArrayLevels(:,I),2)./sum(~isnan(arrayData.levels(:,I)),2);
0188 end
0189 if any(current)
0190     aScores=5*log(current./average);
0191 else
0192     aScores=[];
0193 end
0194 aScores(aScores>0)=min(aScores(aScores>0),10);
0195 aScores(aScores<0)=max(aScores(aScores<0),-5);
0196 
0197 %Map the HPA levels to scores
0198 [I J]=ismember(upper(hpaData.levels),upper(hpaLevelScores.names));
0199 if ~all(I)
0200     throw(MException('','There are expression level categories that do not match to hpaLevelScores'));  
0201 end
0202 [K L M]=find(hpaData.gene2Level);
0203 scores=hpaLevelScores.scores(J);
0204 if strcmpi(multipleCellScoring,'best')
0205     hScores=max(sparse(K,L,scores(M),numel(hpaData.genes),numel(hpaData.tissues)),[],2);
0206 else
0207     hScores=mean(sparse(K,L,scores(M),numel(hpaData.genes),numel(hpaData.tissues)),2);
0208 end
0209 
0210 %Get the scores for the genes, only use HPA if available
0211 geneScores=inf(numel(model.genes),1)*-1;
0212 hpaScores=geneScores;
0213 arrayScores=geneScores;
0214 
0215 [I J]=ismember(model.genes,hpaData.genes);
0216 hpaScores(I)=hScores(J(I));
0217 geneScores(I)=hScores(J(I));
0218 [I J]=ismember(model.genes,arrayData.genes);
0219 arrayScores(I)=aScores(J(I));
0220 geneScores(I & myIsInf(geneScores))=aScores(J(I & myIsInf(geneScores)));
0221 
0222 %Remove the genes that have no data from the model
0223 I=ismember(model.genes,hpaData.genes) | ismember(model.genes,arrayData.genes);
0224 model.genes(~I)=[];
0225 model.rxnGeneMat(:,~I)=[];
0226 
0227 %Map the genes to the HPA/array genes
0228 [hpaExist hpaMap]=ismember(model.genes,hpaData.genes);
0229 [arrayExist arrayMap]=ismember(model.genes,arrayData.genes);
0230 
0231 %Set the default scores for reactions without genes
0232 rxnScores=ones(numel(model.rxns),1)*noGeneScore;
0233 
0234 %Loop through the reactions and calculate the scores
0235 for i=1:numel(model.rxns)
0236    %Check if it has genes
0237    I=find(model.rxnGeneMat(i,:));
0238    if any(I)
0239        %If any of the genes exist in hpaData, then don't use arrayData
0240        if any(hpaExist(I))
0241            %At least one gene was found in HPA
0242            if strcmpi(multipleGeneScoring,'best')
0243                rxnScores(i)=max(hScores(hpaMap(I(hpaExist(I)))));
0244            else
0245                rxnScores(i)=mean(hScores(hpaMap(I(hpaExist(I)))));
0246            end
0247        else
0248            %Use array data
0249            if any(arrayExist(I))
0250               %At least one gene was found in the array data
0251                if strcmpi(multipleGeneScoring,'best')
0252                    rxnScores(i)=max(aScores(arrayMap(I(arrayExist(I)))));
0253                else
0254                    rxnScores(i)=mean(aScores(arrayMap(I(arrayExist(I)))));
0255                end
0256            end
0257        end
0258    end
0259 end
0260 end
0261 
0262 %This is because isinf and all returns 0x1 for empty set, which gives a
0263 %concatenation error. Do like this instead of having many if statements
0264 function y=myIsInf(x)
0265     y=isinf(x);
0266     if isempty(y)
0267         y=[];
0268     end
0269 end
0270 function y=myAll(x,dim)
0271     y=all(x,dim);
0272     if isempty(y)
0273         y=[];
0274     end
0275 end

Generated on Tue 23-Apr-2013 15:18:37 by m2html © 2005