Home > RAVEN > mapCompartments.m

mapCompartments

PURPOSE ^

mapCompartments

SYNOPSIS ^

function geneScoreStructure=mapCompartments(geneScoreStructure,varargin)

DESCRIPTION ^

 mapCompartments
   Maps compartments in the geneScoreStructure. This is used if you do not
   want a model that uses all of the compartments from the predictor. This
   function will then let you define rules on how the compartments should
   be merged.

   Any number of rules can be defined as consecutive strings or in a cell array.
   'comp1'             comp1 should be kept in the structure

CROSS-REFERENCE INFORMATION ^

This function calls: This function is called by:

SOURCE CODE ^

0001 function geneScoreStructure=mapCompartments(geneScoreStructure,varargin)
0002 % mapCompartments
0003 %   Maps compartments in the geneScoreStructure. This is used if you do not
0004 %   want a model that uses all of the compartments from the predictor. This
0005 %   function will then let you define rules on how the compartments should
0006 %   be merged.
0007 %
0008 %   Any number of rules can be defined as consecutive strings or in a cell
0009 %   array. Rules and compartment names are compared case-insensitively.
0010 %   'comp1'             comp1 should be kept in the structure
0011 %
0012 %   'comp1=comp2'       The scores in comp2 are merged to comp1 and comp2 is
0013 %                       removed from the structure. This automatically
0014 %                       keeps comp1 in the structure
0015 %
0016 %   'comp1=comp2 comp3' The scores in comp2 and comp3 are merged to comp1
0017 %                       and comp2 & comp3 are removed from the structure.
0018 %                       This automatically keeps comp1 in the structure
0019 %
0020 %   'comp1 comp2=comp3' The scores in comp3 are split between comp1 and
0021 %                       comp2. This automatically keeps comp1 and comp2 in
0022 %                       the structure
0023 %
0024 %   'comp1=other'       The scores in any compartment not included are
0025 %                       merged to comp1. This is applied after all other
0026 %                       rules.
0027 %
0028 %   When one compartment is merged to another the resulting scores will be
0029 %   the best for each gene in either of the compartments. In the case where
0030 %   one compartment is split among several, the scores for the compartment
0031 %   to be merged are divided by the number of compartments to split to.
0032 %
0033 %   Example: The predictor you use gives predictions for Extracellular,
0034 %   Cytosol, Nucleus, Peroxisome, Mitochondria, ER, and Lysosome. You want to
0035 %   have a model with Extracellular, Cytosol, Mitochondria, and Peroxisome
0036 %   where Lysosome is merged with Peroxisome and all other compartments
0037 %   are merged to the Cytosol.
0038 %
0039 %   GSS=mapCompartments(GSS,'Extracellular','Mitochondria',...
0040 %       'Peroxisome=Lysosome','Cytosol=other');
0041 %
0042 %   geneScoreStructure  a structure to be used in predictLocalization. The
0043 %                       fields compartments, scores (one row per gene, one
0044 %                       column per compartment) and genes are updated
0045 %   varargin            the rules, as consecutive strings or one cell array
0046 %
0047 %   Usage: geneScoreStructure=mapCompartments(geneScoreStructure,varargin)
0048 %
0049 %   Rasmus Agren, 2013-08-01
0050 
0051 %NOTE(review): dispEM is defined elsewhere. It is assumed to throw an error
0052 %unless its second argument is false, in which case it only warns - confirm
0053 
0054 %The rules may be supplied as one cell array instead of as consecutive
0055 %strings. Unpack it so that both forms are handled identically below
0056 if numel(varargin)==1 && iscell(varargin{1})
0057     varargin=varargin{1};
0058 end
0059 varargin=upper(varargin);
0060 
0061 %First find the compartments that will end up in the final structure. They
0062 %are the ones that stand alone or are to the left of some '='
0063 toKeep={};
0064 toMerge={};
0065 I=regexp(varargin,'=','split');
0066 for i=1:numel(varargin)
0067     if numel(I{i})==1
0068        toKeep=[toKeep;I{i}];
0069     else
0070        J=regexp(I{i}(1),' ','split');
0071        K=regexp(I{i}(2),' ','split');
0072        toKeep=[toKeep;J{1}(:)];
0073        toMerge=[toMerge;K{1}(:)];
0074     end
0075 end
0076 
0077 %Check that there are no compartments that should both be merged and kept
0078 if ~isempty(intersect(toKeep,toMerge))
0079     dispEM('There are inconsistencies where one or more compartment(s) should be both kept and merged to another');
0080 end
0081 
0082 %Check that there are no compartments in the rules that are not in the
0083 %geneScoreStructure. 'OTHER' is a keyword and therefore always allowed
0084 uComps=upper(geneScoreStructure.compartments);
0085 J=[uComps;{'OTHER'}];
0086 
0087 if ~isempty(setdiff([toKeep;toMerge],J))
0088     dispEM('There are compartment in the rules that are not in geneScoreStructure.compartments');
0089 end
0090 
0091 %Loop through it again and do the mapping
0092 otherIndex=[]; %This stores the rule which maps 'other'.
0093 
0094 for i=1:numel(I)
0095    if numel(I{i})>1
0096        %Get the compartment indexes that should be mapped
0097        J=regexp(I{i}(2),' ','split');
0098        if strcmpi(J{1},'other')
0099            %The 'other' rule is applied after all other rules
0100            otherIndex=i;
0101            continue;
0102        end
0103        [k, K]=ismember(J{1},uComps);
0104 
0105        %And to where they should be mapped
0106        J=regexp(I{i}(1),' ','split');
0107        [l, L]=ismember(J{1},uComps);
0108 
0109        %It's not allowed to have rules like A B=C D
0110        if numel(K)>1 && numel(L)>1
0111            dispEM('It is not allowed to have rules like "A B=C D" (map more than one compartment to more than one compartment)');
0112        end
0113 
0114        %A compartment that is no longer found has been removed by an
0115        %earlier rule
0116        if ~all(k) || ~all(l)
0117             dispEM('Error in mapping. This most likely means that some compartment(s) are mapped to different compartments in different rules. Use A B=C if you want to map C to several compartments');
0118        end
0119 
0120        %Get the best score for each gene among the compartments that
0121        %should be merged to something else
0122        S=max(geneScoreStructure.scores(:,K),[],2);
0123        for j=1:numel(L)
0124            %If the scores are mapped to several different compartments then
0125            %split the scores between them
0126            geneScoreStructure.scores(:,L(j))=max(geneScoreStructure.scores(:,L(j)),S./numel(L));
0127        end
0128 
0129        %Remove the compartments that were merged. uComps is kept in sync
0130        %so that the indexes stay valid for the following rules
0131        geneScoreStructure.compartments(K)=[];
0132        geneScoreStructure.scores(:,K)=[];
0133        uComps(K)=[];
0134    end
0135 end
0136 
0137 %Then check if there are remaining compartments that should be removed or
0138 %mapped as 'other'
0139 J=find(~ismember(uComps,toKeep));
0140 if any(J)
0141     if any(otherIndex)
0142         K=regexp(I{otherIndex}(1),' ','split');
0143         [l, L]=ismember(K{1},uComps);
0144         %Test the size first; && requires scalar operands and l has one
0145         %element per target compartment
0146         if numel(l)==1 && l
0147             S=max(geneScoreStructure.scores(:,J),[],2);
0148             geneScoreStructure.scores(:,L)=max(geneScoreStructure.scores(:,L),S);
0149         else
0150             dispEM('Could not map "other" to more than one compartment');
0151         end
0152     else
0153        dispEM('There are compartments that are not defined if they should be kept or removed. Use "A=other" or define more rules if you do not want them to be deleted',false);
0154     end
0155 
0156     %Remove the compartments that were merged
0157     geneScoreStructure.compartments(J)=[];
0158     geneScoreStructure.scores(:,J)=[];
0159 end
0160 
0161 %Renormalize so that the best score for each gene is 1. Genes with score 0
0162 %in all compartments get NaN (0/0), which is used to find them below
0163 I=max(geneScoreStructure.scores,[],2);
0164 geneScoreStructure.scores=bsxfun(@times, geneScoreStructure.scores, 1./I);
0165 
0166 %If there are genes that have score 0 in all compartments, remove them and
0167 %print a warning.
0168 I=find(isnan(geneScoreStructure.scores(:,1))); %Only looks at the first column as it will be the same for the other ones
0169 if any(I)
0170     dispEM('The following genes had score 0.0 in all compartments. They have been removed from the structure. Consider using more rules or "A=other" in order to prevent this:',false,geneScoreStructure.genes(I));
0171     geneScoreStructure.scores(I,:)=[];
0172     geneScoreStructure.genes(I)=[];
0173 end

Generated on Mon 06-Jan-2014 14:58:12 by m2html © 2005