Home > RAVEN > getPhylDist.m

getPhylDist

PURPOSE ^

getPhylDist

SYNOPSIS ^

function phylDistStruct=getPhylDist(keggPath,onlyInKingdom)

DESCRIPTION ^

 getPhylDist
   Calculates distance between species in KEGG based on systematic name.

   keggPath        pathway to the KEGG database files
   onlyInKingdom   if true, it generates a distance matrix with distance 
                   Inf for organisms from another domains
                   (Prokaryota, Eukaryota) (opt, default false)

   phylDistStruct  a structure with a list of organism ids and a matrix
                   that specifies their pairwise distances

   This simple metric is based on the number of nodes two organisms are
   away from each other in KEGG.

   Usage: phylDistStruct=getPhylDist(keggPath,onlyInKingdom)

   Rasmus Agren, 2012-12-17

CROSS-REFERENCE INFORMATION ^

This function calls: This function is called by:

SOURCE CODE ^

0001 function phylDistStruct=getPhylDist(keggPath,onlyInKingdom)
0002 % getPhylDist
0003 %   Calculates distance between species in KEGG based on systematic name.
0004 %
0005 %   keggPath        pathway to the KEGG database files
0006 %   onlyInKingdom   if true, it generates a distance matrix with distance
0007 %                   Inf for organisms from another domains
0008 %                   (Prokaryota, Eukaryota) (opt, default false)
0009 %
0010 %   phylDistStruct  a structure with a list of organism ids and a matrix
0011 %                   that specifies their pairwise distances
0012 %
0013 %   This simple metric is based on the number of nodes two organisms are
0014 %   away from each other in KEGG.
0015 %
0016 %   Usage: phylDistStruct=getPhylDist(keggPath,onlyInKingdom)
0017 %
0018 %   Rasmus Agren, 2012-12-17
0019 %
0020 
0021 if nargin<2
0022     onlyInKingdom=false;
0023 end
0024 
0025 %Check if the reactions have been parsed before and saved. If so, load the
0026 %model.
0027 [ST I]=dbstack('-completenames');
0028 ravenPath=fileparts(ST(I).file);
0029 distFile=fullfile(ravenPath,'kegg','keggPhylDist.mat');
0030 if exist(distFile, 'file')
0031     fprintf(['NOTE: Importing KEGG phylogenetic distance matrix from ' strrep(distFile,'\','/') '.\n']);
0032     load(distFile);
0033 else
0034     %Download required files from KEGG if it doesn't exist in the directory
0035     downloadKEGG(keggPath);
0036 
0037     %Open the file that describes the naming of the species
0038     fid = fopen(fullfile(keggPath,'taxonomy'), 'r');
0039 
0040     phylDistStruct.ids={};
0041 
0042     %Keeps the categories for each organism
0043     orgCat={};
0044 
0045     currentCat={}; %Keeps track of the current category
0046     depth=0; %Keeps track of the current tree depth
0047 
0048     %Loop through the file
0049     orgCounter=0;
0050     while 1
0051       %Get the next line
0052       tline = fgetl(fid);
0053 
0054       %Abort at end of file
0055       if ~ischar(tline)
0056           break;
0057       end
0058 
0059       if any(tline)
0060           %Check if it's a new category
0061           if tline(1)=='#'
0062               %Find the first space (=depth +1)
0063               sPos=strfind(tline,' ')-1; %Should always exist
0064               sPos=sPos(1);
0065 
0066               %If we have stepped back one step in the tree
0067               if sPos<depth
0068                   currentCat=currentCat(1:sPos);
0069               end
0070               depth=sPos;
0071 
0072               currentCat{depth}=tline(sPos+2:end);
0073           else
0074               orgCounter=orgCounter+1;
0075               %It is an organism
0076               %Get the id between first and second white space
0077               sPos=find(isstrprop(tline, 'wspace')); %Should always exist
0078               phylDistStruct.ids{orgCounter}=tline(sPos(1)+1:sPos(2)-1);
0079               orgCat{orgCounter}=currentCat;
0080           end
0081       end
0082     end
0083 
0084     %Generate a distance matrix (very straight forward here, not neat)
0085     phylDistStruct.distMat=zeros(numel(phylDistStruct.ids));
0086     for i=1:numel(phylDistStruct.ids)
0087         for j=1:numel(phylDistStruct.ids)
0088             if onlyInKingdom==true
0089                if ~strcmp(orgCat{i}(1),orgCat{j}(1))
0090                    phylDistStruct.distMat(i,j)=Inf;
0091                    continue;
0092                end
0093             end
0094             %Calculate the distance between then
0095             dist=numel(orgCat{i})-numel(orgCat{j});
0096             if dist>0
0097                 aCat=orgCat{i}(1:end-dist);
0098             else
0099                 aCat=orgCat{i};
0100             end
0101             if dist<0
0102                 bCat=orgCat{j}(1:end+dist);
0103             else
0104                 bCat=orgCat{j};
0105             end
0106 
0107             %Loop through the categories and stop when they are the same
0108             for k=numel(aCat):-1:1
0109                if strcmp(aCat{k},bCat{k})
0110                   break; 
0111                end
0112             end
0113             phylDistStruct.distMat(i,j)=dist+numel(aCat)-k;
0114         end
0115     end
0116     %Save the structure
0117     save(distFile,'phylDistStruct');
0118 end
0119 end

Generated on Tue 16-Jul-2013 21:50:02 by m2html © 2005