Home > RAVEN > SBMLFromExcel.m

SBMLFromExcel

PURPOSE ^

SBMLFromExcel

SYNOPSIS ^

function SBMLFromExcel(fileName, outputFileName,COBRAFormat,printWarnings)

DESCRIPTION ^

 SBMLFromExcel
   Converts a model in the Excel format to SBML.

   fileName        the Excel file
   outputFileName  the SBML file
   COBRAFormat     true if the model should be saved in COBRA Toolbox
                   format. Only limited support at the moment (opt,
                   default false)
   printWarnings   true if warnings about model issues should be reported
                   (opt, default true)

   For a detailed description of the file format, see the supplied manual.

   Usage: SBMLFromExcel(fileName,outputFileName,COBRAFormat,printWarnings)

   Rasmus Agren, 2013-03-07

CROSS-REFERENCE INFORMATION ^

This function calls: This function is called by:

SUBFUNCTIONS ^

SOURCE CODE ^

0001 function SBMLFromExcel(fileName, outputFileName,COBRAFormat,printWarnings)
0002 % SBMLFromExcel
0003 %   Converts a model in the Excel format to SBML.
0004 %
0005 %   fileName        the Excel file
0006 %   outputFileName  the SBML file
0007 %   COBRAFormat     true if the model should be saved in COBRA Toolbox
0008 %                   format. Only limited support at the moment (opt,
0009 %                   default false)
0010 %   printWarnings   true if warnings about model issues should be reported
0011 %                   (opt, default true)
0012 %
0013 %   For a detailed description of the file format, see the supplied manual.
0014 %
0015 %   Usage: SBMLFromExcel(fileName,outputFileName,COBRAFormat,printWarnings)
0016 %
0017 %   Rasmus Agren, 2013-03-07
0018 %
0019 
0020 if nargin<3
0021     COBRAFormat=false;
0022 end
0023 if nargin<4
0024     printWarnings=true;
0025 end
0026 
0027 %Check if the file exists
0028 if ~exist(fileName,'file')
0029     throw(MException('','The Excel file could not be found'));
0030 end
0031 
0032 try
0033     [crap,crap,raw]=xlsread(fileName,'MODEL');
0034 catch
0035     throw(MException('','Could not load the MODEL sheet'));
0036 end
0037 raw=cleanImported(raw);
0038 
0039 %It is assumed that the first line is labels and that the second one is
0040 %info
0041 allLabels={'MODELID';'MODELNAME';'DEFAULT LOWER';'DEFAULT UPPER';'CONTACT GIVEN NAME';'CONTACT FAMILY NAME';'CONTACT EMAIL';'ORGANIZATION';'TAXONOMY';'NOTES'};
0042 modelAnnotation=[];
0043 modelAnnotation.ID=[];
0044 modelAnnotation.name=[];
0045 modelAnnotation.givenName=[];
0046 modelAnnotation.familyName=[];
0047 modelAnnotation.email=[];
0048 modelAnnotation.organization=[];
0049 modelAnnotation.taxonomy=[];
0050 modelAnnotation.note=[];
0051 defaultLower=-1000;
0052 defaultUpper=1000;
0053 
0054 %Loop through the labels
0055 [I J]=ismember(upper(raw(1,:)),allLabels);
0056 I=find(I);
0057 for i=1:numel(I)
0058     switch J(I(i))
0059         case 1
0060             if any(raw{I(i),2})
0061                 modelAnnotation.ID=num2str(raw{2,I(i)}); %Should be string already
0062                 if ~isempty(regexp(modelAnnotation.ID,'[^a-z_A-Z0-9]', 'once'))
0063                     throw(MException('','Illegal character(s) in model id')); 
0064                 end
0065             else
0066                 throw(MException('','No model ID supplied'));
0067             end
0068         case 2
0069             if any(raw{2,I(i)})
0070                 modelAnnotation.name=num2str(raw{2,I(i)}); %Should be string already
0071             else
0072                 throw(MException('','No model name supplied'));
0073             end
0074         case 3
0075             if isnumeric(raw{2,I(i)})
0076                 if ~isnan(raw{2,I(i)})
0077                     defaultLower=raw{2,I(i)};
0078                 else
0079                     fprintf('NOTE: DEFAULT LOWER not supplied. Uses -1000.');
0080                     defaultLower=-1000;
0081                 end
0082             else
0083                 %Try to convert string to number
0084                 if isnan(str2double(raw{2,I(i)}))
0085                     throw(MException('','DEFAULT LOWER must be numeric'));
0086                 else
0087                     defaultLower=raw{2,I(i)};
0088                 end
0089             end
0090         case 4
0091             if isnumeric(raw{2,I(i)})
0092                 if ~isnan(raw{2,I(i)})
0093                     defaultUpper=raw{2,I(i)};
0094                 else
0095                     fprintf('NOTE: DEFAULT UPPER not supplied. Uses 1000.');
0096                     defaultUpper=1000;
0097                 end
0098             else
0099                 %Try to convert string to number
0100                 if isnan(str2double(raw{2,I(i)}))
0101                     throw(MException('','DEFAULT UPPER must be numeric'));
0102                 else
0103                     defaultUpper=raw{2,I(i)};
0104                 end
0105             end
0106         case 5
0107             if any(raw{2,I(i)})
0108                 modelAnnotation.givenName=num2str(raw{2,I(i)}); %Should be string already
0109             end
0110         case 6
0111             if any(raw{2,I(i)})
0112                 modelAnnotation.familyName=num2str(raw{2,I(i)}); %Should be string already
0113             end
0114         case 7
0115             if any(raw{2,I(i)})
0116                 modelAnnotation.email=num2str(raw{2,I(i)}); %Should be string already
0117             end
0118         case 8
0119             if any(raw{2,I(i)})
0120                 modelAnnotation.organization=num2str(raw{2,I(i)}); %Should be string already
0121             end    
0122         case 9
0123             if any(raw{2,I(i)})
0124                 modelAnnotation.taxonomy=num2str(raw{2,I(i)}); %Should be string already
0125             end 
0126         case 10
0127             if any(raw{2,I(i)})
0128                 modelAnnotation.note=num2str(raw{2,I(i)}); %Should be string already
0129             end     
0130     end  
0131 end
0132 
0133 %Check some needed stuff
0134 if isempty(modelAnnotation.ID)
0135     throw(MException('','There must be a column named MODELID in the MODEL sheet'));   
0136 end
0137 if isempty(modelAnnotation.name)
0138     throw(MException('','There must be a column named MODELNAME in the MODEL sheet'));   
0139 end
0140 
0141 %Get compartment information
0142 try
0143     [crap,crap,raw]=xlsread(fileName,'COMPS');
0144 catch
0145     throw(MException('','Could not load the COMPS sheet'));
0146 end    
0147 raw=cleanImported(raw);
0148 
0149 allLabels={'COMPABBREV';'COMPNAME';'INSIDE';'GO TERM'};
0150 compAbbrev={};
0151 compName={};
0152 compOutside={};
0153 compGO={};
0154 
0155 %Loop through the labels
0156 [I J]=ismember(upper(raw(1,:)),allLabels);
0157 I=find(I);
0158 for i=1:numel(I)
0159     switch J(I(i))
0160         case 1
0161            compAbbrev=cellfun(@num2str,raw(2:end,I(i)),'UniformOutput',false);
0162         case 2
0163            compName=cellfun(@num2str,raw(2:end,I(i)),'UniformOutput',false);
0164         case 3
0165            compOutside=cellfun(@num2str,raw(2:end,I(i)),'UniformOutput',false);
0166         case 4
0167             if COBRAFormat==false
0168                 compGO=raw(2:end,I(i));
0169             end
0170     end
0171 end
0172 
0173 %Check that necessary fields are loaded
0174 if isempty(compAbbrev)
0175     throw(MException('','There must be a column named COMPABBREV in the COMPS sheet'));   
0176 end
0177 if isempty(compName)
0178     throw(MException('','There must be a column named COMPNAME in the COMPS sheet'));   
0179 end
0180 if isempty(compOutside)
0181     compOutside=cell(numel(compAbbrev),1);
0182     compOutside(:)={'NaN'};
0183 end
0184 if isempty(compGO) && COBRAFormat==false
0185     compGO=cell(numel(compAbbrev),1);
0186 end
0187 
0188 %Check that the abbreviated form only contains one character
0189 for i=1:length(compAbbrev)
0190     if ~all(isstrprop(compAbbrev{i},'alphanum'))
0191         throw(MException('',['The compartment id "' compAbbrev{i} '" is not alphanumeric']));
0192     end
0193 end
0194 
0195 %Check to see that all the OUTSIDE compartments are defined
0196 for i=1:length(compOutside)
0197    if ~strcmp('NaN',compOutside{i}) %A little weird, but easier
0198       if ~any(strcmp(compOutside{i},compAbbrev))
0199           throw(MException('',['The outside compartment for "' compName{i} '" does not have a corresponding compartment'])); 
0200       end
0201    else
0202        compOutside{i}=NaN;
0203    end
0204 end
0205 
0206 %Gene info is not needed to load the model, so check for this here
0207 geneNames={};
0208 geneID1={};
0209 geneID2={};
0210 geneShortNames={};
0211 geneKEGG={};
0212 geneComps={};
0213 %Get all the genes and info about them
0214 foundGenes=true;
0215 try
0216     [crap,crap,raw]=xlsread(fileName,'GENES');
0217 catch
0218     foundGenes=false;
0219     if printWarnings==true
0220         fprintf('WARNING: There is no spreadsheet named GENES\n')
0221     end
0222 end
0223 if foundGenes==true
0224     raw=cleanImported(raw);
0225 
0226     allLabels={'GENE NAME';'GENE ID 1';'GENE ID 2';'SHORT NAME';'COMPARTMENT';'KEGG MAPS'};
0227     
0228     %Loop through the labels
0229     [I J]=ismember(upper(raw(1,:)),allLabels);
0230     I=find(I);
0231     foundGenes=false;
0232     for i=1:numel(I)
0233         switch J(I(i))
0234             case 1
0235                 geneNames=cellfun(@num2str,raw(2:end,I(i)),'UniformOutput',false);
0236                 foundGenes=true;
0237             case 2
0238                 geneID1=cellfun(@num2str,raw(2:end,I(i)),'UniformOutput',false);
0239             case 3
0240                 geneID2=cellfun(@num2str,raw(2:end,I(i)),'UniformOutput',false);
0241             case 4
0242                 geneShortNames=cellfun(@num2str,raw(2:end,I(i)),'UniformOutput',false);
0243             case 5
0244                 geneComps=cellfun(@num2str,raw(2:end,I(i)),'UniformOutput',false);
0245             case 6
0246                 geneKEGG=cellfun(@num2str,raw(2:end,I(i)),'UniformOutput',false);
0247         end
0248     end
0249 
0250     if foundGenes==false
0251         throw(MException('','There must be a column named GENE NAME in the GENES sheet'));
0252     end
0253 
0254     if isempty(geneID1)
0255         geneID1=cell(numel(geneNames));
0256     end
0257     if isempty(geneID2)
0258         geneID2=cell(numel(geneNames));
0259     end
0260     if isempty(geneShortNames)
0261         geneShortNames=cell(numel(geneNames));
0262     end
0263     if isempty(geneKEGG)
0264         geneKEGG=cell(numel(geneNames));
0265     end
0266     if isempty(geneComps)
0267        geneComps=cell(numel(geneNames),1);
0268        geneComps(:)=compAbbrev(1);
0269        if printWarnings==true
0270             fprintf('WARNING: There is no column named COMPARTMENT in the GENES sheet. All genes will be assigned to the first compartment in COMPS. This is merely for annotation and has no effect on the functionality of the model\n');
0271        end
0272     end
0273 
0274     %Check that geneName contain only strings and no empty strings
0275     if ~iscellstr(geneNames)
0276         throw(MException('','All gene names have to be strings'));
0277     else
0278         if any(strcmp('',geneNames)) || any(strcmp('NaN',geneNames))
0279             throw(MException('','There can be no empty strings in gene names'));
0280         end
0281     end
0282 
0283     %Check that geneComp contain only strings and no empty string
0284     if ~iscellstr(geneComps)
0285         throw(MException('','All gene compartments have to be strings'));
0286     else
0287         if ~isempty(find(strcmp('',geneComps),1))
0288             throw(MException('','There can be no empty strings in gene compartments'));
0289         end
0290     end
0291 
0292     %Check that all gene compartments correspond to a compartment
0293     I=ismember(geneComps,compAbbrev);
0294     if ~all(I)
0295         bad=find(I==0,1);
0296         throw(MException('',['The gene ' geneNames{bad} ' has a compartment abbreviation that could not be found']));
0297     end
0298 
0299     %Check that all gene names are unique
0300     if length(geneNames)~=length(unique(geneNames))
0301         throw(MException('','Not all gene names are unique'));
0302     end
0303 
0304     %Check that geneNames contain no weird characters
0305     illegalCells=regexp(geneNames,'[();:]', 'once');
0306     if ~isempty(cell2mat(illegalCells))
0307         errorText='Illegal character(s) in gene names:\n';
0308         for i=1:length(illegalCells)
0309             if ~isempty(illegalCells{i})
0310                 errorText=[errorText geneNames{i} '\n'];
0311             end
0312         end
0313         throw(MException('',errorText));
0314     end
0315 
0316     %To fit with other code
0317     geneID1(strcmp('NaN',geneID1))={NaN};
0318     geneID2(strcmp('NaN',geneID2))={NaN};
0319     geneShortNames(strcmp('NaN',geneShortNames))={NaN};
0320     geneKEGG(strcmp('NaN',geneKEGG))={NaN};
0321 end
0322 
0323 %Loads the reaction data
0324 try
0325     [crap,crap,raw]=xlsread(fileName,'RXNS');
0326 catch
0327     throw(MException('','Could not load the RXNS sheet'));
0328 end
0329 raw=cleanImported(raw);
0330 
0331 allLabels={'RXNID';'NAME';'EQUATION';'EC-NUMBER';'GENE ASSOCIATION';'LOWER BOUND';'UPPER BOUND';'OBJECTIVE';'COMPARTMENT';'SUBSYSTEM';'SBO TERM';'REPLACEMENT ID'};
0332 reactionIDs={};
0333 reactionNames={};
0334 equations={};
0335 ecNumbers={};
0336 geneAssociations={};
0337 lowerBounds=[];
0338 upperBounds=[];
0339 objectives=[];
0340 reactionComps={};
0341 reactionSubsystem={};
0342 reactionSBO={};
0343 reactionReplacement={};
0344 
0345 %Loop through the labels
0346 [I J]=ismember(upper(raw(1,:)),allLabels);
0347 I=find(I);
0348 for i=1:numel(I)
0349     switch J(I(i))
0350         case 1
0351            reactionIDs=cellfun(@num2str,raw(2:end,I(i)),'UniformOutput',false);
0352         case 2
0353            reactionNames=cellfun(@num2str,raw(2:end,I(i)),'UniformOutput',false);
0354         case 3
0355            equations=cellfun(@num2str,raw(2:end,I(i)),'UniformOutput',false);
0356         case 4
0357            ecNumbers=cellfun(@num2str,raw(2:end,I(i)),'UniformOutput',false);
0358         case 5
0359            geneAssociations=cellfun(@num2str,raw(2:end,I(i)),'UniformOutput',false);
0360         case 6
0361            %Check here if they are all numeric
0362            if all(cellfun(@isnumeric,raw(2:end,I(i))))
0363                 lowerBounds=cell2mat(raw(2:end,I(i)));
0364            else
0365                 throw(MException('','The lower bounds must be numerical values')); 
0366            end
0367         case 7
0368            %Check here if they are all numeric
0369            if all(cellfun(@isnumeric,raw(2:end,I(i))))
0370                 upperBounds=cell2mat(raw(2:end,I(i)));
0371            else
0372                 throw(MException('','The upper bounds must be numerical values')); 
0373            end
0374         case 8
0375            %Check here if they are all numeric
0376            if all(cellfun(@isnumeric,raw(2:end,I(i))))
0377                 objectives=cell2mat(raw(2:end,I(i)));
0378            else
0379                 throw(MException('','The objectives must be numerical values')); 
0380            end
0381         case 9
0382             reactionComps=cellfun(@num2str,raw(2:end,I(i)),'UniformOutput',false);
0383         case 10
0384             reactionSubsystem=cellfun(@num2str,raw(2:end,I(i)),'UniformOutput',false);
0385         case 11
0386             if COBRAFormat==false
0387                 reactionSBO=cellfun(@num2str,raw(2:end,I(i)),'UniformOutput',false);
0388             end
0389         case 12
0390             reactionReplacement=cellfun(@num2str,raw(2:end,I(i)),'UniformOutput',false);    
0391     end
0392 end
0393 
0394 %Check that all necessary reaction info has been loaded (RXNID and
0395 %EQUATION)
0396 if isempty(reactionIDs)
0397      throw(MException('','There must be a column named RXNID in the RXNS sheet'));   
0398 end
0399 if isempty(equations)
0400      throw(MException('','There must be a column named EQUATION in the RXNS sheet'));   
0401 end
0402 
0403 %Check if some other stuff is loaded and populate with default values
0404 %otherwise
0405 if isempty(reactionNames)
0406    reactionNames=cell(numel(reactionIDs),1);
0407    if printWarnings==true
0408         fprintf('WARNING: There is no column named NAME in the RXNS sheet. Empty strings will be used as reaction names\n');
0409    end
0410 end
0411 if isempty(lowerBounds)
0412    lowerBounds=nan(numel(reactionIDs),1);
0413    if printWarnings==true
0414         fprintf('WARNING: There is no column named LOWER BOUND in the RXNS sheet. Default bounds will be used\n');
0415    end
0416 end
0417 if isempty(upperBounds)
0418    upperBounds=nan(numel(reactionIDs),1);
0419    if printWarnings==true
0420         fprintf('WARNING: There is no column named UPPER BOUND in the RXNS sheet. Default bounds will be used\n');
0421    end
0422 end
0423 if isempty(objectives)
0424    objectives=nan(numel(reactionIDs),1);
0425    if printWarnings==true
0426         fprintf('WARNING: There is no column named OBJECTIVE in the RXNS sheet\n');
0427    end
0428 end
0429 if isempty(reactionComps)
0430    reactionComps=cell(numel(reactionIDs),1);
0431    reactionComps(:)=compAbbrev(1);
0432    if printWarnings==true
0433         fprintf('WARNING: There is no column named COMPARTMENT in the RXNS sheet. All reactions will be assigned to the first compartment in COMPS. This is merely for annotation and has no effect on the functionality of the model\n');
0434    end
0435 end
0436 
0437 %To fit with other code
0438 reactionNames(strcmp('NaN',reactionNames))={NaN};
0439 ecNumbers(strcmp('NaN',ecNumbers))={NaN};
0440 geneAssociations(strcmp('NaN',geneAssociations))={NaN};
0441 reactionSubsystem(strcmp('NaN',reactionSubsystem))={NaN};
0442 reactionSBO(strcmp('NaN',reactionSBO))={NaN};
0443 reactionReplacement(strcmp('NaN',reactionReplacement))={NaN};
0444 
0445 %Check that an SBO-term is associated with each reaction
0446 if COBRAFormat==false
0447     if ~isempty(reactionSBO)
0448         if ~iscellstr(reactionSBO)
0449             reactionSBO=[];
0450         else
0451             if ~isempty(find(strcmp('',reactionSBO),1))
0452                 reactionSBO=[];
0453                 if printWarnings==true
0454                     fprintf('WARNING: Not all reactions have associated SBO-terms. SBO-terms will not be used.\n');
0455                 end
0456             end
0457         end
0458     end
0459 end
0460 
0461 if isempty(ecNumbers)
0462    ecNumbers=cell(numel(reactionIDs),1);
0463 end
0464 if isempty(geneAssociations)
0465    geneAssociations=cell(numel(reactionIDs),1);
0466 end
0467 if isempty(reactionSubsystem)
0468    reactionSubsystem=cell(numel(reactionIDs),1);
0469 end
0470 
0471 %Replace the reaction IDs for those IDs that have a corresponding
0472 %replacement name.
0473 I=cellfun(@any,reactionReplacement);
0474 reactionIDs(I)=reactionReplacement(I);
0475 
0476 %Check that all reaction IDs are unique
0477 I=1:numel(reactionIDs);
0478 [crap J]=unique(reactionIDs);
0479 I(J)=[];
0480 if any(I)
0481     errorText='Duplicate reaction IDs:\n';
0482     for i=1:length(I)
0483         errorText=[errorText reactionIDs{I(i)} '\n'];
0484     end
0485     throw(MException('',errorText));
0486 end
0487 
0488 %Check that there are no empty strings in reactionIDs or equations
0489 if any(strcmp('NaN',reactionIDs)) || any(strcmp('',reactionIDs))
0490     throw(MException('','There are empty reaction IDs')); 
0491 end
0492 
0493 if any(strcmp('NaN',equations)) || any(strcmp('',equations))
0494     throw(MException('','There are empty equations')); 
0495 end
0496 
0497 %Check that reactionIDs contain no weird characters
0498 illegalCells=regexp(reactionIDs,'[^a-z_A-Z0-9]', 'once');
0499 if ~isempty(cell2mat(illegalCells))
0500     errorText='Illegal character(s) in reaction IDs:\n';
0501     for i=1:length(illegalCells)
0502         if ~isempty(illegalCells{i})
0503             errorText=[errorText reactionIDs{i} '\n'];
0504         end
0505     end
0506     throw(MException('',errorText));
0507 end
0508 
0509 %Check that all reactions have compartments defined
0510 if any(strcmp('NaN',reactionComps)) || any(strcmp('',reactionComps))
0511     throw(MException('','All reactions must have an associated compartment string')); 
0512 end
0513 
0514 %Fix empty reaction names
0515 I=~cellfun(@any,reactionNames);
0516 reactionNames(I)={''};
0517     
0518 %Check gene association and compartment for each reaction
0519 for i=1:length(reactionNames)    
0520     %Check that all gene associations have a match in the gene list
0521     if ischar(geneAssociations{i}) && length(geneAssociations{i})>0
0522         indexes=strfind(geneAssociations{i},':'); %Genes are separated by ":" for AND and ";" for OR
0523         indexes=unique([indexes strfind(geneAssociations{i},';')]);
0524         if isempty(indexes)
0525             %See if you have a match (it can't have more than one since the
0526             %names are unique)
0527             if isempty(find(strcmp(geneAssociations{i},geneNames),1))
0528                 throw(MException('',['The gene association in reaction ' reactionIDs{i} ' (' geneAssociations{i} ') is not present in the gene list']));
0529             end   
0530         else
0531             temp=[0 indexes numel(geneAssociations{i})+1];
0532             for j=1:numel(indexes)+1;
0533                 %The reaction has several associated genes
0534                 geneName=geneAssociations{i}(temp(j)+1:temp(j+1)-1);
0535                 if isempty(find(strcmp(geneName,geneNames),1))
0536                     throw(MException('',['The gene association in reaction ' reactionIDs{i} ' (' geneName ') is not present in the gene list']));
0537                 end
0538             end
0539         end
0540     end
0541 end
0542 %Check that the compartment for each reaction can be found
0543 I=ismember(reactionComps,compAbbrev);
0544 if ~all(I)
0545     bad=find(I==0,1);
0546     throw(MException('',['The reaction ' reactionIDs{bad} ' has a compartment abbreviation that could not be found']));
0547 end
0548 
0549 %Check that the reaction names don't contain any forbidden characters
0550 illegalCells=regexp(reactionNames,'[%"<>\\]', 'once');
0551 if ~isempty(cell2mat(illegalCells))
0552     errorText='Illegal character(s) in reaction names:\n';
0553     for i=1:length(illegalCells)
0554         if ~isempty(illegalCells{i})
0555             errorText=[errorText reactionNames{i} '\n'];
0556         end
0557     end
0558     throw(MException('',errorText));
0559 end
0560 
0561 %Get all the metabolites and info about them
0562 try
0563     [crap,crap,raw]=xlsread(fileName,'METS');
0564 catch
0565     throw(MException('','Could not load the METS sheet'));
0566 end
0567 raw=cleanImported(raw);            
0568 
0569 allLabels={'METID';'METNAME';'UNCONSTRAINED';'MIRIAM';'COMPOSITION';'INCHI';'COMPARTMENT';'REPLACEMENT ID'};
0570 
0571 %Load the metabolite information
0572 metaboliteIDs={};
0573 metaboliteNames={};
0574 metConstrained={};
0575 metMiriam={};
0576 metComposition={};
0577 metInchi={};
0578 metComps={};
0579 metReplacement={};
0580 
0581 %Loop through the labels
0582 [I J]=ismember(upper(raw(1,:)),allLabels);
0583 I=find(I);
0584 for i=1:numel(I)
0585     switch J(I(i))
0586         case 1
0587            metaboliteIDs=raw(2:end,I(i));
0588         case 2
0589            metaboliteNames=raw(2:end,I(i)); 
0590         case 3
0591            metConstrained=cellfun(@num2str,raw(2:end,I(i)),'UniformOutput',false);
0592         case 4
0593            metMiriam=raw(2:end,I(i));  
0594         case 5
0595            metComposition=raw(2:end,I(i));
0596         case 6
0597            metInchi=raw(2:end,I(i)); 
0598         case 7
0599             metComps=cellfun(@num2str,raw(2:end,I(i)),'UniformOutput',false);
0600 
0601             %Check that all metabolites have compartments defined
0602             if any(strcmp('NaN',metComps)) || any(strcmp('',metComps))
0603                 throw(MException('','All metabolites must have an associated compartment string')); 
0604             end
0605         case 8
0606             metReplacement=raw(2:end,I(i));
0607     end
0608 end
0609 
0610 %Check that necessary fields are loaded (METID)
0611 if isempty(metaboliteIDs)
0612      throw(MException('','There must be a column named METID in the METS sheet'));   
0613 end
0614 
0615 %Check that some other stuff is loaded and use default values otherwise
0616 if isempty(metaboliteNames)
0617    metaboliteNames=cell(numel(metaboliteIDs),1);
0618    if printWarnings==true
0619         fprintf('WARNING: There is no column named METNAME in the METS sheet. METID will be used as name\n');
0620    end
0621 end
0622 if isempty(metConstrained)
0623    metConstrained=cell(numel(metaboliteIDs),1);
0624    metConstrained(:)={'false'};
0625    if printWarnings==true
0626         fprintf('WARNING: There is no column named UNCONSTRAINED in the METS sheet. All metabolites will be constrained\n');
0627    end
0628 end
0629 if isempty(metMiriam)
0630    metMiriam=cell(numel(metaboliteIDs),1);
0631 end
0632 if isempty(metComposition)
0633    metComposition=cell(numel(metaboliteIDs),1);
0634 end
0635 if isempty(metInchi)
0636    metInchi=cell(numel(metaboliteIDs),1);
0637 end
0638 if isempty(metComps)
0639    metComps=cell(numel(metaboliteIDs),1);
0640    metComps(:)=compAbbrev(1);
0641    if printWarnings==true
0642         fprintf('WARNING: There is no column named COMPARTMENT in the METS sheet. All metabolites will be assigned to the first compartment in COMPS. Note that RAVEN makes extensive use of metabolite names and compartments. Some features will therefore not function correctly if metabolite compartments are not correctly assigned\n');
0643    end
0644 end
0645 
0646 metConstrained(strcmp('NaN',metConstrained))={NaN};
0647 metConstrained(strcmp('1',metConstrained))={'true'};
0648 metConstrained(strcmp('0',metConstrained))={'false'};
0649 
0650 [I J]=unique(upper(metaboliteIDs));
0651 if numel(metaboliteIDs)~=numel(I)
0652     K=1:numel(metaboliteIDs);
0653     K(J)=[];
0654     toPrint=metaboliteIDs{K(1)};
0655     for i=2:numel(K)
0656         toPrint=[toPrint '\n' metaboliteIDs{K(i)}];
0657     end
0658     throw(MException('',['The following metabolites are duplicates:\n' toPrint]));
0659 end
0660 
0661 %Replace the metabolite IDs for those IDs that have a corresponding
0662 %replacement metabolite. This is not used for matching, but will be checked
0663 %for consistency with SBML naming conventions
0664 finalMetIDs=metaboliteIDs; 
0665 I=cellfun(@any,metReplacement);
0666 finalMetIDs(I)=metReplacement(I);
0667 
0668 %Check that the metaboliteIDs are strings
0669 if ~iscellstr(finalMetIDs)
0670     throw(MException('','All metabolite IDs must be strings'));
0671 end
0672 
0673 %Check that all metabolites IDs are unique after the mapping as well.
0674 if length(finalMetIDs)~=length(unique(finalMetIDs))
0675     throw(MException('','Not all metabolite IDs are unique'));
0676 end
0677 
0678 %Check that metaboliteIDs contain no weird characters.
0679 illegalCells=regexp(finalMetIDs,'[^a-z_A-Z0-9]', 'once');
0680 if ~isempty(cell2mat(illegalCells))
0681     errorText='Illegal character(s) in metabolite IDs:\n';
0682     for i=1:length(illegalCells)
0683         if ~isempty(illegalCells{i})
0684             errorText=[errorText finalMetIDs{i} '\n'];
0685         end
0686     end
0687     throw(MException('',errorText));
0688 end
0689 
0690 for i=1:length(finalMetIDs)
0691    %Check that the "constrained" fields are "true", "false", or NaN
0692    if iscellstr(metConstrained(i))
0693         if ~strcmpi('false',metConstrained(i)) && ~strcmpi('true',metConstrained(i))
0694             throw(MException('',['The UNCONSTRAINED property for metabolite "' finalMetIDs{i} '" must be "true", "false", or not set']));
0695         else
0696             metConstrained{i}=lower(metConstrained{i});
0697         end
0698     else
0699        if ~isnan(metConstrained{i})
0700            throw(MException('',['The UNCONSTRAINED property for metabolite "' finalMetIDs{i} '" must be "true", "false", or not set']));
0701        else
0702            metConstrained{i}='false';
0703        end
0704    end
0705     
0706    %If the metabolite name isn't set, replace it with the metabolite id
0707    if ~ischar(metaboliteNames{i}) || isempty(metaboliteNames{i})
0708        metaboliteNames(i)=finalMetIDs(i);
0709    end
0710 end
0711 
0712 %Check that the compartment for each reaction can be found
0713 I=ismember(metComps,compAbbrev);
0714 if ~all(I)
0715     bad=find(I==0,1);
0716     throw(MException('',['The metabolite ' finalMetIDs{bad} ' has a compartment abbreviation that could not be found']));
0717 end
0718 
0719 %Check that it doesn't contain any forbidden characters
0720 illegalCells=regexp(metaboliteNames,'["%<>\\]', 'once');
0721 if ~isempty(cell2mat(illegalCells))
0722     errorText='Illegal character(s) in metabolite names:\n';
0723     for i=1:length(illegalCells)
0724         if ~isempty(illegalCells{i})
0725             errorText=[errorText metaboliteNames{i} '\n'];
0726         end
0727     end
0728     throw(MException('',errorText));
0729 end
0730 
0731 %Everything seems fine with the metabolite IDs, compartments, genes, and
0732 %reactions
0733 
0734 %It works fine to write reactions with only one side, like " <=> met1" or
0735 %"met1 => ", but reactions written without the extra space, like "met1 =>"
0736 %must be padded to fit with the rest of the code.
0737 for i=1:numel(equations)
0738     if numel(equations{i})>1
0739         if strcmp(equations{i}(1),'<') || strcmp(equations{i}(1),'=')
0740            equations{i}=[' ' equations{i}]; 
0741         end
0742         if strcmp(equations{i}(end),'>')
0743            equations{i}=[equations{i} ' ']; 
0744         end
0745     end
0746 end
0747 
0748 revIndexes=strfind(equations,' <=> ');
0749 irrevIndexes=strfind(equations,' => ');
0750 
0751 if any(cellfun(@isempty,revIndexes) & cellfun(@isempty,irrevIndexes))
0752     throw(MException('',['The reaction ' reactionIDs{find(cellfun(@isempty,revIndexes) & cellfun(@isempty,irrevIndexes),1)} ' does not have reversibility data']));
0753 end
0754 
0755 %Split the reactions in left and right side
0756 lhs=cell(numel(equations),1);
0757 rhs=cell(numel(equations),1);
0758 for i=1:numel(equations)
0759     stop=[revIndexes{i} irrevIndexes{i}];
0760     lhs{i}=equations{i}(1:stop(1)-1);
0761     if isempty(revIndexes{i})
0762         rhs{i}=equations{i}(stop(1)+4:end);
0763     else
0764         rhs{i}=equations{i}(stop(1)+5:end);
0765     end
0766 end
0767 
0768 leftPlus=strfind(lhs,' + ');
0769 rightPlus=strfind(rhs,' + ');
0770 leftSpace=strfind(lhs,' ');
0771 rightSpace=strfind(rhs,' ');
0772 
0773 %Preallocate a METxRXNS stoichiometric matrix
0774 S=sparse(length(metaboliteIDs),length(reactionIDs));
0775 
0776 %Loop through each of the equations
0777 for i=1:numel(equations)
0778     for k=-1:2:1 %-1 is for reactant side
0779         if k==-1
0780            currentPlus=leftPlus;
0781            currentSpace=leftSpace;
0782            currentSide=lhs;
0783         else
0784            currentPlus=rightPlus;
0785            currentSpace=rightSpace;
0786            currentSide=rhs; 
0787         end
0788         if any(currentSide{i})
0789             starts=[1 currentPlus{i}+3 numel(currentSide{i})+4];
0790             for j=1:numel(starts)-1
0791                 %Check to see if it starts with a coefficient
0792                 coeff=str2double(currentSide{i}(starts(j):currentSpace{i}(find(currentSpace{i}>starts(j),1))-1));
0793                 if ~isnan(coeff)
0794                     %If it starts with a coefficient
0795                     metName=currentSide{i}(currentSpace{i}(find(currentSpace{i}>starts(j),1))+1:starts(j+1)-4);
0796                 else
0797                     coeff=1;
0798                     metName=currentSide{i}(starts(j):starts(j+1)-4);
0799                 end
0800 
0801                 %Check to see that it was found in the list
0802                 %Check if the metabolite is present
0803                 metID=find(strcmpi(metName,metaboliteIDs),1);
0804 
0805                 %If it didn't find the metabolite
0806                 if isempty(metID)
0807                     throw(MException('',['The metabolite "' metName '" in reaction ' reactionIDs{i} ' was not found in the metabolite list']));
0808                 end
0809 
0810                 %If the metabolite was present in more than one copy
0811                 if length(metID)>1
0812                     throw(MException('',['The metabolite "' metName '" in reaction ' reactionIDs{i} ' was found in more than one copy in the metabolite list']));
0813                 end
0814 
0815                 %If the metabolites don't match cases
0816                 if strcmp(metName,metaboliteIDs(metID))~=1
0817                     fprintf('WARNING: The metabolite "%s" in reaction %s differs in upper/lower case compared to the metabolite list\n',metName,reactionIDs{i});
0818                 end
0819 
0820                 %Check to see that the metabolite isn't already present in
0821                 %the reaction. This means that the reaction is on the form
0822                 %A => A
0823                 if printWarnings==true
0824                     if S(metID,i)~=0
0825                         fprintf(['WARNING: The metabolite "' metName '" in reaction ' reactionIDs{i} ' is present more than once. Only the net reaction will be exported\n']);
0826                     end
0827                 end
0828 
0829                 S(metID,i)=S(metID,i)+coeff*k;
0830             end
0831         end
0832     end
0833 end
0834 
0835 reversibility=zeros(length(equations),1);
0836 
0837 reversibility(~cellfun(@isempty,revIndexes))=1;
0838 
0839 %Check that there are no conflicting bounds
0840 I=find(lowerBounds>upperBounds);
0841 if any(I)
0842     errorText='The following reactions(s) have contradicting bounds:\n';
0843     for i=1:numel(I)
0844         errorText=[errorText reactionIDs{I(i)} '\n'];
0845     end
0846     throw(MException('',errorText));
0847 end
0848 
0849 I=find(reversibility==0 & lowerBounds<0);
0850 if any(I)
0851     errorText='The following reactions(s) have bounds that contradict their directionality:\n';
0852     for i=1:numel(I)
0853         errorText=[errorText reactionIDs{I(i)} '\n'];
0854     end
0855     throw(MException('',errorText));
0856 end
0857 
0858 if printWarnings==true
0859     %Check that all the metabolites are being used
0860     involvedMat=S;
0861     involvedMat(involvedMat~=0)=1;
0862     usage=sum(involvedMat,2);
0863     notPresent=find(usage==0);
0864     unbalanced=find(usage==1);
0865 
0866     if ~isempty(notPresent)
0867         errorText='WARNING: The following metabolite(s) are never used:\n';
0868         for i=1:length(notPresent)
0869             errorText=[errorText '(' finalMetIDs{notPresent(i)} ') ' metaboliteNames{notPresent(i)} '\n'];
0870         end
0871         fprintf([errorText '\n']);
0872     end
0873 
0874     %Note: This should take reactants/products into account. If a
0875     %metabolite is only a product in all (irreversible) reactions, then is
0876     %is it unbalanced
0877     badMets=unbalanced(ismember(metConstrained(unbalanced),'false'));
0878     if any(badMets)
0879         errorText='WARNING: The following internal metabolite(s) are only used in one reaction (zero flux is the only solution):\n';
0880         for i=1:length(badMets)
0881             errorText=[errorText '(' finalMetIDs{badMets(i)} ') ' metaboliteNames{badMets(i)} '[' metComps{badMets(i)} ']\n'];
0882         end
0883         fprintf([errorText '\n']);
0884     end
0885 
0886     %Check that all metabolites are balanced for C, N, S, P and the number
0887     %of R-groups
0888     
0889     [nC nN nS nP foundComp]=getComposition(metComposition, metInchi);
0890 
0891     %Loop through the reactions and look at those where all the metabolites
0892     %have composition data. Just count the others
0893     cantbalanceRxns=[];
0894     for i=1:size(S,2)
0895         foundMets=find(S(:,i));
0896 
0897         if sum(foundComp(foundMets))==numel(foundMets)
0898             cBalance=sum(S(foundMets,i).*nC(foundMets));
0899             nBalance=sum(S(foundMets,i).*nN(foundMets));
0900             sBalance=sum(S(foundMets,i).*nS(foundMets));
0901             pBalance=sum(S(foundMets,i).*nP(foundMets));
0902             %Arbitatary small number
0903             if abs(cBalance(1,1))>0.00000001
0904                 fprintf('WARNING: The reaction %s is not balanced with respect to carbon\n',reactionIDs{i});
0905             end
0906             if abs(nBalance(1,1))>0.00000001
0907                 fprintf('WARNING: The reaction %s is not balanced with respect to nitrogen\n',reactionIDs{i});
0908             end
0909             if abs(sBalance(1,1))>0.00000001
0910                 fprintf('WARNING: The reaction %s is not balanced with respect to sulfur\n',reactionIDs{i});
0911             end
0912             if abs(pBalance(1,1))>0.00000001
0913                 fprintf('WARNING: The reaction %s is not balanced with respect to phosphorus\n',reactionIDs{i});
0914             end
0915         else
0916             cantbalanceRxns=[cantbalanceRxns i];
0917         end
0918     end
0919     if any(cantbalanceRxns)>0
0920         fprintf('\nWARNING: The following %s reactions could not be checked for mass balancing\n',num2str(numel(cantbalanceRxns)));
0921         fprintf('%s\n',reactionIDs{cantbalanceRxns});
0922     end
0923 end
0924 
0925 %All the information has been collected. Time to write SBML!
0926 exportSBML(outputFileName,modelAnnotation,finalMetIDs,metaboliteNames,...
0927 metMiriam, metComposition, metInchi, metConstrained, metComps, reactionIDs, reactionNames, reactionComps, S, reversibility,...
0928 compName, compOutside, compAbbrev, compGO, lowerBounds, upperBounds, objectives, ecNumbers, geneAssociations, reactionSubsystem,...
0929 reactionSBO, defaultLower,defaultUpper, geneNames, geneID1, geneID2, geneShortNames, geneComps, geneKEGG, COBRAFormat);
0930 end
0931 
0932 %Cleans up the structure that is imported from using xlsread
0933 function raw=cleanImported(raw)
0934     %First check that it's a cell array. If a sheet is completely empty,
0935     %then raw=NaN
0936     if iscell(raw)
0937         %Remove columns that aren't strings. If you cut and paste a lot in the sheet
0938         %there tends to be columns that are NaN
0939         I=cellfun(@isstr,raw(1,:));
0940         raw=raw(:,I);
0941 
0942         %Find the lines that are not commented
0943         keepers=[1; find(strcmp('',raw(:,1)) | cellfun(@wrapperNAN,raw(:,1)))];
0944         raw=raw(keepers,:);
0945 
0946         %Check if there are any rows that are all NaN. This could happen if
0947         %xlsread reads too far. Remove any such rows.
0948         nans=cellfun(@wrapperNAN,raw);
0949         I=all(nans,2);
0950         raw(I,:)=[];
0951 
0952         %Also check if there are any lines that contain only NaNs or white
0953         %spaces. This could happen if you accidentaly inserted a space
0954         %somewhere
0955         whites=cellfun(@wrapperWS,raw);
0956         I=all(whites,2);
0957         raw(I,:)=[];
0958     else
0959         raw={''};
0960     end
0961     
0962     %Checks if something is NaN. Can't use isnan with cellfun as it does it
0963     %character by character for strings
0964     function I=wrapperNAN(A)
0965        I=any(isnan(A)); 
0966     end
0967     
0968     %Checks if something is all white spaces or NaN
0969     function I=wrapperWS(A)
0970         if isnan(A)
0971             I=true;
0972         else
0973             %isstrprob gives an error if boolean
0974             if islogical(A)
0975                 I=false;
0976             else
0977                 I=all(isstrprop(A,'wspace'));
0978             end
0979         end
0980     end
0981 end
0982 
0983 function [C N S P foundComp]=getComposition(metComposition, metInchi)
0984     %Assume that they are of the same length
0985     C=zeros(size(metComposition));
0986     N=zeros(size(metComposition));
0987     S=zeros(size(metComposition));
0988     P=zeros(size(metComposition));
0989     foundComp=zeros(size(metComposition));
0990     
0991     for i=1:length(metComposition)
0992         inchiError=0;
0993         if ischar(metInchi{i})
0994             if length(metInchi{i})>0
0995                 %Find the formula in the Inchi string. Assume that it is
0996                 %everything between the first and second "\"
0997                 indexes=strfind(metInchi{i},'/');
0998                 if length(indexes)==0
0999                    inchiError=1; 
1000                 else
1001                    %For some simple molecules such as salts only the first "\" is present
1002                    if length(indexes)==1
1003                         formula=metInchi{i}(indexes(1)+1:length(metInchi{i}));
1004                    else
1005                         formula=metInchi{i}(indexes(1)+1:indexes(2)-1);
1006                    end
1007                    [nC, nN, nS, nP, errorFlag] = compFromFormula(formula);
1008                    if errorFlag==0
1009                         C(i)=nC;
1010                         N(i)=nN;
1011                         S(i)=nS;
1012                         P(i)=nP;
1013                         foundComp(i)=1;
1014                    end
1015                 end
1016             else
1017                 inchiError=1;
1018             end
1019         else
1020             inchiError=1;
1021         end          
1022             
1023         if inchiError==1
1024             %If no InChI could be found
1025             if ischar(metComposition{i})
1026                 if length(metComposition{i})>0
1027                     [nC, nN, nS, nP, errorFlag] = compFromFormula(metComposition{i});
1028                     if errorFlag==0
1029                         C(i)=nC;
1030                         N(i)=nN;
1031                         S(i)=nS;
1032                         P(i)=nP;
1033                         foundComp(i)=1;
1034                     end
1035                 end
1036             end
1037         end
1038     end
1039 end
1040 
1041 function [nC, nN, nS, nP, errorFlag] = compFromFormula(formula)
1042     %IMPORTANT! This does not work if elements can have more than one
1043     %character. Look in to that!
1044     errorFlag=0;
1045     nonNumeric=regexp(formula,'[^0-9]');
1046     abbrevs=['C';'N';'S';'P'];
1047     comp=zeros(size(abbrevs));
1048     for i=1:size(abbrevs)
1049         index=strfind(formula,abbrevs(i));
1050         if length(index)==1
1051            nextNN=find(nonNumeric>index);
1052            if ~isempty(nextNN)
1053                 comp(i)=str2double(formula(index+1:nonNumeric(nextNN(1))-1));
1054            else
1055                 comp(i)=str2double(formula(index+1:length(formula)));
1056            end
1057            %Might be temporary!! Assumes that there is 1 atom if the
1058            %str2double thing didn't work
1059            if isnan(comp(i))
1060                comp(i)=1;
1061            end
1062         else
1063            if length(index)>1
1064                %THIS HAS TO BE FIXED! THIS CAN HAPPEN FOR POLYMERS FOR
1065                %EXAMPLE!!
1066                 errorFlag=1;
1067            end
1068         end
1069     end
1070     nC=comp(1);
1071     nN=comp(2);
1072     nS=comp(3);
1073     nP=comp(4);
1074 end
1075     
1076 function [errorFlag] = exportSBML(outputFile,modelAnnotation,metaboliteIDs,...
1077     metaboliteNames, metMiriam, metComposition, metInchi, metConstrained, metComps,reactionIDs, reactionNames,...
1078     reactionComps, stochiometricMatrix, reversibility, compName, compOutside, compAbbrev, compGO, lowerBounds,...
1079     upperBounds, objectives, ecNumbers, geneAssociations, reactionSubsystem, reactionSBO, defaultLower,defaultUpper,...
1080     geneNames, geneID1, geneID2, geneShortNames, geneComps, geneKEGG, COBRAFormat)
1081 
1082 %Generate temporary name
1083 tempFile=tempname;
1084 
1085 fid = fopen(tempFile,'w');
1086 if COBRAFormat==false
1087     intro=['<?xml version="1.0" encoding="UTF-8" ?>'...
1088     '<sbml xmlns="http://www.sbml.org/sbml/level2/version3" level="2" version="3">'...
1089     '<model metaid="metaid_' modelAnnotation.ID '" id="' modelAnnotation.ID '" name="' modelAnnotation.name '">'];
1090     if any(modelAnnotation.note)
1091         intro=[intro '<notes><body xmlns="http://www.w3.org/1999/xhtml">' modelAnnotation.note '</body></notes>\n'];
1092     end
1093     intro=[intro '<annotation>'...
1094     '<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:vCard="http://www.w3.org/2001/vcard-rdf/3.0#" xmlns:bqbiol="http://biomodels.net/biology-qualifiers/" xmlns:bqmodel="http://biomodels.net/model-qualifiers/">'...
1095     '<rdf:Description rdf:about="#metaid_' modelAnnotation.ID '">'];
1096     if any(modelAnnotation.givenName) && any(modelAnnotation.familyName)
1097         intro=[intro '<dc:creator rdf:parseType="Resource"><rdf:Bag><rdf:li rdf:parseType="Resource">'];
1098         if any(modelAnnotation.givenName) && any(modelAnnotation.familyName)
1099            intro=[intro '<vCard:N rdf:parseType="Resource"><vCard:Family>' modelAnnotation.familyName '</vCard:Family><vCard:Given>' modelAnnotation.givenName '</vCard:Given></vCard:N>'];
1100         end
1101         if any(modelAnnotation.email)
1102            intro=[intro '<vCard:EMAIL>' modelAnnotation.email '</vCard:EMAIL>'];
1103         end
1104         if any(modelAnnotation.organization)
1105            intro=[intro '<vCard:ORG><vCard:Orgname>' modelAnnotation.organization '</vCard:Orgname></vCard:ORG>'];
1106         end
1107         
1108         intro=[intro '</rdf:li></rdf:Bag></dc:creator>'];
1109         intro=[intro '<dcterms:created rdf:parseType="Resource">'...
1110         '<dcterms:W3CDTF>' datestr(now,'yyyy-mm-ddTHH:MM:SSZ') '</dcterms:W3CDTF>'... 
1111         '</dcterms:created>'...
1112         '<dcterms:modified rdf:parseType="Resource">'...
1113         '<dcterms:W3CDTF>' datestr(now,'yyyy-mm-ddTHH:MM:SSZ') '</dcterms:W3CDTF>'... 
1114         '</dcterms:modified>'];
1115     end
1116     if any(modelAnnotation.taxonomy)
1117            intro=[intro '<bqbiol:is><rdf:Bag><rdf:li rdf:resource="urn:miriam:' modelAnnotation.taxonomy '" /></rdf:Bag></bqbiol:is>'];
1118     end
1119     intro=[intro '</rdf:Description></rdf:RDF></annotation>\n'];
1120 else
1121     intro=['<?xml version="1.0" encoding="UTF-8"?><sbml xmlns="http://www.sbml.org/sbml/level2" level="2" version="1" xmlns:html="http://www.w3.org/1999/xhtml"><model id="' modelAnnotation.ID '" name="' modelAnnotation.name '">'];
1122     if any(modelAnnotation.note)
1123         intro=[intro '<notes><body xmlns="http://www.w3.org/1999/xhtml">' modelAnnotation.note '</body></notes>\n'];
1124     end
1125 end
1126 
1127 intro=[intro '<listOfUnitDefinitions>'...
1128     '<unitDefinition id="mmol_per_gDW_per_hr">'...
1129     '<listOfUnits>'...
1130     '<unit kind="mole" scale="-3"/>'...
1131     '<unit kind="second" multiplier="0.00027778" exponent="-1"/>'...
1132     '</listOfUnits>'...
1133     '</unitDefinition>'...
1134     '</listOfUnitDefinitions>\n'...
1135     '<listOfCompartments>'];
1136 
1137 %Write intro
1138 fprintf(fid,intro);
1139 
1140 for i=1:length(compName)
1141     %Check if it's outside anything
1142     if ~isnan(compOutside{i})
1143         if COBRAFormat==false
1144             append=[' outside="C_' compOutside{i} '" spatialDimensions="3"'];
1145         else
1146             append=[' outside="C_' compOutside{i} '" spatialDimensions="3"'];
1147         end
1148     else
1149         append=' spatialDimensions="3"';
1150     end
1151     if COBRAFormat==false
1152         fprintf(fid,['<compartment metaid="metaid_C_' compAbbrev{i} '" id="C_' compAbbrev{i} '" name="' compName{i} '"' append ' sboTerm="SBO:0000290">']);
1153         
1154         %Check if there is an associated GO-term
1155         if isstr(compGO{i}) && length(compGO{i})>0
1156             compinfo=['<annotation><rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dcterms='...
1157                     '"http://purl.org/dc/terms/" xmlns:vCard="http://www.w3.org/2001/vcard-rdf/3.0#" xmlns:bqbiol="http://biomodels.net/biology-qualifiers/" '...
1158                     'xmlns:bqmodel="http://biomodels.net/model-qualifiers/"><rdf:Description rdf:about="#metaid_C_' compAbbrev{i} '">'...
1159                     '<bqbiol:is><rdf:Bag><rdf:li rdf:resource="urn:miriam:obo.go:' compGO{i} '"/></rdf:Bag></bqbiol:is>'...
1160                     '</rdf:Description></rdf:RDF></annotation></compartment>'];
1161         else
1162             compinfo='</compartment>';
1163         end
1164         fprintf(fid,compinfo);
1165     else
1166         fprintf(fid,['<compartment id="C_' compAbbrev{i} '" name="' compName{i} '"' append '></compartment>']);
1167     end
1168 end
1169 
1170 intro='</listOfCompartments>\n<listOfSpecies>';
1171 fprintf(fid,intro);
1172 
1173 %Write metabolites
1174 for i=1:length(metaboliteIDs)
1175     if COBRAFormat==false
1176         toprint=['<species metaid="metaid_M_' metaboliteIDs{i} '" id="M_' metaboliteIDs{i} '" name="' metaboliteNames{i} '" compartment="C_' metComps{i} '" boundaryCondition="' metConstrained{i} '" sboTerm="SBO:0000299">'];
1177     else
1178         %Get the formula for the compound
1179         if ischar(metComposition{i}) && length(metComposition{i})>0
1180             if ~isempty(regexp(metComposition{i},'[^a-zA-Z0-9]', 'once'))
1181                 formula='_';
1182             else
1183                 formula=['_' metComposition{i}];
1184             end
1185         else
1186             %Find the formula in the Inchi string. Assume that it is
1187             %everything between the first and second "\"
1188             indexes=strfind(metInchi{i},'/');
1189             if length(indexes)<2
1190                 formula='_'; 
1191             else
1192                 if ~isempty(regexp(metInchi{i}(indexes(1)+1:indexes(2)-1),'[^a-zA-Z0-9]', 'once'))
1193                     formula='_';
1194                 else
1195                     formula=['_' metInchi{i}(indexes(1)+1:indexes(2)-1)];
1196                 end
1197             end
1198         end
1199         toprint=['<species id="M_' metaboliteIDs{i} '" name="' metaboliteNames{i} formula '" compartment="C_' metComps{i} '" boundaryCondition="' metConstrained{i} '">'];
1200     end
1201     %Print some stuff if there is a formula for the compound
1202     if COBRAFormat==false
1203         if ischar(metComposition{i})
1204             if length(metComposition{i})>0
1205                 toprint=[toprint '<notes><body xmlns="http://www.w3.org/1999/xhtml"><p>FORMULA: '  metComposition{i} '</p></body></notes>'];
1206             end
1207         end
1208     end
1209     %Only print annotations for metabolites with some miriam link. This is because I don't know how "unknown"
1210     %metabolites should be presented, and it seems unlikely that you will
1211     %have InChI without a database link. This might be temporary....
1212     if COBRAFormat==false
1213         if ischar(metMiriam{i}) && length(metMiriam{i})>0
1214             toprint=[toprint '<annotation>'];
1215             %Print InChI if available
1216             if ischar(metInchi{i})
1217                 if length(metInchi{i})>0
1218                     toprint=[toprint '<in:inchi xmlns:in="http://biomodels.net/inchi" metaid="metaid_M_' metaboliteIDs{i} '_inchi">InChI=' metInchi{i} '</in:inchi>']; 
1219                     isInchi=1;
1220                 else
1221                     isInchi=0;
1222                 end
1223             else
1224                isInchi=0; 
1225             end
1226             %Print some more annotation stuff
1227             toprint=[toprint '<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dcterms="http://purl.org/dc/terms/" '...
1228                 'xmlns:vCard="http://www.w3.org/2001/vcard-rdf/3.0#" xmlns:bqbiol="http://biomodels.net/biology-qualifiers/" xmlns:bqmodel="http://biomodels.net/model-qualifiers/">'...
1229                 '<rdf:Description rdf:about="#metaid_M_' metaboliteIDs{i} '">'...
1230                 '<bqbiol:is>'...
1231                 '<rdf:Bag>'];
1232             %If InchI
1233             if isInchi==1
1234                 toprint=[toprint '<rdf:li rdf:resource="#metaid_M_' metaboliteIDs{i} '_inchi" />'];
1235             end
1236             %Print miriam
1237             toprint=[toprint '<rdf:li rdf:resource="urn:miriam:' metMiriam{i} '"/>'];
1238 
1239             %Finish up
1240             toprint=[toprint '</rdf:Bag></bqbiol:is></rdf:Description></rdf:RDF></annotation>'];
1241         end
1242     end
1243     toprint=[toprint '</species>\n'];
1244     fprintf(fid,toprint);
1245 end    
1246 
1247 %Add information on all modifiers (that is the genes)
1248 %Loop through to replace empty cells with ''. NOT PRETTY!
1249 for i=1:length(geneAssociations)
1250    if ~ischar(geneAssociations{i})
1251        geneAssociations{i}='';
1252    end
1253 end
1254 
1255 if COBRAFormat==false
1256     %First add all the genes
1257     for i=1:length(geneNames)
1258        toprint=['<species metaid="metaid_E_' int2str(i) '" id="E_' int2str(i) '" name="' geneNames{i} '" compartment="C_' geneComps{i} '" sboTerm="SBO:0000014">'];
1259        %Print gene name if present
1260        if ischar(geneShortNames{i}) && length(geneShortNames{i})>0
1261             toprint=[toprint '<notes><body xmlns="http://www.w3.org/1999/xhtml"><p>SHORT NAME: '  geneShortNames{i} '</p></body></notes>'];
1262        end
1263        
1264        %Print annotation info if present
1265        if (ischar(geneKEGG{i}) && length(geneKEGG{i})>1) || (ischar(geneID1{i}) && length(geneID1{i})>1) || (ischar(geneID2{i}) && length(geneID2{i})>1)
1266             toprint=[toprint '<annotation><rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:dc="http://purl.org/dc/elements/1.1/" '...
1267                 'xmlns:dcterms="http://purl.org/dc/terms/" xmlns:vCard="http://www.w3.org/2001/vcard-rdf/3.0#" xmlns:bqbiol="http://biomodels.net/biology-qualifiers/" '...
1268                 'xmlns:bqmodel="http://biomodels.net/model-qualifiers/">'...
1269                 '<rdf:Description rdf:about="#metaid_E_' int2str(i) '"><bqbiol:is><rdf:Bag>'];
1270                 if ischar(geneID1{i}) && length(geneID1{i})>0
1271                     toprint=[toprint '<rdf:li rdf:resource="urn:miriam:' geneID1{i} '" />'];
1272                 end
1273                 if ischar(geneID2{i}) && length(geneID2{i})>0
1274                     toprint=[toprint '<rdf:li rdf:resource="urn:miriam:' geneID2{i} '" />'];
1275                 end
1276                 
1277                 if ischar(geneKEGG{i}) && length(geneKEGG{i})>0
1278                     %KEGG maps are separated by ":"
1279                     [crap crap crap crap crap crap keggMaps]=regexp(geneKEGG{i},'[:]');
1280                     
1281                     for j=1:length(keggMaps)
1282                         toprint=[toprint '<rdf:li rdf:resource="urn:miriam:kegg.pathway:' keggMaps{j} '" />'];
1283                     end
1284                 end
1285                 
1286                 toprint=[toprint '</rdf:Bag></bqbiol:is></rdf:Description></rdf:RDF></annotation>'];
1287        end
1288        toprint=[toprint '</species>\n'];
1289        fprintf(fid,toprint);
1290     end
1291     
1292     %Loop through all reactions and find gene associations which contain
1293     %":", which means that they are governed by several genes
1294     uniqueGenes=unique(geneAssociations);
1295     [crap reactions crap crap crap crap crap]=regexp(uniqueGenes,'[:]');
1296     reactions=find(cellfun('length',reactions));
1297     [crap crap crap crap crap crap mods]=regexp(uniqueGenes(reactions),'[;]');
1298     
1299     geneComplexes={};
1300     complexGenes={};
1301     %Loop through each modifier and add the ones that are complexes
1302     for i=1:numel(reactions)
1303         %Check to see if it's a complex
1304         [crap complex crap crap crap crap comGenes]=regexp(mods{i},'[:]');
1305         complex=find(cellfun('length',complex));
1306         for j=1:numel(complex)
1307             geneComplexes=[geneComplexes;mods{i}{complex(j)}];
1308             complexGenes=[complexGenes;comGenes(complex(j))];
1309         end
1310     end
1311     
1312     %Remove duplicate complexes
1313     [geneComplexes,I]=unique(geneComplexes);
1314     complexGenes=complexGenes(I);
1315     
1316     %The SBO term for the complex is set to be "protein". Might not be
1317     %correct.
1318     for i=1:length(geneComplexes)
1319         %Put complexes in the same compartment as the first gene
1320         toprint=['<species metaid="metaid_Cx_' int2str(i) '" id="Cx_' int2str(i) '" name="' geneComplexes{i} '" compartment="C_' geneComps{1} '" sboTerm="SBO:0000297">'];
1321         toprint=[toprint '</species>\n'];
1322         fprintf(fid,toprint);
1323     end
1324 end
1325 
1326 %Finish metbolites
1327 fprintf(fid,'</listOfSpecies>');
1328 
1329 %Add reactions
1330 fprintf(fid,'<listOfReactions>');
1331 for i=1:length(reactionIDs)
1332 %Get reversibility
1333     reversible='false';
1334     if reversibility(i)==1
1335         reversible='true';
1336     end
1337 
1338     if COBRAFormat==false
1339         if ~isempty(reactionSBO)
1340              SBO=[' sboTerm="' reactionSBO{i} '"'];
1341         else
1342             SBO='';
1343         end
1344         fprintf(fid,['<reaction metaid="metaid_R_' reactionIDs{i} '" id="R_' reactionIDs{i} '" name="' reactionNames{i} '" reversible="' reversible '"' SBO '>']);
1345     else
1346         fprintf(fid,['<reaction id="R_' reactionIDs{i} '" name="' reactionNames{i} '" reversible="' reversible '">']);
1347     end
1348     if COBRAFormat==false
1349         if ischar(reactionSubsystem{i}) && length(reactionSubsystem{i})>0
1350             fprintf(fid,'<notes>');
1351             fprintf(fid,['<body xmlns="http://www.w3.org/1999/xhtml"><p>SUBSYSTEM: ' reactionSubsystem{i} '</p></body>']);
1352             fprintf(fid,'</notes>');
1353         end
1354     else
1355         fprintf(fid,'<notes>');
1356         if ~isnan(geneAssociations{i})
1357             %In order to adhere to the COBRA standards it should be like
1358             %this:
1359             %-If only one gene then no parentheses
1360             %-If only "and" or only "or" there should only be one set of
1361             %parentheses
1362             %-If both "and" and "or", then split on "or". This is not
1363             %complete, but it's the type of relationship supported by the
1364             %Excel formulation
1365             aSign=strfind(geneAssociations{i},':');
1366             oSign=strfind(geneAssociations{i},';');
1367             if isempty(aSign) && isempty(oSign)
1368                 geneString=geneAssociations{i};
1369             else
1370                 if isempty(aSign)
1371                     geneString=['( ' strrep(geneAssociations{i},';',' or ') ' )'];
1372                 else
1373                     if isempty(oSign)
1374                         geneString=['( ' strrep(geneAssociations{i},':',' and ') ' )'];
1375                     else
1376                         geneString=['(( ' strrep(geneAssociations{i},';',' ) or ( ') ' ))'];
1377                         geneString=strrep(geneString,':',' and ');
1378                     end
1379                 end
1380             end
1381             
1382             %geneString=strrep(geneAssociations{i},':',' and ');
1383             %geneString=strrep(geneString,';',' or ');
1384             fprintf(fid,['<html:p>GENE_ASSOCIATION: ' geneString '</html:p>']);
1385         end
1386         if ischar(reactionSubsystem{i}) && length(reactionSubsystem{i})>0
1387             fprintf(fid,['<html:p>SUBSYSTEM: ' reactionSubsystem{i} '</html:p>']);
1388         end
1389         if ischar(ecNumbers{i}) && length(ecNumbers{i})>0
1390             fprintf(fid,['<html:p>PROTEIN_CLASS: ' ecNumbers{i} '</html:p>']);
1391         end
1392         fprintf(fid,'</notes>');
1393     end
1394     
1395     if COBRAFormat==false
1396         if ischar(ecNumbers{i}) && length(ecNumbers{i})>0
1397             toprint=['<annotation><rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" '...
1398             'xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dcterms="http://purl.org/dc/terms/" '...
1399             'xmlns:vCard="http://www.w3.org/2001/vcard-rdf/3.0#" xmlns:bqbiol="http://biomodels.net/biology-qualifiers/" '...
1400             'xmlns:bqmodel="http://biomodels.net/model-qualifiers/">'...
1401             '<rdf:Description rdf:about="#metaid_R_' reactionIDs{i} '">'...
1402             '<bqbiol:is>'...
1403             '<rdf:Bag>'...
1404             '<rdf:li rdf:resource="urn:miriam:ec-code:' ecNumbers{i} '"/>'... 
1405             '</rdf:Bag>'...
1406             '</bqbiol:is>'...
1407             '</rdf:Description>'...
1408             '</rdf:RDF>'...
1409             '</annotation>'];
1410             fprintf(fid,toprint);
1411         end
1412     end
1413     
1414     %The reactants have negative values in the stochiometric matrix
1415     compounds=stochiometricMatrix(:,i);
1416     reactants=find(compounds<0);
1417     products=find(compounds>0);
1418     
1419     if any(reactants)
1420         fprintf(fid,'<listOfReactants>');
1421 
1422         for j=1:length(reactants)
1423             toprint=['<speciesReference species="M_' metaboliteIDs{reactants(j)} '" stoichiometry="' char(num2str(-1*compounds(reactants(j)))) '"/>'];
1424             fprintf(fid,toprint);
1425         end  
1426         fprintf(fid,'</listOfReactants>');
1427     end
1428     if any(products)
1429         fprintf(fid,'<listOfProducts>');
1430         for j=1:length(products)
1431             toprint=['<speciesReference species="M_' metaboliteIDs{products(j)} '" stoichiometry="' char(num2str(compounds(products(j)))) '"></speciesReference>'];
1432             fprintf(fid,toprint);
1433         end  
1434         fprintf(fid,'</listOfProducts>');
1435     end
1436     if COBRAFormat==false
1437         %Print modifiers if available.
1438         if ischar(geneAssociations{i}) && length(geneAssociations{i})>0
1439             %Loop through the number of modifiers (isoenzymes, complexes...)
1440             [crap crap crap crap crap crap mods]=regexp(geneAssociations{i},';');
1441             fprintf(fid,'<listOfModifiers>');
1442             for j=1:numel(mods)
1443                 %Is it a complex?
1444                 if isempty(strfind(mods{j},':'))
1445                     %Find the correct gene
1446                     index=find(strcmp(mods{j},geneNames),1);
1447                     %Assumes that it is found since that check should have been made
1448                     %before
1449                     fprintf(fid,'<modifierSpeciesReference species="E_%s"/>',num2str(index(1)));
1450                 else
1451                     index=find(strcmp(mods{j},geneComplexes),1);
1452                     %Assumes that it is found since that check should have been made
1453                     %before
1454                     fprintf(fid,'<modifierSpeciesReference species="Cx_%s"/>',num2str(index(1)));
1455                 end
1456             end
1457             fprintf(fid,'</listOfModifiers>');
1458         end
1459     end
1460     
1461     %Print constraints
1462     if isnan(upperBounds(i))
1463         upper=defaultUpper;
1464     else
1465         upper=upperBounds(i);
1466     end
1467     
1468     if isnan(lowerBounds(i))
1469        %Check for reversibility
1470        if reversibility(i)==1
1471            lower=defaultLower;
1472        else
1473            lower=0;
1474        end
1475     else
1476        lower=lowerBounds(i); 
1477     end
1478     
1479     %Print objectives
1480     if isnan(objectives(i))
1481         objective=0;
1482     else
1483         objective=objectives(i);
1484     end
1485     
1486     if COBRAFormat==false
1487         fprintf(fid,'<kineticLaw><math xmlns="http://www.w3.org/1998/Math/MathML"><ci>FLUX_VALUE</ci></math><listOfParameters>');
1488         fprintf(fid,['<parameter id="LB_R_' reactionIDs{i} '" name="LOWER_BOUND" value="' sprintf('%15.8f',lower) '" units="mmol_per_gDW_per_hr"/><parameter id="UB_R_' reactionIDs{i} '" name="UPPER_BOUND" value="' sprintf('%15.8f',upper) '" units="mmol_per_gDW_per_hr"/><parameter id="OBJ_R_' reactionIDs{i} '" name="OBJECTIVE_COEFFICIENT" value="' sprintf('%15.8f',objective) '" units="dimensionless"/><parameter id="FLUX_VALUE" value="0.00000000" units="mmol_per_gDW_per_hr"/>']);
1489     else
1490         fprintf(fid,'<kineticLaw><math xmlns="http://www.w3.org/1998/Math/MathML"><apply><ci> LOWER_BOUND </ci><ci> UPPER_BOUND </ci><ci> OBJECTIVE_COEFFICIENT </ci></apply></math><listOfParameters>');
1491         fprintf(fid,['<parameter id="LOWER_BOUND" value="' sprintf('%15.8f',lower) '"/><parameter id="UPPER_BOUND" value="' sprintf('%15.8f',upper) '"/><parameter id="OBJECTIVE_COEFFICIENT" value="' sprintf('%15.8f',objective) '"/>']);    
1492     end
1493     fprintf(fid,'</listOfParameters></kineticLaw>');
1494     fprintf(fid,'</reaction>\n');
1495 end
1496 
1497 %Add reactions for the creation of complexes
1498 if COBRAFormat==false
1499     for i=1:length(geneComplexes)
1500         fprintf(fid,['<reaction metaid="metaid_R_comp' int2str(i) '" id="R_comp' int2str(i) '" name="' strrep(geneComplexes{i},':',', ') '" reversible="false" sboTerm="SBO:0000176">']);
1501         fprintf(fid,'<listOfReactants>');
1502 
1503         for j=1:length(complexGenes{i,1})
1504             %Assumes that each gene name can be found
1505             toprint=['<speciesReference species="E_' int2str(find(strcmp(complexGenes{i,1}{j},geneNames))) '" stoichiometry="1"/>'];
1506             fprintf(fid,toprint);
1507         end
1508 
1509         fprintf(fid,'</listOfReactants><listOfProducts>');
1510         fprintf(fid,['<speciesReference species="Cx_' int2str(i) '" stoichiometry="1"></speciesReference>']);
1511         fprintf(fid,'</listOfProducts>');
1512         fprintf(fid,'<kineticLaw><math xmlns="http://www.w3.org/1998/Math/MathML"><ci>FLUX_VALUE</ci></math><listOfParameters>');
1513         fprintf(fid,['<parameter id="LB_R_comp' int2str(i) '" name="LOWER_BOUND" value="0.00000000" units="mmol_per_gDW_per_hr"/><parameter id="UB_R_comp' int2str(i) '" name="UPPER_BOUND" value="' sprintf('%15.8f',defaultUpper) '" units="mmol_per_gDW_per_hr"/><parameter id="OBJ_R_comp' int2str(i) '" name="OBJECTIVE_COEFFICIENT" value="0.00000000" units="dimensionless"/><parameter id="FLUX_VALUE" value="0.00000000" units="mmol_per_gDW_per_hr"/>']);
1514         fprintf(fid,'</listOfParameters></kineticLaw>');
1515         fprintf(fid,'</reaction>\n');
1516     end
1517 end
1518 fprintf(fid,'</listOfReactions>');
1519 
1520 %Write outro
1521 outro='</model></sbml>';
1522 fprintf(fid,outro);
1523 
1524 fclose(fid);
1525 
1526 %Replace the target file with the temporary file
1527 if exist(outputFile,'file')
1528     delete(outputFile);
1529 end
1530 movefile(tempFile,outputFile,'f');
1531 
1532 errorFlag=0;
1533 end

Generated on Tue 23-Apr-2013 15:18:37 by m2html © 2005