Home > RAVEN > exportToExcelFormat.m

exportToExcelFormat

PURPOSE ^

exportToExcelFormat

SYNOPSIS ^

function exportToExcelFormat(model,filename)

DESCRIPTION ^

 exportToExcelFormat
   Exports a model structure to the Excel format or to a set of
   tab-delimited text files.

   model       a model structure
   filename    filename of the Excel file. This could also be only a path,
               in which case the model is exported to a set of tab-delimited
               text files instead

   The resulting Excel file can be used with SBMLFromExcel to generate a 
   SBML file and to be used for modeling. If only text files should be
   generated then they will will be saved in the path of filename
   under the names excelRxns.txt, excelMets.txt, excelGenes.txt,
   excelModel.txt, and excelComps.txt.

   NOTE: Reactions and genes have no compatment in the model structure,
         but must be assigned to a compartment to adhere to SBML
         standards. Therefore the first compartment in model.comps is
         used. This is purely annotation and has no effect on the
         functionality of the model.

   Usage: exportToExcelFormat(model,filename)

   Rasmus Agren, 2013-07-12

CROSS-REFERENCE INFORMATION ^

This function calls: This function is called by:

SUBFUNCTIONS ^

SOURCE CODE ^

0001 function exportToExcelFormat(model,filename)
0002 % exportToExcelFormat
0003 %   Exports a model structure to the Excel format or to a set of
0004 %   tab-delimited text files.
0005 %
0006 %   model       a model structure
0007 %   filename    filename of the Excel file. This could also be only a path,
0008 %               in which case the model is exported to a set of tab-delimited
0009 %               text files instead
0010 %
0011 %   The resulting Excel file can be used with SBMLFromExcel to generate a
0012 %   SBML file and to be used for modeling. If only text files should be
0013 %   generated then they will will be saved in the path of filename
0014 %   under the names excelRxns.txt, excelMets.txt, excelGenes.txt,
0015 %   excelModel.txt, and excelComps.txt.
0016 %
0017 %   NOTE: Reactions and genes have no compatment in the model structure,
0018 %         but must be assigned to a compartment to adhere to SBML
0019 %         standards. Therefore the first compartment in model.comps is
0020 %         used. This is purely annotation and has no effect on the
0021 %         functionality of the model.
0022 %
0023 %   Usage: exportToExcelFormat(model,filename)
0024 %
0025 %   Rasmus Agren, 2013-07-12
0026 %
0027 
0028 %The model must have an id and description
0029 if ~isfield(model,'id') || ~isfield(model,'description') 
0030     throw(MException('','The model must have "id" and "description" fields'));
0031 end
0032 if ~any(model.id)
0033     throw(MException('','The model must have an "id" field'));
0034 end
0035 if ~any(model.description)
0036     fprintf('WARNING: The "description" field is empty. Uses the id as description.\n');
0037     model.description=model.id;
0038 end
0039 
0040 %The model cannot have +/- inf as constraints because it's not supported by
0041 %xlsxwrite
0042 I=find(isinf(model.lb));
0043 J=find(isinf(model.lb));
0044 if any(I) || any(J)
0045     fprintf(['WARNING: Some reactions have +/- Inf as bounds. '...
0046         'This is not supported by the Excel exporter. Such bounds are changed to +/- 1000 instead.\n']);
0047     model.lb(I(model.lb(I)<0))=-1000;
0048     model.lb(I(model.lb(I)>0))=1000;
0049     model.ub(J(model.ub(J)<0))=-1000;
0050     model.ub(J(model.ub(J)>0))=1000;
0051 end
0052 
0053 %Print a note
0054 fprintf(['NOTE: Reactions and genes have no compartment in the model structure, but must be assigned to a '...
0055     'compartment to adhere to SBML standards. Therefore the first compartment in model.comps is '...
0056     'used. This is just for annotation and has no effect on the functionality of the model.\n']);
0057 [filePath A B]=fileparts(filename);
0058 
0059 if ~any(filePath)
0060    filePath=pwd; 
0061 end
0062 %If a filename and not only a path was submitted then store the filename
0063 if any(A) && any(B)
0064     excelFile=[filePath '\' A B];
0065 else
0066     excelFile=[];
0067 end
0068 
0069 if filePath(end)~='\'
0070     filePath=[filePath '\'];
0071 end
0072 
0073 %If the folder doesn't exist then create it
0074 if ~exist(filePath,'dir')
0075     mkdir(filePath);
0076 end
0077 
0078 %Remove all leading and trailing white-spaces from all names. This should
0079 %be ok, but I'm not 100% sure
0080 model.rxns=strtrim(model.rxns);
0081 model.mets=strtrim(model.mets);
0082 model.metName=strtrim(model.metNames);
0083 model.rxnNames=strtrim(model.rxnNames);
0084 model.compNames=strtrim(model.compNames);
0085 
0086 %The rest of the fields are checked further down because I don't want to
0087 %make too many checks for structure names here
0088 
0089 model.equations=constructEquations(model,model.rxns,true);
0090 
0091 %Open for printing the rxn sheet
0092 rxnFile=fopen(fullfile(filePath,'excelRxns.txt'),'wt');
0093 
0094 %Print header
0095 fprintf(rxnFile,'#\tRXNID\tNAME\tEQUATION\tEC-NUMBER\tGENE ASSOCIATION\tLOWER BOUND\tUPPER BOUND\tOBJECTIVE\tCOMPARTMENT\tSUBSYSTEM\tSBO TERM\tREPLACEMENT ID\n');
0096 
0097 %Check if it should print EC-codes
0098 if isfield(model,'eccodes');
0099     model.eccodes=strtrim(model.eccodes);
0100     printEC=true;
0101 else
0102     printEC=false;
0103 end
0104 
0105 %Check if it should print genes
0106 if isfield(model,'grRules');
0107     model.grRules=strtrim(model.grRules);    
0108     printRules=true;
0109     
0110     %Also do some parsing here
0111     rules=model.grRules;
0112     rules=strrep(rules,'(','');
0113     rules=strrep(rules,')','');
0114     rules=strrep(rules,' and ',':');
0115     rules=strrep(rules,' or ',';');
0116 else
0117     printRules=false;
0118 end
0119 
0120 %Check if it should print subsystems
0121 if isfield(model,'subSystems');
0122     model.subSystems=strtrim(model.subSystems);
0123     printSubSystems=true;
0124 else
0125     printSubSystems=false;
0126 end
0127 
0128 %Loop through the reactions
0129 for i=1:numel(model.rxns)
0130    fprintf(rxnFile,['\t' model.rxns{i} '\t' model.rxnNames{i} '\t' model.equations{i} '\t']);
0131    
0132    if printEC==true
0133         fprintf(rxnFile,[model.eccodes{i} '\t']);
0134    else
0135         fprintf(rxnFile,'\t');
0136    end
0137    
0138    if printRules==true
0139         fprintf(rxnFile,[rules{i} '\t']);
0140    else
0141         fprintf(rxnFile,'\t');
0142    end
0143    
0144    %Print bounds and objectives
0145    fprintf(rxnFile,[num2str(model.lb(i)) '\t' num2str(model.ub(i)) '\t']);
0146    
0147    if model.c(i)~=0
0148         fprintf(rxnFile,[num2str(model.c(i)) '\t' ]);
0149    else
0150         fprintf(rxnFile,'\t');
0151    end
0152    
0153    fprintf(rxnFile,[model.comps{1} '\t']);
0154    
0155    if printSubSystems==true
0156         fprintf(rxnFile,[model.subSystems{i} '\t']);
0157    else
0158         fprintf(rxnFile,'\t');
0159    end
0160    
0161    %Print SBO-terms and Replacement IDs. This is not implemented yet
0162    fprintf(rxnFile,'\t\t');
0163    
0164    fprintf(rxnFile,'\n');
0165 end
0166 
0167 fclose(rxnFile);
0168 
0169 %Open for printing the metabolites sheet
0170 metFile=fopen(fullfile(filePath,'excelMets.txt'),'wt');
0171 
0172 %Check if it should print unconstrained info
0173 if isfield(model,'unconstrained')
0174     printUnconstrained=true;
0175 else
0176     printUnconstrained=false;
0177 end
0178 
0179 %Check if it should print miriam info
0180 if isfield(model,'metMiriams')
0181     printMiriam=true;
0182 else
0183     printMiriam=false;
0184 end
0185 
0186 %Check if it should print formula info
0187 if isfield(model,'metFormulas')
0188     model.metFormulas=strtrim(model.metFormulas);
0189     printFormulas=true;
0190 else
0191     printFormulas=false;
0192 end
0193 
0194 %Check if it should print InChi info
0195 if isfield(model,'inchis')
0196     model.inchis=strtrim(model.inchis);
0197     printInchis=true;
0198 else
0199     printInchis=false;
0200 end
0201 
0202 %Print header
0203 fprintf(metFile,'#\tMETID\tMETNAME\tUNCONSTRAINED\tMIRIAM\tCOMPOSITION\tInChI\tCOMPARTMENT\tREPLACEMENT ID\tMETS FIELD\n');
0204 
0205 %Loop through the metabolites
0206 for i=1:numel(model.mets)
0207    fprintf(metFile,['\t' model.metNames{i} '[' model.comps{model.metComps(i)} ']\t' model.metNames{i} '\t']);
0208    
0209    if printUnconstrained==true
0210        if model.unconstrained(i)~=0
0211             fprintf(metFile,'true\t');
0212        else
0213             fprintf(metFile,'\t');
0214        end
0215    else
0216        fprintf(metFile,'\t');
0217    end
0218    
0219    if printMiriam==true
0220        if ~isempty(model.metMiriams{i})
0221             fprintf(metFile,[strtrim(model.metMiriams{i}.name{1}) ':' strtrim(model.metMiriams{i}.value{1}) '\t']);
0222        else
0223             fprintf(metFile,'\t');
0224        end
0225    else
0226        fprintf(metFile,'\t');
0227    end
0228    
0229    if printFormulas==true
0230        %Print all fomulas if there is no InChi fiels
0231        if printInchis==false
0232            fprintf(metFile,[model.metFormulas{i} '\t']);
0233        else
0234            %Check if there is an available InChi. If so don't print
0235            %anything
0236            if isempty(model.inchis{i})
0237                fprintf(metFile,[model.metFormulas{i} '\t']);
0238            else
0239                fprintf(metFile,'\t');
0240            end
0241        end
0242    else
0243        fprintf(metFile,'\t');
0244    end
0245    
0246    if printInchis==true
0247        fprintf(metFile,[model.inchis{i} '\t']);
0248    else
0249        fprintf(metFile,'\t');
0250    end
0251    
0252    fprintf(metFile,[model.comps{model.metComps(i)} '\t']);
0253    
0254    %There can be no replacement IDs in the structure, but it has to be
0255    %something to give working met IDs.
0256    fprintf(metFile,['m' int2str(i) '\t']);
0257    
0258    %Print the model.mets field. The reason for not putting this as
0259    %replacement ID is that it's not guaranteed to be a valid SBML id.
0260    fprintf(metFile,[model.mets{i} '\t']);
0261    
0262    fprintf(metFile,'\n');
0263 end
0264 
0265 fclose(metFile);
0266 
0267 if isfield(model,'genes')
0268     %Open for printing the genes sheet
0269     geneFile=fopen(fullfile(filePath,'excelGenes.txt'),'wt');
0270 
0271     %Check if it should print miriam structures
0272     if isfield(model,'geneMiriams')
0273         printMiriams=true;
0274     else
0275         printMiriams=false;
0276     end
0277 
0278     %Check if it should print short gene names
0279     if isfield(model,'geneShortNames');
0280         model.geneShortNames=strtrim(model.geneShortNames);
0281         printShortNames=true;
0282     else
0283         printShortNames=false;
0284     end
0285 
0286     %Print header
0287     fprintf(geneFile,'#\tGENE NAME\tGENE ID 1\tGENE ID 2\tSHORT NAME\tCOMPARTMENT\tKEGG MAPS\n');
0288 
0289     %Loop through the genes
0290     for i=1:numel(model.genes)
0291         fprintf(geneFile,['\t' model.genes{i} '\t']);
0292 
0293         if printMiriams==true
0294             %This is a little tricky. Should print those IDs that are not for
0295             %kegg maps.
0296             if ~isempty(model.geneMiriams{i})
0297                 nonKegg=1:numel(model.geneMiriams{i}.name);
0298                 kegg=strmatch('kegg.pathway',strtrim(model.geneMiriams{i}.name),'exact');
0299                 nonKegg(kegg)=[];
0300 
0301                 if ~isempty(nonKegg)
0302                     %The number here must be 1 or 2 as the converter is written
0303                     %now
0304                     fprintf(geneFile,[strtrim(model.geneMiriams{i}.name{nonKegg(1)}) ':' strtrim(model.geneMiriams{i}.value{nonKegg(1)}) '\t']);
0305 
0306                     if numel(nonKegg)>1
0307                         fprintf(geneFile,[strtrim(model.geneMiriams{i}.name{nonKegg(2)}) ':' strtrim(model.geneMiriams{i}.value{nonKegg(2)}) '\t']);
0308                     else
0309                         fprintf(geneFile,'\t');
0310                     end
0311                 else
0312                     %Only kegg maps
0313                     fprintf(geneFile,'\t\t');
0314                 end
0315             else
0316                 fprintf(geneFile,'\t\t');
0317             end
0318         else
0319             fprintf(geneFile,'\t\t');
0320         end
0321 
0322         if printShortNames==true
0323             fprintf(geneFile,[model.geneShortNames{i} '\t']);
0324         else
0325             fprintf(geneFile,'\t');
0326         end
0327 
0328         %Since genes have no compartment in the model structure
0329         fprintf(geneFile,[model.comps{1} '\t']);
0330 
0331         if printMiriams==true
0332             %This is a little tricky. Should print those IDs that are for
0333             %kegg maps.
0334             if ~isempty(model.geneMiriams{i})
0335                 kegg=strmatch('kegg.pathway',strtrim(model.geneMiriams{i}.name),'exact');
0336                 if ~isempty(kegg)
0337                     for j=1:numel(kegg)
0338                        if j<numel(kegg)
0339                            pad=':';
0340                        else
0341                            pad='';
0342                        end
0343                        fprintf(geneFile,[strtrim(model.geneMiriams{i}.value{kegg(j)}) pad]);
0344                     end
0345                 else
0346                     %No kegg maps
0347                     fprintf(geneFile,'\t');
0348                 end
0349             else
0350                 fprintf(geneFile,'\t');
0351             end
0352         else
0353             fprintf(geneFile,'\t');
0354         end
0355 
0356         fprintf(geneFile,'\n');
0357     end
0358     fclose(geneFile);
0359 end
0360 
0361 if isfield(model,'id')
0362     %Open for printing the model sheet
0363     modelFile=fopen(fullfile(filePath,'excelModel.txt'),'wt');
0364 
0365     %Print header
0366     fprintf(geneFile,'#\tMODELID\tMODELNAME\tDEFAULT LOWER\tDEFAULT UPPER\tCONTACT GIVEN NAME\tCONTACT FAMILY NAME\tCONTACT EMAIL\tORGANIZATION\tTAXONOMY\tNOTES\n');
0367     
0368     %Print model ID and name. It is assumed that the default lower/upper
0369     %bound correspond to min/max of the bounds
0370     fprintf(geneFile,['\t' model.id '\t' model.description '\t' num2str(min(model.lb)) '\t' num2str(max(model.ub)) '\tRasmus\tAgren\trasmus.agren@chalmers.se\tChalmers University of Technology\t9606\t\n']);
0371     fclose(modelFile);
0372 end
0373 
0374 if isfield(model,'comps')
0375     if ~isfield(model,'compOutside')
0376        model.compOutside=cell(numel(model.comps),1);
0377        model.compOutside(:)={''};
0378     end
0379     %Open for printing the model sheet
0380     compsFile=fopen(fullfile(filePath,'excelComps.txt'),'wt');
0381 
0382     %Print header
0383     fprintf(compsFile,'#\tCOMPABBREV\tCOMPNAME\tINSIDE\tGO TERM\n');
0384     
0385     for i=1:numel(model.comps)
0386        fprintf(compsFile,['\t' model.comps{i} '\t' model.compNames{i} '\t' model.compOutside{i} '\t\n']);
0387     end
0388     fclose(compsFile);
0389 end
0390 
0391 %Now it has generated the text files. If a full file name was submitted
0392 %then should those files be merged into one Excel file and then deleted
0393 if any(excelFile)
0394     foundError=false;
0395     textFiles={'excelRxns.txt' 'excelMets.txt' 'excelGenes.txt' 'excelComps.txt' 'excelModel.txt'};
0396     formatStrings={'%s%s%s%s%s%s%n%n%n%s%s%s%s' '%s%s%s%s%s%s%s%s%s%s' '%s%s%s%s%s%s%s' '%s%s%s%s%s' '%s%s%s%n%n%s%s%s%s%s%s'};
0397     sheets={'RXNS' 'METS' 'GENES' 'COMPS' 'MODEL'};
0398     captions={{'#' 'RXNID' 'NAME' 'EQUATION' 'EC-NUMBER' 'GENE ASSOCIATION' 'LOWER BOUND' 'UPPER BOUND' 'OBJECTIVE' 'COMPARTMENT' 'SUBSYSTEM' 'SBO TERM' 'REPLACEMENT ID'};...
0399             {'#' 'METID' 'METNAME' 'UNCONSTRAINED' 'MIRIAM' 'COMPOSITION' 'InChI' 'COMPARTMENT' 'REPLACEMENT ID' 'METS FIELD'};...
0400             {'#' 'GENE NAME' 'GENE ID 1' 'GENE ID 2' 'SHORT NAME' 'COMPARTMENT' 'KEGG MAPS'};...
0401             {'#' 'COMPABBREV' 'COMPNAME' 'INSIDE' 'GO TERM'};...
0402             {'#' 'MODELID' 'MODELNAME' 'DEFAULT LOWER' 'DEFAULT UPPER' 'CONTACT GIVEN NAME' 'CONTACT FAMILY NAME' 'CONTACT EMAIL' 'ORGANIZATION' 'TAXONOMY' 'NOTES'}};
0403     for i=1:numel(textFiles)    
0404         fid=fopen(fullfile(filePath,textFiles{i}),'r');
0405         C = textscan(fid,formatStrings{i},'Delimiter','\t','Headerlines',1,'Whitespace','','BufSize',15000);
0406         fclose(fid);
0407 
0408         %Since xlswrite requires a cell array I construct it here. This might
0409         %not be the fastest or best way to do this.
0410         cellArray=[];
0411         for j=1:numel(C)
0412             %This is because numbers are read as vectors
0413             if iscell(C{j})
0414                 %It could be that the cell array contains empty values. Those
0415                 %should be ''
0416                 cellArray=[cellArray C{j}];
0417             else
0418                 cellArray=[cellArray num2cell(C{j})];
0419             end
0420         end
0421         %I do like this rather than to read the captions from the file because
0422         %it makes it easier with the doubles
0423         cellArray=[captions{i};cellArray];
0424         errorFlag=xlswrite(excelFile,cellArray,sheets{i});
0425         
0426         if errorFlag==0
0427            fprintf('There was an error in writing the Excel file. Keeping the text files in the specified directory');
0428            foundError=true;
0429         end
0430     end
0431     
0432     %Delete the text files if the Excel sheet was generated sucessfully
0433     if foundError==false
0434         for i=1:numel(textFiles)
0435            delete(fullfile(filePath,textFiles{i}));
0436         end
0437         
0438         %Also remove the empty sheets
0439         DeleteEmptyExcelSheets(excelFile);
0440     end
0441 end
0442 end
0443 
0444 % DeleteEmptyExcelSheets: deletes all empty sheets in an xls-file
0445 %
0446 %==========================================================================
0447 % Version : 1.0
0448 % Author : hnagel
0449 % Date : 27/04/2007
0450 % Tested : 02/05/2007 (DR)
0451 %==========================================================================
0452 %
0453 % This function looped through all sheets and deletes those sheets that are
0454 % empty. Can be used to clean a newly created xls-file after all results
0455 % have been saved in it.
0456 %
0457 % References: Torsten Jacobsen, "delete standard excel sheet"
0458 %---------------------------------------------------------------------
0459 %
0460 % Input:
0461 %
0462 % fileName: name of xls file
0463 %
0464 %---------------------------------------------------------------------
0465 %
0466 % Output:
0467 %
0468 % none
0469 %
0470 %---------------------------------------------------------------------
0471 %
0472 % See also XLSWRITE
0473 %---------------------------------------------------------------------
0474 % Changes
0475 %---------------------------------------------------------------------
0476 %
0477 % Name :
0478 % Date :
0479 % Description:
0480 % Indicated :
0481 function DeleteEmptyExcelSheets(fileName)
0482 % Check whether the file exists
0483 if ~exist(fileName,'file')
0484 error([fileName ' does not exist !']);
0485 else
0486 % Check whether it is an Excel file
0487 typ = xlsfinfo(fileName);
0488 if ~strcmp(typ,'Microsoft Excel Spreadsheet')
0489 error([fileName ' not an Excel sheet !']);
0490 end
0491 end
0492 
0493 % If fileName does not contain a "\" the name of the current path is added
0494 % to fileName. The reason for this is that the full path is required for
0495 % the command "excelObj.workbooks.Open(fileName)" to work properly.
0496 if isempty(strfind(fileName,'\'))
0497 fileName = [cd '\' fileName];
0498 end
0499 
0500 excelObj = actxserver('Excel.Application');
0501 excelWorkbook = excelObj.workbooks.Open(fileName);
0502 worksheets = excelObj.sheets;
0503 sheetIdx = 1;
0504 sheetIdx2 = 1;
0505 numSheets = worksheets.Count;
0506 % Prevent beeps from sounding if we try to delete a non-empty worksheet.
0507 excelObj.EnableSound = false;
0508 
0509 % Loop over all sheets
0510 while sheetIdx2 <= numSheets
0511 % Saves the current number of sheets in the workbook
0512 temp = worksheets.count;
0513 % Check whether the current worksheet is the last one. As there always
0514 % need to be at least one worksheet in an xls-file the last sheet must
0515 % not be deleted.
0516 if or(sheetIdx>1,numSheets-sheetIdx2>0)
0517 % worksheets.Item(sheetIdx).UsedRange.Count is the number of used cells.
0518 % This will be 1 for an empty sheet. It may also be one for certain other
0519 % cases but in those cases, it will beep and not actually delete the sheet.
0520 if worksheets.Item(sheetIdx).UsedRange.Count == 1
0521 worksheets.Item(sheetIdx).Delete;
0522 end
0523 end
0524 % Check whether the number of sheets has changed. If this is not the
0525 % case the counter "sheetIdx" is increased by one.
0526 if temp == worksheets.count;
0527 sheetIdx = sheetIdx + 1;
0528 end
0529 sheetIdx2 = sheetIdx2 + 1; % prevent endless loop...
0530 end
0531 excelObj.EnableSound = true;
0532 excelWorkbook.Save;
0533 excelWorkbook.Close(false);
0534 excelObj.Quit;
0535 delete(excelObj);
0536 return; 
0537 end

Generated on Tue 16-Jul-2013 21:50:02 by m2html © 2005