function blastStructure=getBlastFromExcel(models,blastFile,organismId)
% getBlastFromExcel
%   Reads pairwise gene homology (BLAST) results from the sheets of an
%   Excel file into a structure array.
%
%   models          cell array of model structures. Only the 'id' field of
%                   each model is read here
%   blastFile       name of the Excel file to read
%   organismId      id of the organism of interest, as it is written in
%                   the sheet headers
%
%   blastStructure  structure array with one element per accepted sheet
%                   (or [] if no sheet was accepted), with the fields
%       toId        id of the organism named in the second column header
%       fromId      id of the organism named in the first column header
%       fromGenes   column cell array with the gene names in column 1
%       toGenes     column cell array with the gene names in column 2
%       evalue      column vector with the values in column 3
%       aligLen     column vector with the values in column 4
%       identity    column vector with the values in column 5
%
%   Each sheet is expected to have one header row followed by one row per
%   gene pair. The headers of columns 1 and 2 must be organism ids (either
%   organismId or the id of one of the models) and the headers of columns
%   3-5 must be 'E-value', 'Alignment length' and 'Identity' (compared
%   case-insensitively). A sheet is only used if at least one of its two
%   organisms is organismId. Sheets that do not fulfil this are reported
%   on the command line and skipped.
%
%   Rows where a gene name is missing or where any of the three scores is
%   not a number are removed. An exception is thrown if the file is not
%   recognized as an Excel spreadsheet, or if an accepted sheet has data
%   rows but none of them has both gene names.
%
%   Usage: blastStructure=getBlastFromExcel(models,blastFile,organismId)

blastStructure=[];

%The organism of interest is always stored first, followed by the ids of
%the models in the order they were supplied
organisms=cell(numel(models)+1,1);
organisms{1}=organismId;
for i=1:numel(models)
    organisms{i+1}=models{i}.id;
end

%Get the sheet names and check that the file can be read as a spreadsheet
[type, sheets]=xlsfinfo(blastFile);

if ~strcmp(type,'Microsoft Excel Spreadsheet')
    throw(MException('','The file is not a Microsoft Excel Spreadsheet'));
end

for i=1:numel(sheets)
    %Use the unprocessed "raw" output. The numeric and text outputs of
    %xlsread are trimmed independently of each other, so rows/columns
    %taken from them are not guaranteed to line up
    [~,~,raw]=xlsread(blastFile,i);

    %An empty sheet or a sheet with too few columns cannot have a valid
    %header. Check the size before indexing into it
    headerOk=size(raw,1)>=1 && size(raw,2)>=5;
    if headerOk
        labels=raw(1,1:5);
        %Non-text header cells (e.g. NaN for blank cells) can never match
        labels(~cellfun(@ischar,labels))={''};
        headerOk=strcmpi(labels{3},'E-value') && ...
            strcmpi(labels{4},'Alignment length') && ...
            strcmpi(labels{5},'Identity');
    end
    if ~headerOk
        fprintf('WARNING: The data in sheet %s is not correctly formatted. Ignoring sheet\n',sheets{i});
        continue;
    end

    %Match the two organisms. Only the first hit is used so that the
    %indexes are scalar also if the same id occurs more than once (e.g. a
    %model that has the same id as the organism of interest)
    fromID=find(strcmpi(labels{1},organisms),1);
    toID=find(strcmpi(labels{2},organisms),1);

    if isempty(toID) || isempty(fromID)
        fprintf('The data in sheet %s has no corresponding model. Ignoring sheet\n',sheets{i});
        continue;
    end
    if toID~=1 && fromID~=1
        fprintf('The data in sheet %s does not involve the organism of interest. Ignoring sheet\n',sheets{i});
        continue;
    end

    %Split the data rows into gene names and scores
    fromGenes=raw(2:end,1);
    toGenes=raw(2:end,2);
    scoreCells=raw(2:end,3:5);

    %Everything that is not a numeric scalar (text, blank cells) in the
    %score columns is treated as NaN
    isNumber=cellfun(@(c) isnumeric(c) && isscalar(c),scoreCells);
    scores=nan(size(scoreCells));
    scores(isNumber)=cellfun(@double,scoreCells(isNumber));

    %A gene name must be non-empty text
    hasName=@(c) ischar(c) && ~isempty(c);
    emptyNames=~cellfun(hasName,fromGenes) | ~cellfun(hasName,toGenes);
    if any(emptyNames)
        if all(emptyNames)
            throw(MException('',['Only empty gene names in sheet from ' organisms{fromID} ' to ' organisms{toID}]));
        else
            %The ids are passed as arguments rather than being pasted
            %into the format string, so that characters such as % or \ in
            %an id are printed literally
            fprintf('WARNING: Empty gene names in sheet from %s to %s. Ignoring genes with empty names\n',organisms{fromID},organisms{toID});
        end
    end

    %Remove the rows with empty gene names (as stated in the warning
    %above) and the rows where any of the scores is NaN
    keep=~(emptyNames | any(isnan(scores),2));

    idx=numel(blastStructure)+1;
    blastStructure(idx).toId=organisms{toID};
    blastStructure(idx).fromId=organisms{fromID};
    blastStructure(idx).fromGenes=fromGenes(keep);
    blastStructure(idx).toGenes=toGenes(keep);
    blastStructure(idx).evalue=scores(keep,1);
    blastStructure(idx).aligLen=scores(keep,2);
    blastStructure(idx).identity=scores(keep,3);
end

end