diff --git a/INIT/getINITModel.m b/INIT/getINITModel.m index 9626d9e6..3d21174d 100644 --- a/INIT/getINITModel.m +++ b/INIT/getINITModel.m @@ -391,6 +391,9 @@ if isfield(model,'geneShortNames') model.geneShortNames(I)=[]; end +if isfield(model,'proteinNames') + model.proteinNames(I)=[]; +end if isfield(model,'geneMiriams') model.geneMiriams(I)=[]; end diff --git a/INIT/mergeLinear.m b/INIT/mergeLinear.m index 4ce4c3af..ae4618ba 100644 --- a/INIT/mergeLinear.m +++ b/INIT/mergeLinear.m @@ -29,6 +29,9 @@ if isfield(reducedModel,'geneShortNames') reducedModel.geneShortNames={}; end +if isfield(reducedModel,'proteinNames') + reducedModel.proteinNames={}; +end if isfield(reducedModel,'geneMiriams') reducedModel.geneMiriams={}; end diff --git a/INIT/removeLowScoreGenes.m b/INIT/removeLowScoreGenes.m index 589a54b9..d49fbd7d 100644 --- a/INIT/removeLowScoreGenes.m +++ b/INIT/removeLowScoreGenes.m @@ -119,6 +119,9 @@ if isfield(newModel,'geneShortNames') newModel.geneShortNames(remInd) = []; end +if isfield(newModel,'proteinNames') + newModel.proteinNames(remInd) = []; +end if isfield(newModel,'geneMiriams') newModel.geneMiriams(remInd) = []; end diff --git a/core/addGenesRaven.m b/core/addGenesRaven.m index 9be3abf4..30e73b83 100755 --- a/core/addGenesRaven.m +++ b/core/addGenesRaven.m @@ -14,6 +14,8 @@ % default '') % geneMiriams cell array with MIRIAM structures (optional, % default []) +% proteinNames cell array of protein names associated to +% each gene (optional, default '') % % newModel an updated model structure % @@ -56,6 +58,9 @@ if isfield(genesToAdd,'geneShortNames') genesToAdd.geneShortNames(I)=[]; end + if isfield(genesToAdd,'proteinNames') + genesToAdd.proteinNames(I)=[]; + end if isfield(genesToAdd,'geneMiriams') genesToAdd.geneMiriams(I)=[]; end @@ -81,6 +86,24 @@ newModel.geneShortNames=[newModel.geneShortNames;filler]; end end +if isfield(genesToAdd,'proteinNames') + genesToAdd.proteinNames=convertCharArray(genesToAdd.proteinNames); + if 
numel(genesToAdd.proteinNames)~=nGenes + EM='genesToAdd.proteinNames must have the same number of elements as genesToAdd.genes'; + dispEM(EM); + end + %Add empty field if it doesn't exist + if ~isfield(newModel,'proteinNames') + newModel.proteinNames=largeFiller; + end + newModel.proteinNames=[newModel.proteinNames;genesToAdd.proteinNames(:)]; +else + %Add empty strings if structure is in model + if isfield(newModel,'proteinNames') + newModel.proteinNames=[newModel.proteinNames;filler]; + end +end + %Don't check the type of geneMiriams if isfield(genesToAdd,'geneMiriams') diff --git a/core/deleteUnusedGenes.m b/core/deleteUnusedGenes.m index 49cff71f..0fb2e2e1 100755 --- a/core/deleteUnusedGenes.m +++ b/core/deleteUnusedGenes.m @@ -37,6 +37,10 @@ reducedModel.geneShortNames=reducedModel.geneShortNames(toKeep); end +if isfield(reducedModel,'proteinNames') + reducedModel.proteinNames=reducedModel.proteinNames(toKeep); +end + if isfield(reducedModel,'geneMiriams') reducedModel.geneMiriams=reducedModel.geneMiriams(toKeep); end diff --git a/core/getModelFromHomology.m b/core/getModelFromHomology.m index 242e6f36..42ef3f82 100755 --- a/core/getModelFromHomology.m +++ b/core/getModelFromHomology.m @@ -107,14 +107,17 @@ modelNames=cell(numel(models),1); for i=1:numel(models) modelNames{i}=models{i}.id; - %Gene short names and geneMiriams are often different between species, - %safer not to include them + %Gene short names, geneMiriams and proteins are often different + %between species, safer not to include them if isfield(models{i},'geneShortNames') models{i}=rmfield(models{i},'geneShortNames'); end if isfield(models{i},'geneMiriams') models{i}=rmfield(models{i},'geneMiriams'); end + if isfield(models{i},'proteinNames') + models{i}=rmfield(models{i},'proteinNames'); + end %The geneFrom field also loses meaning if the genes are replaced by %orthologs if isfield(models{i},'geneFrom') diff --git a/core/mergeModels.m b/core/mergeModels.m index 19de25c1..c7b7fb15 100755 --- 
a/core/mergeModels.m +++ b/core/mergeModels.m @@ -492,7 +492,11 @@ if isfield(models{i},'geneShortNames') model.geneShortNames=models{i}.geneShortNames; end - + + if isfield(models{i},'proteinNames') + model.proteinNames=models{i}.proteinNames; + end + if isfield(models{i},'geneMiriams') model.geneMiriams=models{i}.geneMiriams; end @@ -530,7 +534,23 @@ model.geneShortNames=[model.geneShortNames;emptyGeneSN]; end end - + + if isfield(models{i},'proteinNames') + if isfield(model,'proteinNames') + model.proteinNames=[model.proteinNames;models{i}.proteinNames(genesToAdd)]; + else + emptyGeneSN=cell(numel(model.genes)-numel(genesToAdd),1); + emptyGeneSN(:)={''}; + model.proteinNames=[emptyGeneSN;models{i}.proteinNames(genesToAdd)]; + end + else + if isfield(model,'proteinNames') + emptyGeneSN=cell(numel(genesToAdd),1); + emptyGeneSN(:)={''}; + model.proteinNames=[model.proteinNames;emptyGeneSN]; + end + end + if isfield(models{i},'geneMiriams') if isfield(model,'geneMiriams') model.geneMiriams=[model.geneMiriams;models{i}.geneMiriams(genesToAdd)]; diff --git a/core/permuteModel.m b/core/permuteModel.m index 200bf283..1a7fca61 100755 --- a/core/permuteModel.m +++ b/core/permuteModel.m @@ -132,6 +132,9 @@ if isfield(newModel,'geneShortNames') newModel.geneShortNames=newModel.geneShortNames(indexes); end + if isfield(newModel,'proteinNames') + newModel.proteinNames=newModel.proteinNames(indexes); + end if isfield(newModel,'rxnGeneMat') newModel.rxnGeneMat=newModel.rxnGeneMat(:,indexes); end diff --git a/core/predictLocalization.m b/core/predictLocalization.m index bab49bb0..d05fe0ef 100755 --- a/core/predictLocalization.m +++ b/core/predictLocalization.m @@ -199,6 +199,9 @@ if isfield(model,'geneMiriams') model.geneMiriams=[model.geneMiriams;{[]}]; end + if isfield(model,'proteinNames') + model.proteinNames=[model.proteinNames;{''}]; + end if isfield(model,'geneFrom') model.geneFrom=[model.geneFrom;{{'FAKE'}}]; end @@ -258,6 +261,9 @@ if isfield(model,'geneShortNames') 
model.geneShortNames=[model.geneShortNames;{''}]; end + if isfield(model,'proteinNames') + model.proteinNames=[model.proteinNames;{''}]; + end if isfield(model,'geneFrom') model.geneFrom=[model.geneFrom;{'COMPLEX'}]; end @@ -759,6 +765,9 @@ if isfield(outModel,'geneShortNames') outModel.geneShortNames(I)=[]; end +if isfield(outModel,'proteinNames') + outModel.proteinNames(I)=[]; +end outModel.rxnGeneMat(:,I)=[]; %Fix grRules and reconstruct rxnGeneMat diff --git a/core/removeReactions.m b/core/removeReactions.m index d2af5a4a..d296c566 100755 --- a/core/removeReactions.m +++ b/core/removeReactions.m @@ -129,6 +129,10 @@ if isfield(reducedModel,'geneShortNames') reducedModel.geneShortNames=reducedModel.geneShortNames(toKeep); end + + if isfield(reducedModel,'proteinNames') + reducedModel.proteinNames=reducedModel.proteinNames(toKeep); + end if isfield(reducedModel,'geneMiriams') reducedModel.geneMiriams=reducedModel.geneMiriams(toKeep); diff --git a/core/simplifyModel.m b/core/simplifyModel.m index 2dbc72ec..ecf8a6af 100755 --- a/core/simplifyModel.m +++ b/core/simplifyModel.m @@ -221,6 +221,9 @@ if isfield(reducedModel,'geneShortNames') reducedModel.geneShortNames={}; end + if isfield(reducedModel,'proteinNames') + reducedModel.proteinNames={}; + end if isfield(reducedModel,'geneMiriams') reducedModel.geneMiriams={}; end diff --git a/doc/INIT/getINITModel.html b/doc/INIT/getINITModel.html index 66b87a44..bd3c7d0e 100644 --- a/doc/INIT/getINITModel.html +++ b/doc/INIT/getINITModel.html @@ -544,77 +544,80 @@

SOURCE CODE ^if isfield(model,'geneShortNames') 0392 model.geneShortNames(I)=[]; 0393 end -0394 if isfield(model,'geneMiriams') -0395 model.geneMiriams(I)=[]; +0394 if isfield(model,'proteinNames') +0395 model.proteinNames(I)=[]; 0396 end -0397 if isfield(model,'geneFrom') -0398 model.geneFrom(I)=[]; +0397 if isfield(model,'geneMiriams') +0398 model.geneMiriams(I)=[]; 0399 end -0400 if isfield(model,'geneComps') -0401 model.geneComps(I)=[]; +0400 if isfield(model,'geneFrom') +0401 model.geneFrom(I)=[]; 0402 end -0403 -0404 %At this stage the model will contain some exchange reactions but probably -0405 %not all (and maybe zero). This can be inconvenient, so all exchange -0406 %reactions from the reference model are added, except for those which -0407 %involve metabolites that are not in the model. -0408 -0409 %First delete and included exchange reactions in order to prevent the order -0410 %from changing -0411 model=removeReactions(model,getExchangeRxns(model)); -0412 -0413 %Create a model with only the exchange reactions in refModel -0414 excModel=removeReactions(refModel,setdiff(refModel.rxns,getExchangeRxns(refModel)),true,true); +0403 if isfield(model,'geneComps') +0404 model.geneComps(I)=[]; +0405 end +0406 +0407 %At this stage the model will contain some exchange reactions but probably +0408 %not all (and maybe zero). This can be inconvenient, so all exchange +0409 %reactions from the reference model are added, except for those which +0410 %involve metabolites that are not in the model. 
+0411 +0412 %First delete and included exchange reactions in order to prevent the order +0413 %from changing +0414 model=removeReactions(model,getExchangeRxns(model)); 0415 -0416 %Find the metabolites there which are not exchange metabolites and which do -0417 %not exist in the output model -0418 I=~ismember(excModel.mets,model.mets) & excModel.unconstrained==0; -0419 -0420 %Then find those reactions and delete them -0421 [~, J]=find(excModel.S(I,:)); -0422 excModel=removeReactions(excModel,J,true,true); -0423 -0424 %Merge with the output model -0425 model=mergeModels({model;excModel},'metNames'); -0426 model.id='INITModel'; -0427 model.name=['Automatically generated model for ' tissue]; -0428 if any(celltype) -0429 model.name=[model.name ' - ' celltype]; -0430 end -0431 -0432 if printReport==true -0433 printScores(model,'Final model statistics',hpaData,arrayData,tissue,celltype); -0434 end -0435 -0436 %Add information about essential reactions and reactions included for -0437 %gap-filling and return a taskReport -0438 if ~isempty(taskStructure) -0439 I=find(taskReport.ok); %Ignore failed tasks -0440 for i=1:numel(I) -0441 taskReport.essential{I(i),1}=cModel.rxns(essentialRxnMat(:,I(i))); -0442 taskReport.gapfill{I(i),1}=refModelNoExc.rxns(addedRxnMat(:,i)); -0443 end -0444 else -0445 taskReport=[]; -0446 end -0447 -0448 %Fix grRules and reconstruct rxnGeneMat -0449 [grRules,rxnGeneMat] = standardizeGrRules(model,true); -0450 model.grRules = grRules; -0451 model.rxnGeneMat = rxnGeneMat; -0452 end -0453 -0454 %This is for printing a summary of a model -0455 function [rxnS, geneS]=printScores(model,name,hpaData,arrayData,tissue,celltype) -0456 [a, b]=scoreModel(model,hpaData,arrayData,tissue,celltype); -0457 rxnS=mean(a); -0458 geneS=mean(b(~isinf(b))); -0459 fprintf([name ':\n']); -0460 fprintf(['\t' num2str(numel(model.rxns)) ' reactions, ' num2str(numel(model.genes)) ' genes\n']); -0461 fprintf(['\tMean reaction score: ' num2str(rxnS) '\n']); -0462 
fprintf(['\tMean gene score: ' num2str(geneS) '\n']); -0463 fprintf(['\tReactions with positive scores: ' num2str(100*sum(a>0)/numel(a)) '%%\n\n']); -0464 end +0416 %Create a model with only the exchange reactions in refModel +0417 excModel=removeReactions(refModel,setdiff(refModel.rxns,getExchangeRxns(refModel)),true,true); +0418 +0419 %Find the metabolites there which are not exchange metabolites and which do +0420 %not exist in the output model +0421 I=~ismember(excModel.mets,model.mets) & excModel.unconstrained==0; +0422 +0423 %Then find those reactions and delete them +0424 [~, J]=find(excModel.S(I,:)); +0425 excModel=removeReactions(excModel,J,true,true); +0426 +0427 %Merge with the output model +0428 model=mergeModels({model;excModel},'metNames'); +0429 model.id='INITModel'; +0430 model.name=['Automatically generated model for ' tissue]; +0431 if any(celltype) +0432 model.name=[model.name ' - ' celltype]; +0433 end +0434 +0435 if printReport==true +0436 printScores(model,'Final model statistics',hpaData,arrayData,tissue,celltype); +0437 end +0438 +0439 %Add information about essential reactions and reactions included for +0440 %gap-filling and return a taskReport +0441 if ~isempty(taskStructure) +0442 I=find(taskReport.ok); %Ignore failed tasks +0443 for i=1:numel(I) +0444 taskReport.essential{I(i),1}=cModel.rxns(essentialRxnMat(:,I(i))); +0445 taskReport.gapfill{I(i),1}=refModelNoExc.rxns(addedRxnMat(:,i)); +0446 end +0447 else +0448 taskReport=[]; +0449 end +0450 +0451 %Fix grRules and reconstruct rxnGeneMat +0452 [grRules,rxnGeneMat] = standardizeGrRules(model,true); +0453 model.grRules = grRules; +0454 model.rxnGeneMat = rxnGeneMat; +0455 end +0456 +0457 %This is for printing a summary of a model +0458 function [rxnS, geneS]=printScores(model,name,hpaData,arrayData,tissue,celltype) +0459 [a, b]=scoreModel(model,hpaData,arrayData,tissue,celltype); +0460 rxnS=mean(a); +0461 geneS=mean(b(~isinf(b))); +0462 fprintf([name ':\n']); +0463 fprintf(['\t' 
num2str(numel(model.rxns)) ' reactions, ' num2str(numel(model.genes)) ' genes\n']); +0464 fprintf(['\tMean reaction score: ' num2str(rxnS) '\n']); +0465 fprintf(['\tMean gene score: ' num2str(geneS) '\n']); +0466 fprintf(['\tReactions with positive scores: ' num2str(100*sum(a>0)/numel(a)) '%%\n\n']); +0467 end
Generated by m2html © 2005
\ No newline at end of file diff --git a/doc/INIT/mergeLinear.html b/doc/INIT/mergeLinear.html index 9bae294c..6643af31 100644 --- a/doc/INIT/mergeLinear.html +++ b/doc/INIT/mergeLinear.html @@ -92,187 +92,190 @@

SOURCE CODE ^if isfield(reducedModel,'geneShortNames') 0030 reducedModel.geneShortNames={}; 0031 end -0032 if isfield(reducedModel,'geneMiriams') -0033 reducedModel.geneMiriams={}; +0032 if isfield(reducedModel,'proteinNames') +0033 reducedModel.proteinNames={}; 0034 end -0035 if isfield(reducedModel,'geneComps') -0036 reducedModel.geneComps=[]; +0035 if isfield(reducedModel,'geneMiriams') +0036 reducedModel.geneMiriams={}; 0037 end -0038 -0039 nextGroupId = 1; -0040 origRxnIds = reducedModel.rxns; -0041 groupIds = zeros(numel(reducedModel.rxns),1); -0042 reversedRxns = false(numel(reducedModel.rxns),1); -0043 -0044 %Loop through and iteratively group linear reactions -0045 while 1 -0046 %Get the banned reaction indexes. Note that the indexes will change -0047 %in each iteration, but the names will not as they won't be merged -0048 %with any other reaction -0049 bannedIndexes=getIndexes(reducedModel,noMergeRxns,'rxns'); -0050 -0051 %Select all metabolites that are only present as reactants/products -0052 %in one reaction -0053 twoNonZero = find(sum(reducedModel.S ~= 0, 2) == 2); -0054 -0055 mergedSome=false; -0056 -0057 %Loop through each of them and see if the reactions should be -0058 %merged -0059 for i=1:numel(twoNonZero) -0060 involvedRxns=find(reducedModel.S(twoNonZero(i),:)); -0061 %Check that we can have one positive and one negative -0062 pos = sum(reducedModel.S(twoNonZero(i),involvedRxns).' > 0 | reducedModel.rev(involvedRxns)); -0063 neg = sum(reducedModel.S(twoNonZero(i),involvedRxns).' 
< 0 | reducedModel.rev(involvedRxns)); -0064 -0065 -0066 %Check so that one or both of the reactions haven't been merged -0067 %already -0068 if numel(involvedRxns)==2 && isempty(intersect(bannedIndexes,involvedRxns)) && pos >= 1 && neg >= 1 -0069 %first, take care of a special case: If the first reaction is producing the metabolite and if it is reversible, -0070 %and the second is also producing it and is not reversible, change the order - the code below will not work otherwise -0071 if reducedModel.rev(involvedRxns(1)) && (~reducedModel.rev(involvedRxns(2))) && ... -0072 (reducedModel.S(twoNonZero(i),involvedRxns(1)) > 0) && (reducedModel.S(twoNonZero(i),involvedRxns(2)) > 0) -0073 involvedRxns = flip(involvedRxns); -0074 end -0075 -0076 %first make sure the first reaction is producing the metabolite -0077 if reducedModel.S(twoNonZero(i),involvedRxns(1)) < 0 -0078 %it is not producing the metabolite - fix that -0079 %first choice: use the second reaction as producer if it is producing -0080 if reducedModel.S(twoNonZero(i),involvedRxns(2)) > 0 -0081 involvedRxns = flip(involvedRxns);%make the second the first -0082 else -0083 %now we know that the second reaction is not producing, so we can safely try to make the first a producer -0084 if reducedModel.rev(involvedRxns(1)) == 1 -0085 [reducedModel,reversedRxns] = flipRxn(reducedModel, involvedRxns(1), groupIds, reversedRxns); -0086 else %ok, finally try to flip the second reaction -0087 if reducedModel.rev(involvedRxns(2)) == 1 -0088 [reducedModel,reversedRxns] = flipRxn(reducedModel, involvedRxns(2), groupIds, reversedRxns); -0089 involvedRxns = flip(involvedRxns);%make the second the first -0090 else -0091 error('We should never end up here!'); -0092 end -0093 end -0094 end -0095 end -0096 %Now, make sure the second rxn is a consumer -0097 if reducedModel.S(twoNonZero(i),involvedRxns(2)) > 0 -0098 if reducedModel.rev(involvedRxns(2)) == 1 -0099 [reducedModel,reversedRxns] = flipRxn(reducedModel, involvedRxns(2), 
groupIds, reversedRxns); -0100 else -0101 error('We should never end up here!'); -0102 end -0103 end -0104 -0105 %Calculate how many times the second reaction has to be -0106 %multiplied before being merged with the first -0107 stoichRatio=abs(reducedModel.S(twoNonZero(i),involvedRxns(1))/reducedModel.S(twoNonZero(i),involvedRxns(2))); -0108 -0109 %Add the second to the first -0110 reducedModel.S(:,involvedRxns(1))=reducedModel.S(:,involvedRxns(1))+reducedModel.S(:,involvedRxns(2))*stoichRatio; +0038 if isfield(reducedModel,'geneComps') +0039 reducedModel.geneComps=[]; +0040 end +0041 +0042 nextGroupId = 1; +0043 origRxnIds = reducedModel.rxns; +0044 groupIds = zeros(numel(reducedModel.rxns),1); +0045 reversedRxns = false(numel(reducedModel.rxns),1); +0046 +0047 %Loop through and iteratively group linear reactions +0048 while 1 +0049 %Get the banned reaction indexes. Note that the indexes will change +0050 %in each iteration, but the names will not as they won't be merged +0051 %with any other reaction +0052 bannedIndexes=getIndexes(reducedModel,noMergeRxns,'rxns'); +0053 +0054 %Select all metabolites that are only present as reactants/products +0055 %in one reaction +0056 twoNonZero = find(sum(reducedModel.S ~= 0, 2) == 2); +0057 +0058 mergedSome=false; +0059 +0060 %Loop through each of them and see if the reactions should be +0061 %merged +0062 for i=1:numel(twoNonZero) +0063 involvedRxns=find(reducedModel.S(twoNonZero(i),:)); +0064 %Check that we can have one positive and one negative +0065 pos = sum(reducedModel.S(twoNonZero(i),involvedRxns).' > 0 | reducedModel.rev(involvedRxns)); +0066 neg = sum(reducedModel.S(twoNonZero(i),involvedRxns).' 
< 0 | reducedModel.rev(involvedRxns)); +0067 +0068 +0069 %Check so that one or both of the reactions haven't been merged +0070 %already +0071 if numel(involvedRxns)==2 && isempty(intersect(bannedIndexes,involvedRxns)) && pos >= 1 && neg >= 1 +0072 %first, take care of a special case: If the first reaction is producing the metabolite and if it is reversible, +0073 %and the second is also producing it and is not reversible, change the order - the code below will not work otherwise +0074 if reducedModel.rev(involvedRxns(1)) && (~reducedModel.rev(involvedRxns(2))) && ... +0075 (reducedModel.S(twoNonZero(i),involvedRxns(1)) > 0) && (reducedModel.S(twoNonZero(i),involvedRxns(2)) > 0) +0076 involvedRxns = flip(involvedRxns); +0077 end +0078 +0079 %first make sure the first reaction is producing the metabolite +0080 if reducedModel.S(twoNonZero(i),involvedRxns(1)) < 0 +0081 %it is not producing the metabolite - fix that +0082 %first choice: use the second reaction as producer if it is producing +0083 if reducedModel.S(twoNonZero(i),involvedRxns(2)) > 0 +0084 involvedRxns = flip(involvedRxns);%make the second the first +0085 else +0086 %now we know that the second reaction is not producing, so we can safely try to make the first a producer +0087 if reducedModel.rev(involvedRxns(1)) == 1 +0088 [reducedModel,reversedRxns] = flipRxn(reducedModel, involvedRxns(1), groupIds, reversedRxns); +0089 else %ok, finally try to flip the second reaction +0090 if reducedModel.rev(involvedRxns(2)) == 1 +0091 [reducedModel,reversedRxns] = flipRxn(reducedModel, involvedRxns(2), groupIds, reversedRxns); +0092 involvedRxns = flip(involvedRxns);%make the second the first +0093 else +0094 error('We should never end up here!'); +0095 end +0096 end +0097 end +0098 end +0099 %Now, make sure the second rxn is a consumer +0100 if reducedModel.S(twoNonZero(i),involvedRxns(2)) > 0 +0101 if reducedModel.rev(involvedRxns(2)) == 1 +0102 [reducedModel,reversedRxns] = flipRxn(reducedModel, involvedRxns(2), 
groupIds, reversedRxns); +0103 else +0104 error('We should never end up here!'); +0105 end +0106 end +0107 +0108 %Calculate how many times the second reaction has to be +0109 %multiplied before being merged with the first +0110 stoichRatio=abs(reducedModel.S(twoNonZero(i),involvedRxns(1))/reducedModel.S(twoNonZero(i),involvedRxns(2))); 0111 -0112 %Clear the second reaction -0113 reducedModel.S(:,involvedRxns(2))=0; +0112 %Add the second to the first +0113 reducedModel.S(:,involvedRxns(1))=reducedModel.S(:,involvedRxns(1))+reducedModel.S(:,involvedRxns(2))*stoichRatio; 0114 -0115 %This is to prevent numerical issues. It should be 0 -0116 %already -0117 reducedModel.S(twoNonZero(i),involvedRxns(1))=0; -0118 -0119 %At this point the second reaction is certain to be deleted -0120 %in a later step and can therefore be ignored +0115 %Clear the second reaction +0116 reducedModel.S(:,involvedRxns(2))=0; +0117 +0118 %This is to prevent numerical issues. It should be 0 +0119 %already +0120 reducedModel.S(twoNonZero(i),involvedRxns(1))=0; 0121 -0122 %Recalculate the bounds for the new reaction. This can be -0123 %problematic since the scale of the bounds may change -0124 %dramatically. Let the most constraining reaction determine -0125 %the new bound -0126 lb1=reducedModel.lb(involvedRxns(1)); -0127 lb2=reducedModel.lb(involvedRxns(2)); -0128 ub1=reducedModel.ub(involvedRxns(1)); -0129 ub2=reducedModel.ub(involvedRxns(2)); -0130 -0131 if lb2~=-inf -0132 reducedModel.lb(involvedRxns(1))=max(lb1,lb2/stoichRatio); -0133 end -0134 if ub2~=inf -0135 reducedModel.ub(involvedRxns(1))=min(ub1,ub2/stoichRatio); +0122 %At this point the second reaction is certain to be deleted +0123 %in a later step and can therefore be ignored +0124 +0125 %Recalculate the bounds for the new reaction. This can be +0126 %problematic since the scale of the bounds may change +0127 %dramatically. 
Let the most constraining reaction determine +0128 %the new bound +0129 lb1=reducedModel.lb(involvedRxns(1)); +0130 lb2=reducedModel.lb(involvedRxns(2)); +0131 ub1=reducedModel.ub(involvedRxns(1)); +0132 ub2=reducedModel.ub(involvedRxns(2)); +0133 +0134 if lb2~=-inf +0135 reducedModel.lb(involvedRxns(1))=max(lb1,lb2/stoichRatio); 0136 end -0137 -0138 %take care of the .rev flag - it could be that the combined rxn changes from rev to irrev -0139 reducedModel.rev(involvedRxns(1)) = reducedModel.rev(involvedRxns(1))*reducedModel.rev(involvedRxns(2));%this is a way to do an "and" operation with 0 and 1 numbers -0140 -0141 %Then recalculate the objective coefficient. The resulting -0142 %coefficient is the weighted sum of the previous -0143 reducedModel.c(involvedRxns(1))=reducedModel.c(involvedRxns(1))+reducedModel.c(involvedRxns(2))*stoichRatio; -0144 -0145 %store which reactions that have been merged -0146 rxnInd1 = find(strcmp(origRxnIds, reducedModel.rxns(involvedRxns(1)))); -0147 rxnInd2 = find(strcmp(origRxnIds, reducedModel.rxns(involvedRxns(2)))); -0148 grpId = max(groupIds(rxnInd1),groupIds(rxnInd2)); -0149 if grpId == 0 -0150 grpId = nextGroupId; -0151 nextGroupId = nextGroupId + 1; -0152 end -0153 -0154 if groupIds(rxnInd1) ~= grpId -0155 if groupIds(rxnInd1) == 0 -0156 %not merged before, just set the group id -0157 groupIds(rxnInd1) = grpId; -0158 else -0159 %merged before - all rxns with the same group id should be changed -0160 groupIds(groupIds == groupIds(rxnInd1)) = grpId; -0161 end -0162 end -0163 if groupIds(rxnInd2) ~= grpId -0164 if groupIds(rxnInd2) == 0 -0165 %not merged before, just set the group id -0166 groupIds(rxnInd2) = grpId; -0167 else -0168 %merged before - all rxns with the same group id should be changed -0169 groupIds(groupIds == groupIds(rxnInd2)) = grpId; -0170 end -0171 end -0172 -0173 %Iterate again -0174 mergedSome=true; -0175 end -0176 end -0177 -0178 %All possible reactions merged -0179 if mergedSome==false -0180 break; -0181 
end -0182 -0183 %Now delete all reactions that involve no metabolites -0184 I=find(sum(reducedModel.S~=0,1)==0); +0137 if ub2~=inf +0138 reducedModel.ub(involvedRxns(1))=min(ub1,ub2/stoichRatio); +0139 end +0140 +0141 %take care of the .rev flag - it could be that the combined rxn changes from rev to irrev +0142 reducedModel.rev(involvedRxns(1)) = reducedModel.rev(involvedRxns(1))*reducedModel.rev(involvedRxns(2));%this is a way to do an "and" operation with 0 and 1 numbers +0143 +0144 %Then recalculate the objective coefficient. The resulting +0145 %coefficient is the weighted sum of the previous +0146 reducedModel.c(involvedRxns(1))=reducedModel.c(involvedRxns(1))+reducedModel.c(involvedRxns(2))*stoichRatio; +0147 +0148 %store which reactions that have been merged +0149 rxnInd1 = find(strcmp(origRxnIds, reducedModel.rxns(involvedRxns(1)))); +0150 rxnInd2 = find(strcmp(origRxnIds, reducedModel.rxns(involvedRxns(2)))); +0151 grpId = max(groupIds(rxnInd1),groupIds(rxnInd2)); +0152 if grpId == 0 +0153 grpId = nextGroupId; +0154 nextGroupId = nextGroupId + 1; +0155 end +0156 +0157 if groupIds(rxnInd1) ~= grpId +0158 if groupIds(rxnInd1) == 0 +0159 %not merged before, just set the group id +0160 groupIds(rxnInd1) = grpId; +0161 else +0162 %merged before - all rxns with the same group id should be changed +0163 groupIds(groupIds == groupIds(rxnInd1)) = grpId; +0164 end +0165 end +0166 if groupIds(rxnInd2) ~= grpId +0167 if groupIds(rxnInd2) == 0 +0168 %not merged before, just set the group id +0169 groupIds(rxnInd2) = grpId; +0170 else +0171 %merged before - all rxns with the same group id should be changed +0172 groupIds(groupIds == groupIds(rxnInd2)) = grpId; +0173 end +0174 end +0175 +0176 %Iterate again +0177 mergedSome=true; +0178 end +0179 end +0180 +0181 %All possible reactions merged +0182 if mergedSome==false +0183 break; +0184 end 0185 -0186 %Remove reactions -0187 reducedModel=removeReactions(reducedModel,I); +0186 %Now delete all reactions that involve no 
metabolites +0187 I=find(sum(reducedModel.S~=0,1)==0); 0188 -0189 %Remove metabolites -0190 notInUse=sum(reducedModel.S~=0,2)==0; -0191 reducedModel=removeMets(reducedModel,notInUse); -0192 end -0193 -0194 function [model1,reversedRxns1] = flipRxn(model1, rxnInd, groupIds1, reversedRxns1) -0195 model1.S(:,rxnInd) = model1.S(:,rxnInd)*-1; -0196 %swap the bounds -0197 ub = model1.ub(rxnInd); -0198 model1.ub(rxnInd) = -model1.lb(rxnInd); -0199 model1.lb(rxnInd) = -ub; -0200 %flip the objective -0201 model1.c(rxnInd) = -model1.c(rxnInd); -0202 -0203 %now take care of the reversedRxns - if this is a group, reverse all of the -0204 %reactions in the group in the reversedRxns index - they will all be reversed at the -0205 %same time since they are the same rxn. -0206 rxnIndices = rxnInd; -0207 if groupIds1(rxnInd) > 0 -0208 rxnIndices = find(groupIds1 == groupIds1(rxnInd)); -0209 end -0210 reversedRxns1(rxnIndices) = ~reversedRxns1(rxnIndices); -0211 end -0212 end +0189 %Remove reactions +0190 reducedModel=removeReactions(reducedModel,I); +0191 +0192 %Remove metabolites +0193 notInUse=sum(reducedModel.S~=0,2)==0; +0194 reducedModel=removeMets(reducedModel,notInUse); +0195 end +0196 +0197 function [model1,reversedRxns1] = flipRxn(model1, rxnInd, groupIds1, reversedRxns1) +0198 model1.S(:,rxnInd) = model1.S(:,rxnInd)*-1; +0199 %swap the bounds +0200 ub = model1.ub(rxnInd); +0201 model1.ub(rxnInd) = -model1.lb(rxnInd); +0202 model1.lb(rxnInd) = -ub; +0203 %flip the objective +0204 model1.c(rxnInd) = -model1.c(rxnInd); +0205 +0206 %now take care of the reversedRxns - if this is a group, reverse all of the +0207 %reactions in the group in the reversedRxns index - they will all be reversed at the +0208 %same time since they are the same rxn. +0209 rxnIndices = rxnInd; +0210 if groupIds1(rxnInd) > 0 +0211 rxnIndices = find(groupIds1 == groupIds1(rxnInd)); +0212 end +0213 reversedRxns1(rxnIndices) = ~reversedRxns1(rxnIndices); +0214 end +0215 end
Generated by m2html © 2005
\ No newline at end of file diff --git a/doc/INIT/removeLowScoreGenes.html b/doc/INIT/removeLowScoreGenes.html index 8ec08a62..9994b421 100644 --- a/doc/INIT/removeLowScoreGenes.html +++ b/doc/INIT/removeLowScoreGenes.html @@ -223,140 +223,143 @@

SOURCE CODE ^if isfield(newModel,'geneShortNames') 0120 newModel.geneShortNames(remInd) = []; 0121 end -0122 if isfield(newModel,'geneMiriams') -0123 newModel.geneMiriams(remInd) = []; +0122 if isfield(newModel,'proteinNames') +0123 newModel.proteinNames(remInd) = []; 0124 end -0125 if isfield(newModel,'geneFrom') -0126 newModel.geneFrom(remInd) = []; +0125 if isfield(newModel,'geneMiriams') +0126 newModel.geneMiriams(remInd) = []; 0127 end -0128 if isfield(newModel,'geneComps') -0129 newModel.geneComps(remInd) = []; +0128 if isfield(newModel,'geneFrom') +0129 newModel.geneFrom(remInd) = []; 0130 end -0131 -0132 +0131 if isfield(newModel,'geneComps') +0132 newModel.geneComps(remInd) = []; 0133 end 0134 0135 -0136 -0137 function [updatedRule,rScore] = processSimpleRule(rule,genes,gScores,isozymeScoring,complexScoring) -0138 % Either score or modify a reaction gene rule containig only ANDs or ORs. -0139 % -0140 % If the rule contains an enzyme complex (all ANDs), the complex will be -0141 % scored based on the score of its subunits. Subunits without a score (NaN) -0142 % will be excluded from the score calculation. -0143 % -0144 % If the rule contains only isozymes (all ORs), the negative-score genes -0145 % will be removed from the rule. Isozymes without a score (NaN) will not be -0146 % removed from the rule. The resuling rule will then be scored. 
-0147 -0148 -0149 % get IDs and indices of genes involved in rule -0150 ruleGenes = unique(regexp(rule,'[^&|\(\) ]+','match')); -0151 [~,geneInd] = ismember(ruleGenes,genes); -0152 -0153 % rules with one or no genes remain unchanged -0154 if numel(ruleGenes) < 2 -0155 rScore = gScores(geneInd); -0156 updatedRule = rule; -0157 return -0158 end -0159 -0160 if ~contains(rule,'&') % rule contains isozymes -0161 -0162 scoreMethod = isozymeScoring; -0163 negInd = gScores(geneInd) < 0; % NaNs will return false here -0164 if all(negInd) -0165 % get the least negative gene, adding a small random value to avoid a tie -0166 [~,maxInd] = max(gScores(geneInd) + rand(size(geneInd))*(1e-8)); -0167 updatedRule = ruleGenes{maxInd}; -0168 elseif sum(~negInd) == 1 -0169 updatedRule = ruleGenes{~negInd}; -0170 else -0171 updatedRule = strjoin(ruleGenes(~negInd),' | '); -0172 if startsWith(rule,'(') -0173 updatedRule = ['(',updatedRule,')']; -0174 end -0175 end -0176 -0177 % update ruleGenes and their indices -0178 ruleGenes = unique(regexp(updatedRule,'[^&|\(\) ]+','match')); -0179 [~,geneInd] = ismember(ruleGenes,genes); -0180 -0181 elseif ~contains(rule,'|') % rule contains enzyme complex -0182 scoreMethod = complexScoring; -0183 updatedRule = rule; -0184 else -0185 error('This function cannot handle rules with both "OR" and "AND" expressions.'); -0186 end -0187 -0188 % score rule -0189 switch lower(scoreMethod) -0190 case 'min' -0191 rScore = min(gScores(geneInd),[],'omitnan'); -0192 case 'max' -0193 rScore = max(gScores(geneInd),[],'omitnan'); -0194 case 'median' -0195 rScore = median(gScores(geneInd),'omitnan'); -0196 case 'average' -0197 rScore = mean(gScores(geneInd),'omitnan'); -0198 end -0199 -0200 end -0201 +0136 end +0137 +0138 +0139 +0140 function [updatedRule,rScore] = processSimpleRule(rule,genes,gScores,isozymeScoring,complexScoring) +0141 % Either score or modify a reaction gene rule containig only ANDs or ORs. 
+0142 % +0143 % If the rule contains an enzyme complex (all ANDs), the complex will be +0144 % scored based on the score of its subunits. Subunits without a score (NaN) +0145 % will be excluded from the score calculation. +0146 % +0147 % If the rule contains only isozymes (all ORs), the negative-score genes +0148 % will be removed from the rule. Isozymes without a score (NaN) will not be +0149 % removed from the rule. The resuling rule will then be scored. +0150 +0151 +0152 % get IDs and indices of genes involved in rule +0153 ruleGenes = unique(regexp(rule,'[^&|\(\) ]+','match')); +0154 [~,geneInd] = ismember(ruleGenes,genes); +0155 +0156 % rules with one or no genes remain unchanged +0157 if numel(ruleGenes) < 2 +0158 rScore = gScores(geneInd); +0159 updatedRule = rule; +0160 return +0161 end +0162 +0163 if ~contains(rule,'&') % rule contains isozymes +0164 +0165 scoreMethod = isozymeScoring; +0166 negInd = gScores(geneInd) < 0; % NaNs will return false here +0167 if all(negInd) +0168 % get the least negative gene, adding a small random value to avoid a tie +0169 [~,maxInd] = max(gScores(geneInd) + rand(size(geneInd))*(1e-8)); +0170 updatedRule = ruleGenes{maxInd}; +0171 elseif sum(~negInd) == 1 +0172 updatedRule = ruleGenes{~negInd}; +0173 else +0174 updatedRule = strjoin(ruleGenes(~negInd),' | '); +0175 if startsWith(rule,'(') +0176 updatedRule = ['(',updatedRule,')']; +0177 end +0178 end +0179 +0180 % update ruleGenes and their indices +0181 ruleGenes = unique(regexp(updatedRule,'[^&|\(\) ]+','match')); +0182 [~,geneInd] = ismember(ruleGenes,genes); +0183 +0184 elseif ~contains(rule,'|') % rule contains enzyme complex +0185 scoreMethod = complexScoring; +0186 updatedRule = rule; +0187 else +0188 error('This function cannot handle rules with both "OR" and "AND" expressions.'); +0189 end +0190 +0191 % score rule +0192 switch lower(scoreMethod) +0193 case 'min' +0194 rScore = min(gScores(geneInd),[],'omitnan'); +0195 case 'max' +0196 rScore = 
max(gScores(geneInd),[],'omitnan'); +0197 case 'median' +0198 rScore = median(gScores(geneInd),'omitnan'); +0199 case 'average' +0200 rScore = mean(gScores(geneInd),'omitnan'); +0201 end 0202 -0203 -0204 function updatedRule = processComplexRule(rule,genes,gScores,isozymeScoring,complexScoring) -0205 % Update reactions containing both AND and OR expressions. -0206 % -0207 % Negative-score genes will be removed if they are isozymic, whereas they -0208 % will not be removed if they are part of an enzyme complex. However, if -0209 % the enzyme complex has a negative score, the entire complex will be -0210 % removed, as long as it is not the only remaining element in the rule. -0211 -0212 -0213 % Specify phrases to search for in the grRule. These phrases will find -0214 % genes grouped by all ANDs (first phrase) or all ORs (second phrase). -0215 search_phrases = {'\([^&|\(\) ]+( & [^&|\(\) ]+)+\)', '\([^&|\(\) ]+( \| [^&|\(\) ]+)+\)'}; -0216 -0217 % initialize some variables -0218 subsets = {}; % subsets are groups of genes grouped by all ANDs or all ORs -0219 c = 1; % counter to keep track of the group (subset) number -0220 r_orig = rule; % record original rule to determine when it stops changing -0221 for k = 1:100 % iterate some arbitrarily high number of times -0222 for j = 1:length(search_phrases) -0223 new_subset = regexp(rule,search_phrases{j},'match')'; % extract subsets -0224 if ~isempty(new_subset) -0225 subsets = [subsets; new_subset]; % append to list of subsets -0226 subset_nums = arrayfun(@num2str,(c:length(subsets))','UniformOutput',false); % get group numbers to be assigned to the new subsets, and convert to strings -0227 rule = regexprep(rule,search_phrases{j},strcat('#',subset_nums,'#'),'once'); % replace the subsets in the expression with their group numbers (enclosed by "#"s) -0228 c = c + length(new_subset); -0229 end -0230 end -0231 if isequal(rule,r_orig) -0232 break; % stop iterating when rule stops changing -0233 else -0234 r_orig = rule; -0235 
end -0236 end -0237 subsets{end+1} = rule; % add final state of rule as the last subset -0238 -0239 % score and update each subset, and append to gene list and gene scores -0240 for i = 1:numel(subsets) -0241 [subsets{i},subset_score] = processSimpleRule(subsets{i},genes,gScores,isozymeScoring,complexScoring); -0242 gScores = [gScores; subset_score]; -0243 genes = [genes; {strcat('#',num2str(i),'#')}]; -0244 end -0245 -0246 % reconstruct the rule from its updated subsets -0247 updatedRule = subsets{end}; -0248 for i = c-1:-1:1 -0249 updatedRule = regexprep(updatedRule,strcat('#',num2str(i),'#'),subsets{i}); -0250 end -0251 -0252 end -0253 +0203 end +0204 +0205 +0206 +0207 function updatedRule = processComplexRule(rule,genes,gScores,isozymeScoring,complexScoring) +0208 % Update reactions containing both AND and OR expressions. +0209 % +0210 % Negative-score genes will be removed if they are isozymic, whereas they +0211 % will not be removed if they are part of an enzyme complex. However, if +0212 % the enzyme complex has a negative score, the entire complex will be +0213 % removed, as long as it is not the only remaining element in the rule. +0214 +0215 +0216 % Specify phrases to search for in the grRule. These phrases will find +0217 % genes grouped by all ANDs (first phrase) or all ORs (second phrase). 
+0218 search_phrases = {'\([^&|\(\) ]+( & [^&|\(\) ]+)+\)', '\([^&|\(\) ]+( \| [^&|\(\) ]+)+\)'}; +0219 +0220 % initialize some variables +0221 subsets = {}; % subsets are groups of genes grouped by all ANDs or all ORs +0222 c = 1; % counter to keep track of the group (subset) number +0223 r_orig = rule; % record original rule to determine when it stops changing +0224 for k = 1:100 % iterate some arbitrarily high number of times +0225 for j = 1:length(search_phrases) +0226 new_subset = regexp(rule,search_phrases{j},'match')'; % extract subsets +0227 if ~isempty(new_subset) +0228 subsets = [subsets; new_subset]; % append to list of subsets +0229 subset_nums = arrayfun(@num2str,(c:length(subsets))','UniformOutput',false); % get group numbers to be assigned to the new subsets, and convert to strings +0230 rule = regexprep(rule,search_phrases{j},strcat('#',subset_nums,'#'),'once'); % replace the subsets in the expression with their group numbers (enclosed by "#"s) +0231 c = c + length(new_subset); +0232 end +0233 end +0234 if isequal(rule,r_orig) +0235 break; % stop iterating when rule stops changing +0236 else +0237 r_orig = rule; +0238 end +0239 end +0240 subsets{end+1} = rule; % add final state of rule as the last subset +0241 +0242 % score and update each subset, and append to gene list and gene scores +0243 for i = 1:numel(subsets) +0244 [subsets{i},subset_score] = processSimpleRule(subsets{i},genes,gScores,isozymeScoring,complexScoring); +0245 gScores = [gScores; subset_score]; +0246 genes = [genes; {strcat('#',num2str(i),'#')}]; +0247 end +0248 +0249 % reconstruct the rule from its updated subsets +0250 updatedRule = subsets{end}; +0251 for i = c-1:-1:1 +0252 updatedRule = regexprep(updatedRule,strcat('#',num2str(i),'#'),subsets{i}); +0253 end 0254 -0255 +0255 end +0256 +0257 +0258
Generated by m2html © 2005
\ No newline at end of file diff --git a/doc/core/addGenesRaven.html b/doc/core/addGenesRaven.html index 38134ede..99bba050 100644 --- a/doc/core/addGenesRaven.html +++ b/doc/core/addGenesRaven.html @@ -42,6 +42,8 @@

DESCRIPTION ^SOURCE CODE ^% default '') 0015 % geneMiriams cell array with MIRIAM structures (optional, 0016 % default []) -0017 % -0018 % newModel an updated model structure +0017 % proteinNames cell array of protein names associated to +0018 % each gene (optional, default '') 0019 % -0020 % NOTE: This function does not make extensive checks about MIRIAM formats, -0021 % forbidden characters or such. -0022 % -0023 % Usage: newModel=addGenesRaven(model,genesToAdd) -0024 -0025 newModel=model; +0020 % newModel an updated model structure +0021 % +0022 % NOTE: This function does not make extensive checks about MIRIAM formats, +0023 % forbidden characters or such. +0024 % +0025 % Usage: newModel=addGenesRaven(model,genesToAdd) 0026 -0027 if isempty(genesToAdd) -0028 return; -0029 end -0030 -0031 %Check some stuff regarding the required fields -0032 if ~isfield(genesToAdd,'genes') -0033 EM='genes is a required field in genesToAdd'; -0034 dispEM(EM); -0035 else -0036 genesToAdd.genes=convertCharArray(genesToAdd.genes); -0037 end -0038 -0039 %Number of genes -0040 nGenes=numel(genesToAdd.genes); -0041 nOldGenes=numel(model.genes); -0042 filler=cell(nGenes,1); -0043 filler(:)={''}; -0044 largeFiller=cell(nOldGenes,1); -0045 largeFiller(:)={''}; -0046 -0047 %Check that no gene ids are already present in the model -0048 I=ismember(genesToAdd.genes,model.genes); -0049 if all(I) -0050 warning('All genes in genesToAdd.genes are already present in model.genes'); -0051 return -0052 elseif any(I) -0053 existingGenes=strjoin(genesToAdd.genes(I), ', '); -0054 warning(['The following genes are already present in model.genes and will therefore not be added: ', existingGenes]) -0055 genesToAdd.genes(I)=[]; -0056 if isfield(genesToAdd,'geneShortNames') -0057 genesToAdd.geneShortNames(I)=[]; -0058 end -0059 if isfield(genesToAdd,'geneMiriams') -0060 genesToAdd.geneMiriams(I)=[]; -0061 end -0062 else -0063 newModel.genes=[newModel.genes;genesToAdd.genes(:)]; -0064 end -0065 -0066 %Some 
more checks and if they pass then add each field to the structure -0067 if isfield(genesToAdd,'geneShortNames') -0068 genesToAdd.geneShortNames=convertCharArray(genesToAdd.geneShortNames); -0069 if numel(genesToAdd.geneShortNames)~=nGenes -0070 EM='genesToAdd.geneShortNames must have the same number of elements as genesToAdd.genes'; -0071 dispEM(EM); -0072 end -0073 %Add empty field if it doesn't exist -0074 if ~isfield(newModel,'geneShortNames') -0075 newModel.geneShortNames=largeFiller; -0076 end -0077 newModel.geneShortNames=[newModel.geneShortNames;genesToAdd.geneShortNames(:)]; -0078 else -0079 %Add empty strings if structure is in model -0080 if isfield(newModel,'geneShortNames') -0081 newModel.geneShortNames=[newModel.geneShortNames;filler]; -0082 end -0083 end -0084 -0085 %Don't check the type of geneMiriams -0086 if isfield(genesToAdd,'geneMiriams') -0087 if numel(genesToAdd.geneMiriams)~=nGenes -0088 EM='genesToAdd.geneMiriams must have the same number of elements as genesToAdd.genes'; -0089 dispEM(EM); -0090 end -0091 %Add empty field if it doesn't exist -0092 if ~isfield(newModel,'geneMiriams') -0093 newModel.geneMiriams=cell(nOldGenes,1); +0027 newModel=model; +0028 +0029 if isempty(genesToAdd) +0030 return; +0031 end +0032 +0033 %Check some stuff regarding the required fields +0034 if ~isfield(genesToAdd,'genes') +0035 EM='genes is a required field in genesToAdd'; +0036 dispEM(EM); +0037 else +0038 genesToAdd.genes=convertCharArray(genesToAdd.genes); +0039 end +0040 +0041 %Number of genes +0042 nGenes=numel(genesToAdd.genes); +0043 nOldGenes=numel(model.genes); +0044 filler=cell(nGenes,1); +0045 filler(:)={''}; +0046 largeFiller=cell(nOldGenes,1); +0047 largeFiller(:)={''}; +0048 +0049 %Check that no gene ids are already present in the model +0050 I=ismember(genesToAdd.genes,model.genes); +0051 if all(I) +0052 warning('All genes in genesToAdd.genes are already present in model.genes'); +0053 return +0054 elseif any(I) +0055 
existingGenes=strjoin(genesToAdd.genes(I), ', '); +0056 warning(['The following genes are already present in model.genes and will therefore not be added: ', existingGenes]) +0057 genesToAdd.genes(I)=[]; +0058 if isfield(genesToAdd,'geneShortNames') +0059 genesToAdd.geneShortNames(I)=[]; +0060 end +0061 if isfield(genesToAdd,'proteinNames') +0062 genesToAdd.proteinNames(I)=[]; +0063 end +0064 if isfield(genesToAdd,'geneMiriams') +0065 genesToAdd.geneMiriams(I)=[]; +0066 end +0067 else +0068 newModel.genes=[newModel.genes;genesToAdd.genes(:)]; +0069 end +0070 +0071 %Some more checks and if they pass then add each field to the structure +0072 if isfield(genesToAdd,'geneShortNames') +0073 genesToAdd.geneShortNames=convertCharArray(genesToAdd.geneShortNames); +0074 if numel(genesToAdd.geneShortNames)~=nGenes +0075 EM='genesToAdd.geneShortNames must have the same number of elements as genesToAdd.genes'; +0076 dispEM(EM); +0077 end +0078 %Add empty field if it doesn't exist +0079 if ~isfield(newModel,'geneShortNames') +0080 newModel.geneShortNames=largeFiller; +0081 end +0082 newModel.geneShortNames=[newModel.geneShortNames;genesToAdd.geneShortNames(:)]; +0083 else +0084 %Add empty strings if structure is in model +0085 if isfield(newModel,'geneShortNames') +0086 newModel.geneShortNames=[newModel.geneShortNames;filler]; +0087 end +0088 end +0089 if isfield(genesToAdd,'proteinNames') +0090 genesToAdd.proteinNames=convertCharArray(genesToAdd.proteinNames); +0091 if numel(genesToAdd.proteinNames)~=nGenes +0092 EM='genesToAdd.proteinNames must have the same number of elements as genesToAdd.genes'; +0093 dispEM(EM); 0094 end -0095 newModel.geneMiriams=[newModel.geneMiriams;genesToAdd.geneMiriams(:)]; -0096 else -0097 if isfield(newModel,'geneMiriams') -0098 newModel.geneMiriams=[newModel.geneMiriams;cell(nGenes,1)]; -0099 end -0100 end -0101 -0102 if isfield(genesToAdd,'geneComps') -0103 if numel(genesToAdd.geneComps)~=nGenes -0104 EM='genesToAdd.geneComps must have the same 
number of elements as genesToAdd.genes'; -0105 dispEM(EM); -0106 end -0107 %Add empty field if it doesn't exist -0108 if ~isfield(newModel,'geneComps') -0109 newModel.geneComps=ones(nOldGenes,1); -0110 EM='Adding genes with compartment information to a model without such information. All existing genes will be assigned to the first compartment'; -0111 dispEM(EM,false); -0112 end -0113 newModel.geneComps=[newModel.geneComps;genesToAdd.geneComps(:)]; -0114 else -0115 if isfield(newModel,'geneComps') -0116 newModel.geneComps=[newModel.geneComps;ones(nGenes,1)]; -0117 fprintf('NOTE: The added genes will be assigned to the first compartment\n'); -0118 end -0119 end -0120 -0121 if isfield(newModel,'geneFrom') -0122 newModel.geneFrom=[newModel.geneFrom;filler]; +0095 %Add empty field if it doesn't exist +0096 if ~isfield(newModel,'proteinNames') +0097 newModel.proteinNames=largeFiller; +0098 end +0099 newModel.proteinNames=[newModel.proteinNames;genesToAdd.proteinNames(:)]; +0100 else +0101 %Add empty strings if structure is in model +0102 if isfield(newModel,'proteinNames') +0103 newModel.proteinNames=[newModel.proteinNames;filler]; +0104 end +0105 end +0106 +0107 +0108 %Don't check the type of geneMiriams +0109 if isfield(genesToAdd,'geneMiriams') +0110 if numel(genesToAdd.geneMiriams)~=nGenes +0111 EM='genesToAdd.geneMiriams must have the same number of elements as genesToAdd.genes'; +0112 dispEM(EM); +0113 end +0114 %Add empty field if it doesn't exist +0115 if ~isfield(newModel,'geneMiriams') +0116 newModel.geneMiriams=cell(nOldGenes,1); +0117 end +0118 newModel.geneMiriams=[newModel.geneMiriams;genesToAdd.geneMiriams(:)]; +0119 else +0120 if isfield(newModel,'geneMiriams') +0121 newModel.geneMiriams=[newModel.geneMiriams;cell(nGenes,1)]; +0122 end 0123 end 0124 -0125 if isfield(newModel,'rxnGeneMat') -0126 newModel.rxnGeneMat=[newModel.rxnGeneMat,zeros(length(newModel.rxns),nGenes)]; -0127 end -0128 end +0125 if isfield(genesToAdd,'geneComps') +0126 if 
numel(genesToAdd.geneComps)~=nGenes +0127 EM='genesToAdd.geneComps must have the same number of elements as genesToAdd.genes'; +0128 dispEM(EM); +0129 end +0130 %Add empty field if it doesn't exist +0131 if ~isfield(newModel,'geneComps') +0132 newModel.geneComps=ones(nOldGenes,1); +0133 EM='Adding genes with compartment information to a model without such information. All existing genes will be assigned to the first compartment'; +0134 dispEM(EM,false); +0135 end +0136 newModel.geneComps=[newModel.geneComps;genesToAdd.geneComps(:)]; +0137 else +0138 if isfield(newModel,'geneComps') +0139 newModel.geneComps=[newModel.geneComps;ones(nGenes,1)]; +0140 fprintf('NOTE: The added genes will be assigned to the first compartment\n'); +0141 end +0142 end +0143 +0144 if isfield(newModel,'geneFrom') +0145 newModel.geneFrom=[newModel.geneFrom;filler]; +0146 end +0147 +0148 if isfield(newModel,'rxnGeneMat') +0149 newModel.rxnGeneMat=[newModel.rxnGeneMat,zeros(length(newModel.rxns),nGenes)]; +0150 end +0151 end
Generated by m2html © 2005
\ No newline at end of file diff --git a/doc/core/deleteUnusedGenes.html b/doc/core/deleteUnusedGenes.html index ff226d11..bd8131d5 100644 --- a/doc/core/deleteUnusedGenes.html +++ b/doc/core/deleteUnusedGenes.html @@ -90,18 +90,22 @@

SOURCE CODE ^end 0039 -0040 if isfield(reducedModel,'geneMiriams') -0041 reducedModel.geneMiriams=reducedModel.geneMiriams(toKeep); +0040 if isfield(reducedModel,'proteinNames') +0041 reducedModel.proteinNames=reducedModel.proteinNames(toKeep); 0042 end 0043 -0044 if isfield(reducedModel,'geneFrom') -0045 reducedModel.geneFrom=reducedModel.geneFrom(toKeep); +0044 if isfield(reducedModel,'geneMiriams') +0045 reducedModel.geneMiriams=reducedModel.geneMiriams(toKeep); 0046 end 0047 -0048 if isfield(reducedModel,'geneComps') -0049 reducedModel.geneComps=reducedModel.geneComps(toKeep); +0048 if isfield(reducedModel,'geneFrom') +0049 reducedModel.geneFrom=reducedModel.geneFrom(toKeep); 0050 end -0051 end +0051 +0052 if isfield(reducedModel,'geneComps') +0053 reducedModel.geneComps=reducedModel.geneComps(toKeep); +0054 end +0055 end
Generated by m2html © 2005
\ No newline at end of file diff --git a/doc/core/getModelFromHomology.html b/doc/core/getModelFromHomology.html index 6e0e7f06..650f0647 100644 --- a/doc/core/getModelFromHomology.html +++ b/doc/core/getModelFromHomology.html @@ -214,447 +214,450 @@

SOURCE CODE ^for i=1:numel(models) 0109 modelNames{i}=models{i}.id; -0110 %Gene short names and geneMiriams are often different between species, -0111 %safer not to include them +0110 %Gene short names, geneMiriams and proteins are often different +0111 %between species, safer not to include them 0112 if isfield(models{i},'geneShortNames') 0113 models{i}=rmfield(models{i},'geneShortNames'); 0114 end 0115 if isfield(models{i},'geneMiriams') 0116 models{i}=rmfield(models{i},'geneMiriams'); 0117 end -0118 %The geneFrom field also loses meaning if the genes are replaced by -0119 %orthologs -0120 if isfield(models{i},'geneFrom') -0121 models{i}=rmfield(models{i},'geneFrom'); -0122 end -0123 end -0124 -0125 %Check that genes do not begin with ( or end with ), as this makes problematic grRules -0126 for i=1:numel(blastStructure) -0127 problemGenes = startsWith(blastStructure(i).fromGenes,'(') | endsWith(blastStructure(i).fromGenes,')'); -0128 if any(problemGenes) -0129 error(['One or multiple gene identifiers from ' blastStructure(i).fromId ... -0130 ' starts with ''('' and/or ends with '')'', which is not allowed']) -0131 end -0132 end -0133 -0134 %Assume for now that all information is there and that it's correct. This -0135 %is important to fix since no further checks are being made! +0118 if isfield(models{i},'proteinNames') +0119 models{i}=rmfield(models{i},'proteinNames'); +0120 end +0121 %The geneFrom field also loses meaning if the genes are replaced by +0122 %orthologs +0123 if isfield(models{i},'geneFrom') +0124 models{i}=rmfield(models{i},'geneFrom'); +0125 end +0126 end +0127 +0128 %Check that genes do not begin with ( or end with ), as this makes problematic grRules +0129 for i=1:numel(blastStructure) +0130 problemGenes = startsWith(blastStructure(i).fromGenes,'(') | endsWith(blastStructure(i).fromGenes,')'); +0131 if any(problemGenes) +0132 error(['One or multiple gene identifiers from ' blastStructure(i).fromId ... 
+0133 ' starts with ''('' and/or ends with '')'', which is not allowed']) +0134 end +0135 end 0136 -0137 %Check whether provided fasta files use the same gene identifiers as -0138 %provided template models -0139 for i=1:numel(blastStructure) -0140 if ~strcmp(blastStructure(i).fromId,getModelFor) -0141 j=strcmpi(blastStructure(i).fromId,modelNames); -0142 if j==0 -0143 error(['While the blastStructure contains sequences from '... -0144 'organismID "%s" (as\nprovided in getBlast), none of '... -0145 'template models have this id (as model.id)'],... -0146 string(blastStructure(i).fromId)); -0147 end -0148 k=sum(ismember(blastStructure(i).fromGenes,models{j}.genes)); -0149 if k<(numel(models{j}.genes)*0.05) -0150 error(['Less than 5%% of the genes in the template model '... -0151 'with model.id "%s"\ncan be found in the blastStructure. '... -0152 'Ensure that the protein FASTA\nused in getBlast and '... -0153 'the template model used in getModelFromHomology\nuse '... -0154 'the same style of gene identifiers'],models{j}.id) -0155 end -0156 end -0157 end -0158 -0159 %Standardize grRules of template models -0160 for i=1:length(models) -0161 fprintf('\nStandardizing grRules of template model with ID "%s" ...',models{i}.id); -0162 [models{i}.grRules,models{i}.rxnGeneMat]=standardizeGrRules(models{i},false); -0163 end -0164 fprintf(' done\n'); -0165 -0166 %Remove all gene matches that are below the cutoffs -0167 for i=1:numel(blastStructure) -0168 indexes=blastStructure(i).evalue<maxE & blastStructure(i).aligLen>=minLen & blastStructure(i).identity>=minIde; %Do it in this direction to lose NaNs -0169 blastStructure(i).fromGenes(~indexes)=[]; -0170 blastStructure(i).toGenes(~indexes)=[]; -0171 blastStructure(i).evalue(~indexes)=[]; -0172 blastStructure(i).identity(~indexes)=[]; -0173 blastStructure(i).aligLen(~indexes)=[]; -0174 blastStructure(i).bitscore(~indexes)=[]; -0175 blastStructure(i).ppos(~indexes)=[]; -0176 end -0177 -0178 %Remove all reactions from the models that 
have no genes encoding for them. -0179 %Also remove all genes that encode for no reactions. There shouldn't be any -0180 %but there might be mistakes -0181 for i=1:numel(models) -0182 [hasGenes, ~]=find(models{i}.rxnGeneMat); -0183 hasNoGenes=1:numel(models{i}.rxns); -0184 hasNoGenes(hasGenes)=[]; -0185 models{i}=removeReactions(models{i},hasNoGenes,true,true); -0186 end -0187 -0188 %Create a structure that contains all genes used in the blasts in any -0189 %direction for each of the models in 'models' and for the new organism. The -0190 %first cell is for the new organism and then according to the preferred -0191 %order. If no such order is supplied, then according to the order in -0192 %'models' -0193 allGenes=cell(numel(models)+1,1); -0194 if isempty(preferredOrder) -0195 useOrder=modelNames; -0196 else -0197 useOrder=preferredOrder; -0198 end -0199 -0200 %Get the corresponding indexes for those models in the 'models' structure -0201 useOrderIndexes=zeros(numel(models),1); -0202 for i=1:numel(models) -0203 [~, index]=ismember(models{i}.id,useOrder); -0204 useOrderIndexes(index)=i; -0205 end -0206 -0207 %Remove all genes from the blast structure that have no genes in the models -0208 if onlyGenesInModels==true -0209 modelGenes={}; -0210 for i=1:numel(models) -0211 modelGenes=[modelGenes;models{i}.genes]; -0212 end -0213 for i=1:numel(blastStructure) -0214 %Check to see if it should match the toId or fromId -0215 if strcmpi(blastStructure(i).fromId,getModelFor) -0216 I=ismember(blastStructure(i).toGenes,modelGenes); -0217 else -0218 I=ismember(blastStructure(i).fromGenes,modelGenes); -0219 end -0220 blastStructure(i).fromGenes(~I)=[]; -0221 blastStructure(i).toGenes(~I)=[]; -0222 blastStructure(i).evalue(~I)=[]; -0223 blastStructure(i).identity(~I)=[]; -0224 blastStructure(i).aligLen(~I)=[]; -0225 blastStructure(i).bitscore(~I)=[]; -0226 blastStructure(i).ppos(~I)=[]; -0227 -0228 %Check that no matching in blastStructure is empty. 
This happens if -0229 %no genes in the models are present in the corresponding sheet -0230 if isempty(blastStructure(i).fromGenes) -0231 EM=['No genes in matching from ' blastStructure(i).fromId ' to ' blastStructure(i).toId ' are present in the corresponding model']; -0232 dispEM(EM); -0233 end -0234 end -0235 end -0236 -0237 %If only best orthologs are to be used then all other measurements are -0238 %deleted from the blastStructure. All code after this stays the same. This -0239 %means that preferred order can still matter. The best ortholog scoring is -0240 %based only on the E-value -0241 if strictness==3 -0242 for i=1:numel(blastStructure) -0243 keep=false(numel(blastStructure(i).toGenes),1); -0244 [allFromGenes, ~, I]=unique(blastStructure(i).fromGenes); -0245 -0246 %It would be nice to get rid of this loop -0247 for j=1:numel(allFromGenes) -0248 allMatches=find(I==j); -0249 bestMatches=allMatches(blastStructure(i).evalue(allMatches)==min(blastStructure(i).evalue(allMatches))); -0250 -0251 %Keep the best matches -0252 keep(bestMatches)=true; -0253 end -0254 -0255 %Delete all matches that were not best matches -0256 blastStructure(i).fromGenes(~keep)=[]; -0257 blastStructure(i).toGenes(~keep)=[]; -0258 blastStructure(i).evalue(~keep)=[]; -0259 blastStructure(i).identity(~keep)=[]; -0260 blastStructure(i).aligLen(~keep)=[]; -0261 blastStructure(i).bitscore(~keep)=[]; -0262 blastStructure(i).ppos(~keep)=[]; -0263 end -0264 end -0265 -0266 useOrder=[{getModelFor};useOrder]; -0267 -0268 for i=1:numel(blastStructure) -0269 [~, toIndex]=ismember(blastStructure(i).toId,useOrder); -0270 [~, fromIndex]=ismember(blastStructure(i).fromId,useOrder); -0271 -0272 %Add all genes to the corresponding list in allGenes -0273 allGenes{toIndex}=[allGenes{toIndex};blastStructure(i).toGenes]; -0274 allGenes{fromIndex}=[allGenes{fromIndex};blastStructure(i).fromGenes]; -0275 end -0276 -0277 %Keep only the unique gene names -0278 maxOtherGeneNr=0; %Determines the dimension of the 
connectivity matrixes -0279 for i=1:numel(allGenes) -0280 allGenes{i}=unique(allGenes{i}); -0281 if i>1 -0282 if numel(allGenes{i})>maxOtherGeneNr -0283 maxOtherGeneNr=numel(allGenes{i}); -0284 end -0285 end -0286 end -0287 -0288 %Generate a cell array of matrixes that describes how the genes in the new -0289 %organism map to the models. Each cell matches to the corresponding model -0290 %in useOrder (starting at 2 of course). First dimension is gene in new -0291 %organism, second which gene it is in the other organism. The second matrix -0292 %describes how they map back. -0293 -0294 %As it is now, a significant match is indicated by a 1. This could be -0295 %expanded to contain some kind of significance level. The first dimension -0296 %is still the genes in the new model. -0297 -0298 allTo=cell(numel(useOrder)-1,1); -0299 allFrom=cell(numel(useOrder)-1,1); +0137 %Assume for now that all information is there and that it's correct. This +0138 %is important to fix since no further checks are being made! +0139 +0140 %Check whether provided fasta files use the same gene identifiers as +0141 %provided template models +0142 for i=1:numel(blastStructure) +0143 if ~strcmp(blastStructure(i).fromId,getModelFor) +0144 j=strcmpi(blastStructure(i).fromId,modelNames); +0145 if j==0 +0146 error(['While the blastStructure contains sequences from '... +0147 'organismID "%s" (as\nprovided in getBlast), none of '... +0148 'template models have this id (as model.id)'],... +0149 string(blastStructure(i).fromId)); +0150 end +0151 k=sum(ismember(blastStructure(i).fromGenes,models{j}.genes)); +0152 if k<(numel(models{j}.genes)*0.05) +0153 error(['Less than 5%% of the genes in the template model '... +0154 'with model.id "%s"\ncan be found in the blastStructure. '... +0155 'Ensure that the protein FASTA\nused in getBlast and '... +0156 'the template model used in getModelFromHomology\nuse '... 
+0157 'the same style of gene identifiers'],models{j}.id) +0158 end +0159 end +0160 end +0161 +0162 %Standardize grRules of template models +0163 for i=1:length(models) +0164 fprintf('\nStandardizing grRules of template model with ID "%s" ...',models{i}.id); +0165 [models{i}.grRules,models{i}.rxnGeneMat]=standardizeGrRules(models{i},false); +0166 end +0167 fprintf(' done\n'); +0168 +0169 %Remove all gene matches that are below the cutoffs +0170 for i=1:numel(blastStructure) +0171 indexes=blastStructure(i).evalue<maxE & blastStructure(i).aligLen>=minLen & blastStructure(i).identity>=minIde; %Do it in this direction to lose NaNs +0172 blastStructure(i).fromGenes(~indexes)=[]; +0173 blastStructure(i).toGenes(~indexes)=[]; +0174 blastStructure(i).evalue(~indexes)=[]; +0175 blastStructure(i).identity(~indexes)=[]; +0176 blastStructure(i).aligLen(~indexes)=[]; +0177 blastStructure(i).bitscore(~indexes)=[]; +0178 blastStructure(i).ppos(~indexes)=[]; +0179 end +0180 +0181 %Remove all reactions from the models that have no genes encoding for them. +0182 %Also remove all genes that encode for no reactions. There shouldn't be any +0183 %but there might be mistakes +0184 for i=1:numel(models) +0185 [hasGenes, ~]=find(models{i}.rxnGeneMat); +0186 hasNoGenes=1:numel(models{i}.rxns); +0187 hasNoGenes(hasGenes)=[]; +0188 models{i}=removeReactions(models{i},hasNoGenes,true,true); +0189 end +0190 +0191 %Create a structure that contains all genes used in the blasts in any +0192 %direction for each of the models in 'models' and for the new organism. The +0193 %first cell is for the new organism and then according to the preferred +0194 %order. 
If no such order is supplied, then according to the order in +0195 %'models' +0196 allGenes=cell(numel(models)+1,1); +0197 if isempty(preferredOrder) +0198 useOrder=modelNames; +0199 else +0200 useOrder=preferredOrder; +0201 end +0202 +0203 %Get the corresponding indexes for those models in the 'models' structure +0204 useOrderIndexes=zeros(numel(models),1); +0205 for i=1:numel(models) +0206 [~, index]=ismember(models{i}.id,useOrder); +0207 useOrderIndexes(index)=i; +0208 end +0209 +0210 %Remove all genes from the blast structure that have no genes in the models +0211 if onlyGenesInModels==true +0212 modelGenes={}; +0213 for i=1:numel(models) +0214 modelGenes=[modelGenes;models{i}.genes]; +0215 end +0216 for i=1:numel(blastStructure) +0217 %Check to see if it should match the toId or fromId +0218 if strcmpi(blastStructure(i).fromId,getModelFor) +0219 I=ismember(blastStructure(i).toGenes,modelGenes); +0220 else +0221 I=ismember(blastStructure(i).fromGenes,modelGenes); +0222 end +0223 blastStructure(i).fromGenes(~I)=[]; +0224 blastStructure(i).toGenes(~I)=[]; +0225 blastStructure(i).evalue(~I)=[]; +0226 blastStructure(i).identity(~I)=[]; +0227 blastStructure(i).aligLen(~I)=[]; +0228 blastStructure(i).bitscore(~I)=[]; +0229 blastStructure(i).ppos(~I)=[]; +0230 +0231 %Check that no matching in blastStructure is empty. This happens if +0232 %no genes in the models are present in the corresponding sheet +0233 if isempty(blastStructure(i).fromGenes) +0234 EM=['No genes in matching from ' blastStructure(i).fromId ' to ' blastStructure(i).toId ' are present in the corresponding model']; +0235 dispEM(EM); +0236 end +0237 end +0238 end +0239 +0240 %If only best orthologs are to be used then all other measurements are +0241 %deleted from the blastStructure. All code after this stays the same. This +0242 %means that preferred order can still matter. 
The best ortholog scoring is +0243 %based only on the E-value +0244 if strictness==3 +0245 for i=1:numel(blastStructure) +0246 keep=false(numel(blastStructure(i).toGenes),1); +0247 [allFromGenes, ~, I]=unique(blastStructure(i).fromGenes); +0248 +0249 %It would be nice to get rid of this loop +0250 for j=1:numel(allFromGenes) +0251 allMatches=find(I==j); +0252 bestMatches=allMatches(blastStructure(i).evalue(allMatches)==min(blastStructure(i).evalue(allMatches))); +0253 +0254 %Keep the best matches +0255 keep(bestMatches)=true; +0256 end +0257 +0258 %Delete all matches that were not best matches +0259 blastStructure(i).fromGenes(~keep)=[]; +0260 blastStructure(i).toGenes(~keep)=[]; +0261 blastStructure(i).evalue(~keep)=[]; +0262 blastStructure(i).identity(~keep)=[]; +0263 blastStructure(i).aligLen(~keep)=[]; +0264 blastStructure(i).bitscore(~keep)=[]; +0265 blastStructure(i).ppos(~keep)=[]; +0266 end +0267 end +0268 +0269 useOrder=[{getModelFor};useOrder]; +0270 +0271 for i=1:numel(blastStructure) +0272 [~, toIndex]=ismember(blastStructure(i).toId,useOrder); +0273 [~, fromIndex]=ismember(blastStructure(i).fromId,useOrder); +0274 +0275 %Add all genes to the corresponding list in allGenes +0276 allGenes{toIndex}=[allGenes{toIndex};blastStructure(i).toGenes]; +0277 allGenes{fromIndex}=[allGenes{fromIndex};blastStructure(i).fromGenes]; +0278 end +0279 +0280 %Keep only the unique gene names +0281 maxOtherGeneNr=0; %Determines the dimension of the connectivity matrixes +0282 for i=1:numel(allGenes) +0283 allGenes{i}=unique(allGenes{i}); +0284 if i>1 +0285 if numel(allGenes{i})>maxOtherGeneNr +0286 maxOtherGeneNr=numel(allGenes{i}); +0287 end +0288 end +0289 end +0290 +0291 %Generate a cell array of matrixes that describes how the genes in the new +0292 %organism map to the models. Each cell matches to the corresponding model +0293 %in useOrder (starting at 2 of course). First dimension is gene in new +0294 %organism, second which gene it is in the other organism. 
The second matrix +0295 %describes how they map back. +0296 +0297 %As it is now, a significant match is indicated by a 1. This could be +0298 %expanded to contain some kind of significance level. The first dimension +0299 %is still the genes in the new model. 0300 -0301 for i=1:numel(useOrder)-1 -0302 allTo{i}=sparse(numel(allGenes{1}),numel(allGenes{i+1})); -0303 allFrom{i}=sparse(numel(allGenes{1}),numel(allGenes{i+1})); -0304 end -0305 -0306 %Fill the matches to other species -0307 for i=1:numel(blastStructure) -0308 if strcmp(blastStructure(i).toId,getModelFor) -0309 %This was 'to' the new organism. They should all match so no checks -0310 %are being made -0311 [~, a]=ismember(blastStructure(i).toGenes,allGenes{1}); -0312 [~, fromModel]=ismember(blastStructure(i).fromId,useOrder); -0313 [~, b]=ismember(blastStructure(i).fromGenes,allGenes{fromModel}); -0314 idx = sub2ind(size(allTo{fromModel-1}), a, b); -0315 allTo{fromModel-1}(idx)=1; -0316 else -0317 %This was 'from' the new organism -0318 [~, a]=ismember(blastStructure(i).fromGenes,allGenes{1}); -0319 [~, toModel]=ismember(blastStructure(i).toId,useOrder); -0320 [~, b]=ismember(blastStructure(i).toGenes,allGenes{toModel}); -0321 idx = sub2ind(size(allFrom{toModel-1}), a, b); -0322 allFrom{toModel-1}(idx)=1; -0323 end -0324 end -0325 -0326 %Now we have all the gene matches in a convenient way. For all the genes in -0327 %the new organism get the genes that should be included from other -0328 %organisms. If all genes should be included this simply means keep the -0329 %allFrom matrix as it is. If only orthologs which could be mapped in both -0330 %BLAST directions are to be included then only those elements are kept. 
-0331 -0332 finalMappings=cell(numel(useOrder)-1,1); -0333 if strictness==1 || strictness==3 -0334 for j=1:numel(allFrom) -0335 finalMappings{j}=allTo{j}~=0 & allFrom{j}~=0; -0336 end -0337 else -0338 if mapNewGenesToOld==true -0339 finalMappings=allFrom; -0340 else -0341 finalMappings=allTo; -0342 end -0343 end -0344 -0345 %Remove all genes from the mapping that are not in the models. This doesn't -0346 %do much if only genes in the models were used for the original mapping. -0347 %Also simplify the finalMapping and allGenes structures so that they only -0348 %contain mappings that exist -0349 usedNewGenes=false(numel(allGenes{1}),1); -0350 -0351 for i=1:numel(allGenes)-1 -0352 %First remove mappings for those genes that are not in the model -0353 if onlyGenesInModels==false -0354 a=ismember(allGenes{i+1},models{useOrderIndexes(i)}.genes); -0355 finalMappings{i}(:,~a)=false; -0356 end -0357 -0358 %Then remove unused ones and simplify -0359 [a, b]=find(finalMappings{i}); -0360 usedGenes=false(numel(allGenes{i+1}),1); -0361 usedNewGenes(a)=true; -0362 usedGenes(b)=true; -0363 finalMappings{i}=finalMappings{i}(:,usedGenes); -0364 allGenes{i+1}=allGenes{i+1}(usedGenes); -0365 end -0366 -0367 %Remove all new genes that have not been mapped to anything -0368 allGenes{1}=allGenes{1}(usedNewGenes); -0369 for i=1:numel(finalMappings) -0370 finalMappings{i}=finalMappings{i}(usedNewGenes,:); -0371 end -0372 -0373 %Now is it time to choose which reactions should be included from which -0374 %models. If there is a preferred order specified then each gene can only -0375 %result in reactions from one model, otherwise they should all be included -0376 -0377 %Start by simplifying the models by removing genes/reactions that are not -0378 %used. This is where it gets weird with complexes, especially "or" -0379 %complexes. In this step only reactions which are encoded by one single -0380 %gene, or where all genes should be deleted, are deleted. 
The info on the -0381 %full complex is still present in the grRules -0382 -0383 for i=1:numel(models) -0384 a=ismember(models{useOrderIndexes(i)}.genes,allGenes{i+1}); -0385 -0386 %Remove reactions that are not associated to any of the genes in -0387 %allGenes, thereby also keeping complexes where only for one of the -0388 %genes was matched -0389 [rxnsToKeep,~] = find(models{useOrderIndexes(i)}.rxnGeneMat(:,a)); -0390 rxnsToRemove = repmat(1,numel(models{useOrderIndexes(i)}.rxns),1); -0391 rxnsToRemove(rxnsToKeep) = 0; -0392 rxnsToRemove = find(rxnsToRemove); -0393 models{useOrderIndexes(i)}=removeReactions(models{useOrderIndexes(i)},rxnsToRemove,true,true,true); -0394 end -0395 -0396 %Since mergeModels function will be used in the end, the models are -0397 %simplified further by deleting genes/reactions in the order specified by -0398 %preferredOrder. This means that the last model will only contain reactions -0399 %for genes that mapped only to that model -0400 -0401 allUsedGenes=false(numel(allGenes{1}),1); -0402 -0403 if ~isempty(preferredOrder) && numel(models)>1 -0404 [usedGenes, ~]=find(finalMappings{1}); %All that are used in the first model in preferredOrder -0405 allUsedGenes(usedGenes)=true; -0406 for i=2:numel(finalMappings) -0407 [usedGenes, ~]=find(finalMappings{i}); -0408 usedGenes=unique(usedGenes); -0409 a=ismember(usedGenes,find(allUsedGenes)); -0410 -0411 [~, genesToDelete]=find(finalMappings{i}(usedGenes(a),:)); %IMPORTANT! IS it really correct to remove all genes that map? 
-0412 genesToDelete=unique(genesToDelete); %Maybe not needed, but for clarity if nothing else +0301 allTo=cell(numel(useOrder)-1,1); +0302 allFrom=cell(numel(useOrder)-1,1); +0303 +0304 for i=1:numel(useOrder)-1 +0305 allTo{i}=sparse(numel(allGenes{1}),numel(allGenes{i+1})); +0306 allFrom{i}=sparse(numel(allGenes{1}),numel(allGenes{i+1})); +0307 end +0308 +0309 %Fill the matches to other species +0310 for i=1:numel(blastStructure) +0311 if strcmp(blastStructure(i).toId,getModelFor) +0312 %This was 'to' the new organism. They should all match so no checks +0313 %are being made +0314 [~, a]=ismember(blastStructure(i).toGenes,allGenes{1}); +0315 [~, fromModel]=ismember(blastStructure(i).fromId,useOrder); +0316 [~, b]=ismember(blastStructure(i).fromGenes,allGenes{fromModel}); +0317 idx = sub2ind(size(allTo{fromModel-1}), a, b); +0318 allTo{fromModel-1}(idx)=1; +0319 else +0320 %This was 'from' the new organism +0321 [~, a]=ismember(blastStructure(i).fromGenes,allGenes{1}); +0322 [~, toModel]=ismember(blastStructure(i).toId,useOrder); +0323 [~, b]=ismember(blastStructure(i).toGenes,allGenes{toModel}); +0324 idx = sub2ind(size(allFrom{toModel-1}), a, b); +0325 allFrom{toModel-1}(idx)=1; +0326 end +0327 end +0328 +0329 %Now we have all the gene matches in a convenient way. For all the genes in +0330 %the new organism get the genes that should be included from other +0331 %organisms. If all genes should be included this simply means keep the +0332 %allFrom matrix as it is. If only orthologs which could be mapped in both +0333 %BLAST directions are to be included then only those elements are kept. 
+0334 +0335 finalMappings=cell(numel(useOrder)-1,1); +0336 if strictness==1 || strictness==3 +0337 for j=1:numel(allFrom) +0338 finalMappings{j}=allTo{j}~=0 & allFrom{j}~=0; +0339 end +0340 else +0341 if mapNewGenesToOld==true +0342 finalMappings=allFrom; +0343 else +0344 finalMappings=allTo; +0345 end +0346 end +0347 +0348 %Remove all genes from the mapping that are not in the models. This doesn't +0349 %do much if only genes in the models were used for the original mapping. +0350 %Also simplify the finalMapping and allGenes structures so that they only +0351 %contain mappings that exist +0352 usedNewGenes=false(numel(allGenes{1}),1); +0353 +0354 for i=1:numel(allGenes)-1 +0355 %First remove mappings for those genes that are not in the model +0356 if onlyGenesInModels==false +0357 a=ismember(allGenes{i+1},models{useOrderIndexes(i)}.genes); +0358 finalMappings{i}(:,~a)=false; +0359 end +0360 +0361 %Then remove unused ones and simplify +0362 [a, b]=find(finalMappings{i}); +0363 usedGenes=false(numel(allGenes{i+1}),1); +0364 usedNewGenes(a)=true; +0365 usedGenes(b)=true; +0366 finalMappings{i}=finalMappings{i}(:,usedGenes); +0367 allGenes{i+1}=allGenes{i+1}(usedGenes); +0368 end +0369 +0370 %Remove all new genes that have not been mapped to anything +0371 allGenes{1}=allGenes{1}(usedNewGenes); +0372 for i=1:numel(finalMappings) +0373 finalMappings{i}=finalMappings{i}(usedNewGenes,:); +0374 end +0375 +0376 %Now is it time to choose which reactions should be included from which +0377 %models. If there is a preferred order specified then each gene can only +0378 %result in reactions from one model, otherwise they should all be included +0379 +0380 %Start by simplifying the models by removing genes/reactions that are not +0381 %used. This is where it gets weird with complexes, especially "or" +0382 %complexes. In this step only reactions which are encoded by one single +0383 %gene, or where all genes should be deleted, are deleted. 
The info on the +0384 %full complex is still present in the grRules +0385 +0386 for i=1:numel(models) +0387 a=ismember(models{useOrderIndexes(i)}.genes,allGenes{i+1}); +0388 +0389 %Remove reactions that are not associated to any of the genes in +0390 %allGenes, thereby also keeping complexes where only for one of the +0391 %genes was matched +0392 [rxnsToKeep,~] = find(models{useOrderIndexes(i)}.rxnGeneMat(:,a)); +0393 rxnsToRemove = repmat(1,numel(models{useOrderIndexes(i)}.rxns),1); +0394 rxnsToRemove(rxnsToKeep) = 0; +0395 rxnsToRemove = find(rxnsToRemove); +0396 models{useOrderIndexes(i)}=removeReactions(models{useOrderIndexes(i)},rxnsToRemove,true,true,true); +0397 end +0398 +0399 %Since mergeModels function will be used in the end, the models are +0400 %simplified further by deleting genes/reactions in the order specified by +0401 %preferredOrder. This means that the last model will only contain reactions +0402 %for genes that mapped only to that model +0403 +0404 allUsedGenes=false(numel(allGenes{1}),1); +0405 +0406 if ~isempty(preferredOrder) && numel(models)>1 +0407 [usedGenes, ~]=find(finalMappings{1}); %All that are used in the first model in preferredOrder +0408 allUsedGenes(usedGenes)=true; +0409 for i=2:numel(finalMappings) +0410 [usedGenes, ~]=find(finalMappings{i}); +0411 usedGenes=unique(usedGenes); +0412 a=ismember(usedGenes,find(allUsedGenes)); 0413 -0414 %Remove all the genes that were already found and add the other -0415 %ones to allUsedGenes -0416 models{useOrderIndexes(i)}=removeGenes(models{useOrderIndexes(i)},allGenes{i+1}(genesToDelete),true,true,false); -0417 allUsedGenes(usedGenes)=true; -0418 -0419 %Remove the deleted genes from finalMappings and allGenes. -0420 finalMappings{i}(:,genesToDelete)=[]; -0421 allGenes{i+1}(genesToDelete)=[]; -0422 end -0423 end -0424 -0425 %Now loop through the models and update the gene associations. 
Genes not -0426 %belonging to the new organism will be renamed as 'OLD_MODELID_gene' -0427 for i=1:numel(models) -0428 %Find all the new genes that should be used for this model -0429 [newGenes, oldGenes]=find(finalMappings{i}); -0430 -0431 %Create a new gene list with the genes from the new organism and those -0432 %genes that could not be removed -0433 replaceableGenes=allGenes{i+1}(unique(oldGenes)); -0434 nonReplaceableGenes=setdiff(models{useOrderIndexes(i)}.genes,replaceableGenes); -0435 fullGeneList=[allGenes{1}(unique(newGenes));nonReplaceableGenes]; -0436 -0437 %Just to save some indexing later. This is the LAST index of -0438 %replaceable ones -0439 nonRepStartIndex=numel(unique(newGenes)); -0440 -0441 %Construct a new rxnGeneMat -0442 newRxnGeneMat=sparse(numel(models{useOrderIndexes(i)}.rxns),numel(fullGeneList)); +0414 [~, genesToDelete]=find(finalMappings{i}(usedGenes(a),:)); %IMPORTANT! IS it really correct to remove all genes that map? +0415 genesToDelete=unique(genesToDelete); %Maybe not needed, but for clarity if nothing else +0416 +0417 %Remove all the genes that were already found and add the other +0418 %ones to allUsedGenes +0419 models{useOrderIndexes(i)}=removeGenes(models{useOrderIndexes(i)},allGenes{i+1}(genesToDelete),true,true,false); +0420 allUsedGenes(usedGenes)=true; +0421 +0422 %Remove the deleted genes from finalMappings and allGenes. +0423 finalMappings{i}(:,genesToDelete)=[]; +0424 allGenes{i+1}(genesToDelete)=[]; +0425 end +0426 end +0427 +0428 %Now loop through the models and update the gene associations. 
Genes not +0429 %belonging to the new organism will be renamed as 'OLD_MODELID_gene' +0430 for i=1:numel(models) +0431 %Find all the new genes that should be used for this model +0432 [newGenes, oldGenes]=find(finalMappings{i}); +0433 +0434 %Create a new gene list with the genes from the new organism and those +0435 %genes that could not be removed +0436 replaceableGenes=allGenes{i+1}(unique(oldGenes)); +0437 nonReplaceableGenes=setdiff(models{useOrderIndexes(i)}.genes,replaceableGenes); +0438 fullGeneList=[allGenes{1}(unique(newGenes));nonReplaceableGenes]; +0439 +0440 %Just to save some indexing later. This is the LAST index of +0441 %replaceable ones +0442 nonRepStartIndex=numel(unique(newGenes)); 0443 -0444 %Now update the rxnGeneMat. This is a little tricky and could -0445 %probably be done in a more efficient way, but I just loop through the -0446 %reactions and add them one by one -0447 for j=1:numel(models{useOrderIndexes(i)}.rxns) -0448 %Get the old genes encoding for this reaction -0449 [~, oldGeneIds]=find(models{useOrderIndexes(i)}.rxnGeneMat(j,:)); -0450 -0451 %Update the matrix for each gene. This includes replacing one gene -0452 %with several new ones if there were several matches -0453 for k=1:numel(oldGeneIds) -0454 %Match the gene to one in the gene list. This is done as a text -0455 %match. Could probably be done better, but I'm a little lost in -0456 %the indexing -0457 -0458 geneName=models{useOrderIndexes(i)}.genes(oldGeneIds(k)); -0459 -0460 %First search in the mappable genes -0461 mapIndex=find(ismember(allGenes{i+1},geneName)); +0444 %Construct a new rxnGeneMat +0445 newRxnGeneMat=sparse(numel(models{useOrderIndexes(i)}.rxns),numel(fullGeneList)); +0446 +0447 %Now update the rxnGeneMat. 
This is a little tricky and could +0448 %probably be done in a more efficient way, but I just loop through the +0449 %reactions and add them one by one +0450 for j=1:numel(models{useOrderIndexes(i)}.rxns) +0451 %Get the old genes encoding for this reaction +0452 [~, oldGeneIds]=find(models{useOrderIndexes(i)}.rxnGeneMat(j,:)); +0453 +0454 %Update the matrix for each gene. This includes replacing one gene +0455 %with several new ones if there were several matches +0456 for k=1:numel(oldGeneIds) +0457 %Match the gene to one in the gene list. This is done as a text +0458 %match. Could probably be done better, but I'm a little lost in +0459 %the indexing +0460 +0461 geneName=models{useOrderIndexes(i)}.genes(oldGeneIds(k)); 0462 -0463 if ~isempty(mapIndex) -0464 % add the old genes -0465 hitGenes.oldGenes = [hitGenes.oldGenes, {geneName}]; -0466 -0467 %Get the new genes for that gene -0468 a=find(finalMappings{i}(:,mapIndex)); +0463 %First search in the mappable genes +0464 mapIndex=find(ismember(allGenes{i+1},geneName)); +0465 +0466 if ~isempty(mapIndex) +0467 % add the old genes +0468 hitGenes.oldGenes = [hitGenes.oldGenes, {geneName}]; 0469 -0470 %Find the positions of these genes in the final gene list -0471 [~, b]=ismember(allGenes{1}(a),fullGeneList); +0470 %Get the new genes for that gene +0471 a=find(finalMappings{i}(:,mapIndex)); 0472 -0473 %Update the matrix -0474 newRxnGeneMat(j,b)=1; +0473 %Find the positions of these genes in the final gene list +0474 [~, b]=ismember(allGenes{1}(a),fullGeneList); 0475 -0476 %Update the grRules string. This is tricky, but I hope that -0477 %it's ok to replace the old gene name with the new one and -0478 %add ') or (' if there were several matches. Be sure of -0479 %this! 
-0480 repString=fullGeneList{b(1)}; -0481 if numel(b)>1 -0482 for l=2:numel(b) -0483 repString=[repString ' or ' fullGeneList{b(l)}]; -0484 end -0485 repString=['(' repString ')']; -0486 end -0487 -0488 % add the new matched genes -0489 hitGenes.newGenes = [hitGenes.newGenes, {repString}]; +0476 %Update the matrix +0477 newRxnGeneMat(j,b)=1; +0478 +0479 %Update the grRules string. This is tricky, but I hope that +0480 %it's ok to replace the old gene name with the new one and +0481 %add ') or (' if there were several matches. Be sure of +0482 %this! +0483 repString=fullGeneList{b(1)}; +0484 if numel(b)>1 +0485 for l=2:numel(b) +0486 repString=[repString ' or ' fullGeneList{b(l)}]; +0487 end +0488 repString=['(' repString ')']; +0489 end 0490 -0491 %Use regexprep instead of strrep to prevent partial matches -0492 models{useOrderIndexes(i)}.grRules{j}=regexprep(models{useOrderIndexes(i)}.grRules{j},['(^|\s|\()' geneName{1} '($|\s|\))'],['$1' repString '$2']); -0493 else -0494 %Then search in the non-replaceable genes. There could only -0495 %be one match here -0496 index=find(ismember(nonReplaceableGenes,geneName)); -0497 -0498 %Update the matrix -0499 newRxnGeneMat(j,nonRepStartIndex+index)=1; +0491 % add the new matched genes +0492 hitGenes.newGenes = [hitGenes.newGenes, {repString}]; +0493 +0494 %Use regexprep instead of strrep to prevent partial matches +0495 models{useOrderIndexes(i)}.grRules{j}=regexprep(models{useOrderIndexes(i)}.grRules{j},['(^|\s|\()' geneName{1} '($|\s|\))'],['$1' repString '$2']); +0496 else +0497 %Then search in the non-replaceable genes. 
There could only +0498 %be one match here +0499 index=find(ismember(nonReplaceableGenes,geneName)); 0500 -0501 models{useOrderIndexes(i)}.grRules{j}=strrep(models{useOrderIndexes(i)}.grRules{j},geneName{1},strcat('OLD_',models{useOrderIndexes(i)}.id,'_',geneName{1})); -0502 end -0503 end -0504 end -0505 -0506 %Add the new list of genes -0507 models{useOrderIndexes(i)}.rxnGeneMat=newRxnGeneMat; -0508 if ~isempty(nonReplaceableGenes) -0509 models{useOrderIndexes(i)}.genes=[allGenes{1}(unique(newGenes));strcat('OLD_',models{useOrderIndexes(i)}.id,'_',nonReplaceableGenes)]; -0510 else -0511 models{useOrderIndexes(i)}.genes=allGenes{1}(unique(newGenes)); -0512 end -0513 if isfield(models{useOrderIndexes(i)},'geneComps') -0514 geneComps=models{useOrderIndexes(i)}.geneComps(1); -0515 models{useOrderIndexes(i)}.geneComps=zeros(numel(models{useOrderIndexes(i)}.genes),1); -0516 %Assume that all genes are in the same compartment, and this -0517 %compartment is specified for the first gene -0518 models{useOrderIndexes(i)}.geneComps(:)=geneComps; -0519 end -0520 end -0521 -0522 %Now merge the models. 
All information should be correct except for 'or' -0523 %complexes -0524 draftModel=mergeModels(models,'metNames'); -0525 -0526 %Remove unnecessary OLD_ genes, that were added with OR relationships -0527 regexStr=['OLD_(', strjoin(modelNames(:),'|'),')_(\S^\))+']; -0528 draftModel.grRules=regexprep(draftModel.grRules,[' or ' regexStr],''); -0529 draftModel.grRules=regexprep(draftModel.grRules,[regexStr ' or '],''); -0530 -0531 %Change name of the resulting model -0532 draftModel.id=getModelFor; -0533 name='Generated by getModelFromHomology using '; -0534 for i=1:numel(models) -0535 if i<numel(models) -0536 name=[name models{i}.id ', ']; -0537 else -0538 name=[name models{i}.id]; -0539 end -0540 end -0541 draftModel.name=name; -0542 draftModel.rxnNotes=cell(length(draftModel.rxns),1); -0543 draftModel.rxnNotes(:)={'Included by getModelFromHomology'}; -0544 draftModel.rxnConfidenceScores=NaN(length(draftModel.rxns),1); -0545 draftModel.rxnConfidenceScores(:)=2; -0546 draftModel=deleteUnusedGenes(draftModel,0); -0547 %Standardize grRules and notify if problematic grRules are found -0548 [draftModel.grRules,draftModel.rxnGeneMat]=standardizeGrRules(draftModel,false); -0549 draftModel=deleteUnusedGenes(draftModel,false); -0550 end +0501 %Update the matrix +0502 newRxnGeneMat(j,nonRepStartIndex+index)=1; +0503 +0504 models{useOrderIndexes(i)}.grRules{j}=strrep(models{useOrderIndexes(i)}.grRules{j},geneName{1},strcat('OLD_',models{useOrderIndexes(i)}.id,'_',geneName{1})); +0505 end +0506 end +0507 end +0508 +0509 %Add the new list of genes +0510 models{useOrderIndexes(i)}.rxnGeneMat=newRxnGeneMat; +0511 if ~isempty(nonReplaceableGenes) +0512 models{useOrderIndexes(i)}.genes=[allGenes{1}(unique(newGenes));strcat('OLD_',models{useOrderIndexes(i)}.id,'_',nonReplaceableGenes)]; +0513 else +0514 models{useOrderIndexes(i)}.genes=allGenes{1}(unique(newGenes)); +0515 end +0516 if isfield(models{useOrderIndexes(i)},'geneComps') +0517 
geneComps=models{useOrderIndexes(i)}.geneComps(1); +0518 models{useOrderIndexes(i)}.geneComps=zeros(numel(models{useOrderIndexes(i)}.genes),1); +0519 %Assume that all genes are in the same compartment, and this +0520 %compartment is specified for the first gene +0521 models{useOrderIndexes(i)}.geneComps(:)=geneComps; +0522 end +0523 end +0524 +0525 %Now merge the models. All information should be correct except for 'or' +0526 %complexes +0527 draftModel=mergeModels(models,'metNames'); +0528 +0529 %Remove unnecessary OLD_ genes, that were added with OR relationships +0530 regexStr=['OLD_(', strjoin(modelNames(:),'|'),')_(\S^\))+']; +0531 draftModel.grRules=regexprep(draftModel.grRules,[' or ' regexStr],''); +0532 draftModel.grRules=regexprep(draftModel.grRules,[regexStr ' or '],''); +0533 +0534 %Change name of the resulting model +0535 draftModel.id=getModelFor; +0536 name='Generated by getModelFromHomology using '; +0537 for i=1:numel(models) +0538 if i<numel(models) +0539 name=[name models{i}.id ', ']; +0540 else +0541 name=[name models{i}.id]; +0542 end +0543 end +0544 draftModel.name=name; +0545 draftModel.rxnNotes=cell(length(draftModel.rxns),1); +0546 draftModel.rxnNotes(:)={'Included by getModelFromHomology'}; +0547 draftModel.rxnConfidenceScores=NaN(length(draftModel.rxns),1); +0548 draftModel.rxnConfidenceScores(:)=2; +0549 draftModel=deleteUnusedGenes(draftModel,0); +0550 %Standardize grRules and notify if problematic grRules are found +0551 [draftModel.grRules,draftModel.rxnGeneMat]=standardizeGrRules(draftModel,false); +0552 draftModel=deleteUnusedGenes(draftModel,false); +0553 end
Generated by m2html © 2005
\ No newline at end of file diff --git a/doc/core/mergeModels.html b/doc/core/mergeModels.html index 66f07d14..2144f402 100644 --- a/doc/core/mergeModels.html +++ b/doc/core/mergeModels.html @@ -553,104 +553,124 @@

SOURCE CODE ^if isfield(models{i},'geneShortNames') 0493 model.geneShortNames=models{i}.geneShortNames; 0494 end -0495 -0496 if isfield(models{i},'geneMiriams') -0497 model.geneMiriams=models{i}.geneMiriams; +0495 +0496 if isfield(models{i},'proteinNames') +0497 model.proteinNames=models{i}.proteinNames; 0498 end -0499 -0500 if isfield(models{i},'geneComps') -0501 model.geneComps=models{i}.geneComps; +0499 +0500 if isfield(models{i},'geneMiriams') +0501 model.geneMiriams=models{i}.geneMiriams; 0502 end -0503 else -0504 %If gene info should be merged -0505 a=ismember(models{i}.genes,model.genes); -0506 -0507 genesToAdd=find(~a); -0508 -0509 %Only add extra gene info on new genes. This might not be -0510 %correct and should be changed later... -0511 if ~isempty(genesToAdd) -0512 model.genes=[model.genes;models{i}.genes(genesToAdd)]; -0513 emptyGene=cell(numel(genesToAdd),1); -0514 emptyGene(:)={models{i}.id}; -0515 model.geneFrom=[model.geneFrom;emptyGene]; -0516 model.rxnGeneMat=[model.rxnGeneMat sparse(size(model.rxnGeneMat,1),numel(genesToAdd))]; -0517 -0518 if isfield(models{i},'geneShortNames') -0519 if isfield(model,'geneShortNames') -0520 model.geneShortNames=[model.geneShortNames;models{i}.geneShortNames(genesToAdd)]; -0521 else -0522 emptyGeneSN=cell(numel(model.genes)-numel(genesToAdd),1); -0523 emptyGeneSN(:)={''}; -0524 model.geneShortNames=[emptyGeneSN;models{i}.geneShortNames(genesToAdd)]; -0525 end -0526 else -0527 if isfield(model,'geneShortNames') -0528 emptyGeneSN=cell(numel(genesToAdd),1); -0529 emptyGeneSN(:)={''}; -0530 model.geneShortNames=[model.geneShortNames;emptyGeneSN]; -0531 end -0532 end -0533 -0534 if isfield(models{i},'geneMiriams') -0535 if isfield(model,'geneMiriams') -0536 model.geneMiriams=[model.geneMiriams;models{i}.geneMiriams(genesToAdd)]; -0537 else -0538 emptyGeneMir=cell(numel(model.genes)-numel(genesToAdd),1); -0539 model.geneMiriams=[emptyGeneMir;models{i}.geneMiriams(genesToAdd)]; -0540 end -0541 else -0542 if 
isfield(model,'geneMiriams') -0543 emptyGeneMir=cell(numel(genesToAdd),1); -0544 model.geneMiriams=[model.geneMiriams;emptyGeneMir]; +0503 +0504 if isfield(models{i},'geneComps') +0505 model.geneComps=models{i}.geneComps; +0506 end +0507 else +0508 %If gene info should be merged +0509 a=ismember(models{i}.genes,model.genes); +0510 +0511 genesToAdd=find(~a); +0512 +0513 %Only add extra gene info on new genes. This might not be +0514 %correct and should be changed later... +0515 if ~isempty(genesToAdd) +0516 model.genes=[model.genes;models{i}.genes(genesToAdd)]; +0517 emptyGene=cell(numel(genesToAdd),1); +0518 emptyGene(:)={models{i}.id}; +0519 model.geneFrom=[model.geneFrom;emptyGene]; +0520 model.rxnGeneMat=[model.rxnGeneMat sparse(size(model.rxnGeneMat,1),numel(genesToAdd))]; +0521 +0522 if isfield(models{i},'geneShortNames') +0523 if isfield(model,'geneShortNames') +0524 model.geneShortNames=[model.geneShortNames;models{i}.geneShortNames(genesToAdd)]; +0525 else +0526 emptyGeneSN=cell(numel(model.genes)-numel(genesToAdd),1); +0527 emptyGeneSN(:)={''}; +0528 model.geneShortNames=[emptyGeneSN;models{i}.geneShortNames(genesToAdd)]; +0529 end +0530 else +0531 if isfield(model,'geneShortNames') +0532 emptyGeneSN=cell(numel(genesToAdd),1); +0533 emptyGeneSN(:)={''}; +0534 model.geneShortNames=[model.geneShortNames;emptyGeneSN]; +0535 end +0536 end +0537 +0538 if isfield(models{i},'proteinNames') +0539 if isfield(model,'proteinNames') +0540 model.proteinNames=[model.proteinNames;models{i}.proteinNames(genesToAdd)]; +0541 else +0542 emptyGeneSN=cell(numel(model.genes)-numel(genesToAdd),1); +0543 emptyGeneSN(:)={''}; +0544 model.proteinNames=[emptyGeneSN;models{i}.proteinNames(genesToAdd)]; 0545 end -0546 end -0547 -0548 if isfield(models{i},'geneComps') -0549 if isfield(model,'geneComps') -0550 model.geneComps=[model.geneComps;models{i}.geneComps(genesToAdd)]; -0551 else -0552 emptyGeneMir=ones(numel(model.genes)-numel(genesToAdd),1); -0553 
model.geneComps=[emptyGeneMir;models{i}.geneComps(genesToAdd)]; -0554 EM='Adding genes with compartment information to a model without such information. All existing genes will be assigned to the first compartment'; -0555 dispEM(EM,false); -0556 end -0557 else -0558 if isfield(model,'geneComps') -0559 emptyGeneMir=ones(numel(genesToAdd),1); -0560 model.geneComps=[model.geneComps;emptyGeneMir]; -0561 EM='Adding genes with compartment information to a model without such information. All existing genes will be assigned to the first compartment'; -0562 dispEM(EM,false); -0563 end -0564 end -0565 end -0566 -0567 %Remap the genes from the new model. The same thing as with -0568 %mets; this is a wasteful way to do it but I don't care right -0569 %now -0570 [a, b]=ismember(models{i}.genes,model.genes); -0571 -0572 %Just a check -0573 if ~all(a) -0574 EM='There was an unexpected error in matching genes'; -0575 dispEM(EM); -0576 end -0577 model.grRules=[model.grRules;models{i}.grRules]; -0578 end -0579 else -0580 %Add empty gene associations -0581 if isfield(model,'genes') -0582 emptyGene=cell(numel(models{i}.rxns),1); -0583 emptyGene(:)={''}; -0584 model.grRules=[model.grRules;emptyGene]; -0585 end -0586 end -0587 end -0588 %Fix grRules and reconstruct rxnGeneMat -0589 [grRules,rxnGeneMat] = standardizeGrRules(model,true); -0590 model.grRules = grRules; -0591 model.rxnGeneMat = rxnGeneMat; -0592 end +0546 else +0547 if isfield(model,'proteinNames') +0548 emptyGeneSN=cell(numel(genesToAdd),1); +0549 emptyGeneSN(:)={''}; +0550 model.proteinNames=[model.proteinNames;emptyGeneSN]; +0551 end +0552 end +0553 +0554 if isfield(models{i},'geneMiriams') +0555 if isfield(model,'geneMiriams') +0556 model.geneMiriams=[model.geneMiriams;models{i}.geneMiriams(genesToAdd)]; +0557 else +0558 emptyGeneMir=cell(numel(model.genes)-numel(genesToAdd),1); +0559 model.geneMiriams=[emptyGeneMir;models{i}.geneMiriams(genesToAdd)]; +0560 end +0561 else +0562 if isfield(model,'geneMiriams') +0563 
emptyGeneMir=cell(numel(genesToAdd),1); +0564 model.geneMiriams=[model.geneMiriams;emptyGeneMir]; +0565 end +0566 end +0567 +0568 if isfield(models{i},'geneComps') +0569 if isfield(model,'geneComps') +0570 model.geneComps=[model.geneComps;models{i}.geneComps(genesToAdd)]; +0571 else +0572 emptyGeneMir=ones(numel(model.genes)-numel(genesToAdd),1); +0573 model.geneComps=[emptyGeneMir;models{i}.geneComps(genesToAdd)]; +0574 EM='Adding genes with compartment information to a model without such information. All existing genes will be assigned to the first compartment'; +0575 dispEM(EM,false); +0576 end +0577 else +0578 if isfield(model,'geneComps') +0579 emptyGeneMir=ones(numel(genesToAdd),1); +0580 model.geneComps=[model.geneComps;emptyGeneMir]; +0581 EM='Adding genes with compartment information to a model without such information. All existing genes will be assigned to the first compartment'; +0582 dispEM(EM,false); +0583 end +0584 end +0585 end +0586 +0587 %Remap the genes from the new model. The same thing as with +0588 %mets; this is a wasteful way to do it but I don't care right +0589 %now +0590 [a, b]=ismember(models{i}.genes,model.genes); +0591 +0592 %Just a check +0593 if ~all(a) +0594 EM='There was an unexpected error in matching genes'; +0595 dispEM(EM); +0596 end +0597 model.grRules=[model.grRules;models{i}.grRules]; +0598 end +0599 else +0600 %Add empty gene associations +0601 if isfield(model,'genes') +0602 emptyGene=cell(numel(models{i}.rxns),1); +0603 emptyGene(:)={''}; +0604 model.grRules=[model.grRules;emptyGene]; +0605 end +0606 end +0607 end +0608 %Fix grRules and reconstruct rxnGeneMat +0609 [grRules,rxnGeneMat] = standardizeGrRules(model,true); +0610 model.grRules = grRules; +0611 model.rxnGeneMat = rxnGeneMat; +0612 end
Generated by m2html © 2005
\ No newline at end of file diff --git a/doc/core/permuteModel.html b/doc/core/permuteModel.html index f9ef17da..64d94afe 100644 --- a/doc/core/permuteModel.html +++ b/doc/core/permuteModel.html @@ -189,37 +189,40 @@

SOURCE CODE ^if isfield(newModel,'geneShortNames') 0133 newModel.geneShortNames=newModel.geneShortNames(indexes); 0134 end -0135 if isfield(newModel,'rxnGeneMat') -0136 newModel.rxnGeneMat=newModel.rxnGeneMat(:,indexes); +0135 if isfield(newModel,'proteinNames') +0136 newModel.proteinNames=newModel.proteinNames(indexes); 0137 end -0138 case 'comps' -0139 if isfield(newModel,'comps') -0140 newModel.comps=newModel.comps(indexes); -0141 end -0142 if isfield(newModel,'compNames') -0143 newModel.compNames=newModel.compNames(indexes); +0138 if isfield(newModel,'rxnGeneMat') +0139 newModel.rxnGeneMat=newModel.rxnGeneMat(:,indexes); +0140 end +0141 case 'comps' +0142 if isfield(newModel,'comps') +0143 newModel.comps=newModel.comps(indexes); 0144 end -0145 if isfield(newModel,'compOutside') -0146 newModel.compOutside=newModel.compOutside(indexes); +0145 if isfield(newModel,'compNames') +0146 newModel.compNames=newModel.compNames(indexes); 0147 end -0148 if isfield(newModel,'compMiriams') -0149 newModel.compMiriams=newModel.compMiriams(indexes); +0148 if isfield(newModel,'compOutside') +0149 newModel.compOutside=newModel.compOutside(indexes); 0150 end -0151 [~,J]=sort(indexes); % The *index* of compartment is used in next fields -0152 if isfield(newModel,'metComps') -0153 [toreplace, bywhat] = ismember(newModel.metComps,1:length(J)); -0154 newModel.metComps(toreplace) = J(bywhat(toreplace)); -0155 end -0156 if isfield(model,'rxnComps') -0157 [toreplace, bywhat] = ismember(model.rxnComps,1:length(J)); -0158 model.rxnComps(toreplace) = J(bywhat(toreplace)); -0159 end -0160 if isfield(model,'geneComps') -0161 [toreplace, bywhat] = ismember(model.geneComps,1:length(J)); -0162 model.geneComps(toreplace) = J(bywhat(toreplace)); -0163 end -0164 end -0165 end +0151 if isfield(newModel,'compMiriams') +0152 newModel.compMiriams=newModel.compMiriams(indexes); +0153 end +0154 [~,J]=sort(indexes); % The *index* of compartment is used in next fields +0155 if isfield(newModel,'metComps') 
+0156 [toreplace, bywhat] = ismember(newModel.metComps,1:length(J)); +0157 newModel.metComps(toreplace) = J(bywhat(toreplace)); +0158 end +0159 if isfield(model,'rxnComps') +0160 [toreplace, bywhat] = ismember(model.rxnComps,1:length(J)); +0161 model.rxnComps(toreplace) = J(bywhat(toreplace)); +0162 end +0163 if isfield(model,'geneComps') +0164 [toreplace, bywhat] = ismember(model.geneComps,1:length(J)); +0165 model.geneComps(toreplace) = J(bywhat(toreplace)); +0166 end +0167 end +0168 end
Generated by m2html © 2005
\ No newline at end of file diff --git a/doc/core/predictLocalization.html b/doc/core/predictLocalization.html index c204b708..3c8b4a08 100644 --- a/doc/core/predictLocalization.html +++ b/doc/core/predictLocalization.html @@ -304,931 +304,940 @@

SOURCE CODE ^if isfield(model,'geneMiriams') 0200 model.geneMiriams=[model.geneMiriams;{[]}]; 0201 end -0202 if isfield(model,'geneFrom') -0203 model.geneFrom=[model.geneFrom;{{'FAKE'}}]; +0202 if isfield(model,'proteinNames') +0203 model.proteinNames=[model.proteinNames;{[]}]; 0204 end -0205 model.rxnGeneMat(I(i),numel(model.genes))=1; -0206 model.grRules{I(i)}=''; -0207 end -0208 -0209 %Update the GSS. All genes, fake or real, for which there is no evidence -0210 %gets a score 0.5 in all compartments. Also just to make it easier further -0211 %on -0212 I=setdiff(model.genes,GSS.genes); -0213 GSS.genes=[GSS.genes;I]; -0214 GSS.scores=[GSS.scores;ones(numel(I),numel(GSS.compartments))*0.5]; -0215 -0216 %Gene complexes should be moved together in order to be biologically -0217 %relevant. The average score for the genes is used for each compartment. -0218 %This is done by changing the model so that gene complexes are used as a -0219 %single gene name and then a score is calculated for that "gene". -0220 -0221 %Only "and"-relationships exist after expandModel -0222 genes=unique(model.grRules); -0223 nGenes=strrep(genes,'(',''); -0224 nGenes=strrep(nGenes,')',''); -0225 %nGenes=strrep(nGenes,' and ','_and_'); -0226 complexes=setdiff(nGenes,model.genes); -0227 if ~isempty(complexes) -0228 if isempty(complexes{1}) %Empty grRules also come up here -0229 complexes(1)=[]; -0230 end -0231 end -0232 cScores=zeros(numel(complexes),numel(GSS.compartments)); -0233 for i=1:numel(complexes) -0234 genesInComplex=regexp(complexes{i},' and ','split'); -0235 -0236 %Find these genes in GSS -0237 [I, J]=ismember(genesInComplex,GSS.genes); +0205 if isfield(model,'geneFrom') +0206 model.geneFrom=[model.geneFrom;{{'FAKE'}}]; +0207 end +0208 model.rxnGeneMat(I(i),numel(model.genes))=1; +0209 model.grRules{I(i)}=''; +0210 end +0211 +0212 %Update the GSS. All genes, fake or real, for which there is no evidence +0213 %gets a score 0.5 in all compartments. 
Also just to make it easier further +0214 %on +0215 I=setdiff(model.genes,GSS.genes); +0216 GSS.genes=[GSS.genes;I]; +0217 GSS.scores=[GSS.scores;ones(numel(I),numel(GSS.compartments))*0.5]; +0218 +0219 %Gene complexes should be moved together in order to be biologically +0220 %relevant. The average score for the genes is used for each compartment. +0221 %This is done by changing the model so that gene complexes are used as a +0222 %single gene name and then a score is calculated for that "gene". +0223 +0224 %Only "and"-relationships exist after expandModel +0225 genes=unique(model.grRules); +0226 nGenes=strrep(genes,'(',''); +0227 nGenes=strrep(nGenes,')',''); +0228 %nGenes=strrep(nGenes,' and ','_and_'); +0229 complexes=setdiff(nGenes,model.genes); +0230 if ~isempty(complexes) +0231 if isempty(complexes{1}) %Empty grRules also come up here +0232 complexes(1)=[]; +0233 end +0234 end +0235 cScores=zeros(numel(complexes),numel(GSS.compartments)); +0236 for i=1:numel(complexes) +0237 genesInComplex=regexp(complexes{i},' and ','split'); 0238 -0239 if any(I) -0240 %Get the average of the genes that were found -0241 mScores=mean(GSS.scores(J(I),:)); -0242 -0243 %And add 0.5 for the genes that were not found in order to be -0244 %consistent with non-complexes -0245 mScores=(mScores.*sum(I)+(numel(genesInComplex)-sum(I))*0.5)/numel(genesInComplex); -0246 else -0247 EM=['Could not parse grRule "' complexes{i} '". 
Assigning score 0.0 in all compartments']; -0248 dispEM(EM,false); -0249 mScores=ones(1,numel(genesInComplex))*0.5; -0250 end -0251 cScores(i,:)=mScores; -0252 -0253 %Add this complex as a new gene -0254 model.genes=[model.genes;complexes{i}]; -0255 if isfield(model,'geneMiriams') -0256 model.geneMiriams=[model.geneMiriams;{[]}]; -0257 end -0258 if isfield(model,'geneShortNames') -0259 model.geneShortNames=[model.geneShortNames;{''}]; +0239 %Find these genes in GSS +0240 [I, J]=ismember(genesInComplex,GSS.genes); +0241 +0242 if any(I) +0243 %Get the average of the genes that were found +0244 mScores=mean(GSS.scores(J(I),:)); +0245 +0246 %And add 0.5 for the genes that were not found in order to be +0247 %consistent with non-complexes +0248 mScores=(mScores.*sum(I)+(numel(genesInComplex)-sum(I))*0.5)/numel(genesInComplex); +0249 else +0250 EM=['Could not parse grRule "' complexes{i} '". Assigning score 0.0 in all compartments']; +0251 dispEM(EM,false); +0252 mScores=ones(1,numel(genesInComplex))*0.5; +0253 end +0254 cScores(i,:)=mScores; +0255 +0256 %Add this complex as a new gene +0257 model.genes=[model.genes;complexes{i}]; +0258 if isfield(model,'geneMiriams') +0259 model.geneMiriams=[model.geneMiriams;{[]}]; 0260 end -0261 if isfield(model,'geneFrom') -0262 model.geneFrom=[model.geneFrom;{'COMPLEX'}]; +0261 if isfield(model,'geneShortNames') +0262 model.geneShortNames=[model.geneShortNames;{''}]; 0263 end -0264 %Find the reactions which had the original complex and change them to -0265 %use the new "gene" -0266 I=ismember(model.grRules,['(' complexes{i} ')']); -0267 -0268 %Should check more carefully if there can be an error here -0269 if ~isempty(I) -0270 model.rxnGeneMat(I,:)=0; %Ok since the split on "or" was applied -0271 model.rxnGeneMat(I,numel(model.genes))=1; -0272 end -0273 end -0274 -0275 %Add the new "genes" -0276 GSS.genes=[GSS.genes;complexes]; -0277 GSS.scores=[GSS.scores;cScores]; -0278 -0279 %After merging the complexes it could happen that there 
are genes that are -0280 %no longer in use. Delete such genes -0281 model=removeReactions(model,{},false,true); -0282 -0283 %Exchange reactions, defined as involving an unconstrained metabolite, are -0284 %special in that they have to stay in the defaultCompartment. This means -0285 %that uptake/excretion of metabolites is always via the default -0286 %compartment. This is a small simplification, but should be valid in most -0287 %cases -0288 [~, I]=getExchangeRxns(model); -0289 -0290 %It will be easier later on if the same place. Put them in the beginning -0291 J=1:numel(model.rxns); -0292 J(I)=[]; -0293 model=permuteModel(model,[I;J'],'rxns'); -0294 -0295 %Number of exchange reactions -0296 nER=numel(I); -0297 -0298 %Also put the exchange metabolites in the beginning -0299 if isfield(model,'unconstrained') -0300 I=find(model.unconstrained); -0301 J=1:numel(model.mets); -0302 J(I)=[]; -0303 model=permuteModel(model,[I;J'],'mets'); -0304 %Also reorder the transport costs -0305 transportCost=transportCost([I;J']); -0306 %Number of exchange metabolites -0307 nEM=numel(I); -0308 else -0309 nEM=0; -0310 end -0311 -0312 %There is no point of having genes for exchange reactions, so delete them. -0313 %Also to make computations easier -0314 model.rxnGeneMat(1:nER,:)=0; -0315 model.grRules(1:nER)={''}; -0316 -0317 %Remove unused genes -0318 model=removeReactions(model,{},false,true); -0319 -0320 %Remove genes with no match to the model and reorder so that the genes are -0321 %in the same order as model.genes. 
Since the fake genes are already added -0322 %so that all genes in model exist in GSS it is fine to do like this -0323 [~, J]=ismember(model.genes,GSS.genes); -0324 GSS.genes=model.genes; -0325 GSS.scores=GSS.scores(J,:); -0326 -0327 %Reorder the GSS so that the first index corresponds to the default -0328 %compartment -0329 [~, J]=ismember(defaultCompartment,GSS.compartments); -0330 reorder=1:numel(GSS.compartments); -0331 reorder(J)=[]; -0332 reorder=[J reorder]; -0333 GSS.scores=GSS.scores(:,reorder); -0334 GSS.compartments=GSS.compartments(reorder); -0335 -0336 %Since it is only checked whether the metabolites can be synthesized, there -0337 %is no need to care about the stoichiometry. Change to -1/1 to simplify -0338 %later. Keep the S matrix for later though -0339 oldS=model.S; -0340 model.S(model.S>0)=1; -0341 model.S(model.S<0)=-1; -0342 -0343 %Here is a bit of a trick. To avoid the recurring calculation which -0344 %reactions are reversible, the reversible reactions have the coefficients -0345 %-10/10 instead of -1/1 -0346 model.S(:,model.rev==1)=model.S(:,model.rev==1).*10; -0347 -0348 %***Begin problem formulation -0349 -0350 %Some numbers that are good to have -0351 nRxns=numel(model.rxns)-nER; %Excluding exchange rxns -0352 nMets=numel(model.mets)-nEM; %Excluding exchange mets -0353 nGenes=numel(model.genes); -0354 nComps=numel(GSS.compartments); +0264 if isfield(model,'proteinNames') +0265 model.proteinNames=[model.proteinNames;{''}]; +0266 end +0267 if isfield(model,'geneFrom') +0268 model.geneFrom=[model.geneFrom;{'COMPLEX'}]; +0269 end +0270 %Find the reactions which had the original complex and change them to +0271 %use the new "gene" +0272 I=ismember(model.grRules,['(' complexes{i} ')']); +0273 +0274 %Should check more carefully if there can be an error here +0275 if ~isempty(I) +0276 model.rxnGeneMat(I,:)=0; %Ok since the split on "or" was applied +0277 model.rxnGeneMat(I,numel(model.genes))=1; +0278 end +0279 end +0280 +0281 %Add the new 
"genes" +0282 GSS.genes=[GSS.genes;complexes]; +0283 GSS.scores=[GSS.scores;cScores]; +0284 +0285 %After merging the complexes it could happen that there are genes that are +0286 %no longer in use. Delete such genes +0287 model=removeReactions(model,{},false,true); +0288 +0289 %Exchange reactions, defined as involving an unconstrained metabolite, are +0290 %special in that they have to stay in the defaultCompartment. This means +0291 %that uptake/excretion of metabolites is always via the default +0292 %compartment. This is a small simplification, but should be valid in most +0293 %cases +0294 [~, I]=getExchangeRxns(model); +0295 +0296 %It will be easier later on if the same place. Put them in the beginning +0297 J=1:numel(model.rxns); +0298 J(I)=[]; +0299 model=permuteModel(model,[I;J'],'rxns'); +0300 +0301 %Number of exchange reactions +0302 nER=numel(I); +0303 +0304 %Also put the exchange metabolites in the beginning +0305 if isfield(model,'unconstrained') +0306 I=find(model.unconstrained); +0307 J=1:numel(model.mets); +0308 J(I)=[]; +0309 model=permuteModel(model,[I;J'],'mets'); +0310 %Also reorder the transport costs +0311 transportCost=transportCost([I;J']); +0312 %Number of exchange metabolites +0313 nEM=numel(I); +0314 else +0315 nEM=0; +0316 end +0317 +0318 %There is no point of having genes for exchange reactions, so delete them. +0319 %Also to make computations easier +0320 model.rxnGeneMat(1:nER,:)=0; +0321 model.grRules(1:nER)={''}; +0322 +0323 %Remove unused genes +0324 model=removeReactions(model,{},false,true); +0325 +0326 %Remove genes with no match to the model and reorder so that the genes are +0327 %in the same order as model.genes. 
Since the fake genes are already added +0328 %so that all genes in model exist in GSS it is fine to do like this +0329 [~, J]=ismember(model.genes,GSS.genes); +0330 GSS.genes=model.genes; +0331 GSS.scores=GSS.scores(J,:); +0332 +0333 %Reorder the GSS so that the first index corresponds to the default +0334 %compartment +0335 [~, J]=ismember(defaultCompartment,GSS.compartments); +0336 reorder=1:numel(GSS.compartments); +0337 reorder(J)=[]; +0338 reorder=[J reorder]; +0339 GSS.scores=GSS.scores(:,reorder); +0340 GSS.compartments=GSS.compartments(reorder); +0341 +0342 %Since it is only checked whether the metabolites can be synthesized, there +0343 %is no need to care about the stoichiometry. Change to -1/1 to simplify +0344 %later. Keep the S matrix for later though +0345 oldS=model.S; +0346 model.S(model.S>0)=1; +0347 model.S(model.S<0)=-1; +0348 +0349 %Here is a bit of a trick. To avoid the recurring calculation which +0350 %reactions are reversible, the reversible reactions have the coefficients +0351 %-10/10 instead of -1/1 +0352 model.S(:,model.rev==1)=model.S(:,model.rev==1).*10; +0353 +0354 %***Begin problem formulation 0355 -0356 %Create a big stoichiometric matrix that will be the current model. In -0357 %order to have faster simulations the maximal model size is declared and -0358 %reactions are then moved within it. -0359 -0360 %First the original model (with the first nE being exchange rxns), then -0361 %reserve space for number of rxns minus exchange rxns for each non-default -0362 %compartment, then transport reactions for all non-exchange mets between -0363 %the default compartment and all others. 
-0364 %NOTE: Kept eye()*0 since eye() can be used to include all transport from -0365 %the beginning -0366 s=repmat(eye(nMets)*0,1,nComps-1); -0367 s=[zeros(numel(model.mets)-nMets,size(s,2));s]; -0368 S=[model.S sparse(numel(model.mets),nRxns*(nComps-1)) s]; -0369 s=[sparse(nMets*(nComps-1),numel(model.rxns)+nRxns*(nComps-1)) eye(nMets*(nComps-1))*0]; -0370 S=[S;s]; -0371 -0372 %Also replicate the transport costs -0373 transportCost=[transportCost(1:nEM);repmat(transportCost(nEM+1:end),nComps,1)]; -0374 -0375 %Create a binary matrix that says where the genes are in the current -0376 %solution -0377 g2c=false(nGenes,nComps); -0378 %All genes start in the default compartment -0379 g2c(:,1)=true; +0356 %Some numbers that are good to have +0357 nRxns=numel(model.rxns)-nER; %Excluding exchange rxns +0358 nMets=numel(model.mets)-nEM; %Excluding exchange mets +0359 nGenes=numel(model.genes); +0360 nComps=numel(GSS.compartments); +0361 +0362 %Create a big stoichiometric matrix that will be the current model. In +0363 %order to have faster simulations the maximal model size is declared and +0364 %reactions are then moved within it. +0365 +0366 %First the original model (with the first nE being exchange rxns), then +0367 %reserve space for number of rxns minus exchange rxns for each non-default +0368 %compartment, then transport reactions for all non-exchange mets between +0369 %the default compartment and all others. 
+0370 %NOTE: Kept eye()*0 since eye() can be used to include all transport from +0371 %the beginning +0372 s=repmat(eye(nMets)*0,1,nComps-1); +0373 s=[zeros(numel(model.mets)-nMets,size(s,2));s]; +0374 S=[model.S sparse(numel(model.mets),nRxns*(nComps-1)) s]; +0375 s=[sparse(nMets*(nComps-1),numel(model.rxns)+nRxns*(nComps-1)) eye(nMets*(nComps-1))*0]; +0376 S=[S;s]; +0377 +0378 %Also replicate the transport costs +0379 transportCost=[transportCost(1:nEM);repmat(transportCost(nEM+1:end),nComps,1)]; 0380 -0381 %Start of main optimization loop -0382 tic; -0383 bestScore=-inf; -0384 bestS=[]; -0385 bestg2c=[]; +0381 %Create a binary matrix that says where the genes are in the current +0382 %solution +0383 g2c=false(nGenes,nComps); +0384 %All genes start in the default compartment +0385 g2c(:,1)=true; 0386 -0387 %Temp for testing -0388 plotScore=[]; -0389 nTrans=[]; -0390 totScore=[]; -0391 minScore=sum(min(GSS.scores,[],2)); -0392 maxScore=sum(max(GSS.scores,[],2)); -0393 -0394 while toc<maxTime*60 -0395 %Pick a random gene, weighted by it is current score minus the best -0396 %score for that gene (often 1.0, but can be 0.5 for no genes or average -0397 %for complexes). Genes with bad fits are more likely to be moved. This -0398 %formulation never moves a gene from its best compartment. Therefore a -0399 %small uniform weight is added -0400 [I, J]=find(g2c); -0401 geneToMove=randsample(nGenes,1,true,max(GSS.scores(I,:),[],2)-GSS.scores(sub2ind(size(g2c),I,J))+0.1); -0402 -0403 %Sample among possible compartments to move to. Add a larger weight to -0404 %even out the odds a little. 
Also a way of getting rid of loops where -0405 %the same set of genes are moved back and forth several times -0406 toComp=randsample(nComps,1,true,GSS.scores(geneToMove,:)+0.2); -0407 -0408 %Check that it moves to a new compartment -0409 if toComp==find(g2c(geneToMove,:)) -0410 continue; -0411 end -0412 -0413 %Moves the gene -0414 [newS, newg2c]=moveGene(S,model,g2c,geneToMove,toComp,nRxns,nMets); -0415 -0416 %Tries to connect the network. If this was not possible in 10 -0417 %iterations, then abort. If more than 20 modifications were needed then -0418 %it is unlikely that it will be a lower score -0419 wasConnected=false; -0420 for j=1:10 -0421 %Find the metabolites that are now unconnected -0422 unconnected=findUnconnected(newS,nEM); -0423 -0424 %Continue if there are still unconnected -0425 if any(unconnected) -0426 %For each gene find out how many of these could be connected if -0427 %the gene was moved and how many would be disconnected by -0428 %moving that gene -0429 [geneIndex, moveTo, deltaConnected, deltaScore]=selectGenes(newS,nEM,nMets,nER,nRxns,model,unconnected,g2c,GSS); -0430 -0431 %Score which gene would be the best to move. The highest -0432 %deltaScore is 1.0. It should be possible to move a gene from -0433 %worst to best compartment even if it disconnects, say, 1.5 -0434 %more metabolites -0435 [score, I]=max(1.5*deltaScore+deltaConnected); +0387 %Start of main optimization loop +0388 tic; +0389 bestScore=-inf; +0390 bestS=[]; +0391 bestg2c=[]; +0392 +0393 %Temp for testing +0394 plotScore=[]; +0395 nTrans=[]; +0396 totScore=[]; +0397 minScore=sum(min(GSS.scores,[],2)); +0398 maxScore=sum(max(GSS.scores,[],2)); +0399 +0400 while toc<maxTime*60 +0401 %Pick a random gene, weighted by it is current score minus the best +0402 %score for that gene (often 1.0, but can be 0.5 for no genes or average +0403 %for complexes). Genes with bad fits are more likely to be moved. This +0404 %formulation never moves a gene from its best compartment. 
Therefore a +0405 %small uniform weight is added +0406 [I, J]=find(g2c); +0407 geneToMove=randsample(nGenes,1,true,max(GSS.scores(I,:),[],2)-GSS.scores(sub2ind(size(g2c),I,J))+0.1); +0408 +0409 %Sample among possible compartments to move to. Add a larger weight to +0410 %even out the odds a little. Also a way of getting rid of loops where +0411 %the same set of genes are moved back and forth several times +0412 toComp=randsample(nComps,1,true,GSS.scores(geneToMove,:)+0.2); +0413 +0414 %Check that it moves to a new compartment +0415 if toComp==find(g2c(geneToMove,:)) +0416 continue; +0417 end +0418 +0419 %Moves the gene +0420 [newS, newg2c]=moveGene(S,model,g2c,geneToMove,toComp,nRxns,nMets); +0421 +0422 %Tries to connect the network. If this was not possible in 10 +0423 %iterations, then abort. If more than 20 modifications were needed then +0424 %it is unlikely that it will be a lower score +0425 wasConnected=false; +0426 for j=1:10 +0427 %Find the metabolites that are now unconnected +0428 unconnected=findUnconnected(newS,nEM); +0429 +0430 %Continue if there are still unconnected +0431 if any(unconnected) +0432 %For each gene find out how many of these could be connected if +0433 %the gene was moved and how many would be disconnected by +0434 %moving that gene +0435 [geneIndex, moveTo, deltaConnected, deltaScore]=selectGenes(newS,nEM,nMets,nER,nRxns,model,unconnected,g2c,GSS); 0436 -0437 %Check if it has to add a transport or if there is a gene that -0438 %could be moved order to have a more connected network -0439 hasToAddTransport=true; -0440 if ~isempty(deltaConnected) -0441 if score>0 -0442 hasToAddTransport=false; -0443 end -0444 end -0445 -0446 %If it is possible to move any gene in order to have a more -0447 %connected network, then move the best one -0448 if hasToAddTransport==false -0449 [newS, newg2c]=moveGene(newS,model,g2c,geneIndex(I),moveTo(I),nRxns,nMets); -0450 else -0451 %Choose a random unconnected metabolite that should be -0452 %connected 
-0453 transMet=unconnected(randsample(numel(unconnected),1)); -0454 -0455 %First get where the metabolite is now -0456 comps=ceil((transMet-nEM)/((size(S,1)-nEM)/nComps)); -0457 -0458 %Find the corresponding metabolite index if it were in the -0459 %default compartment -0460 dcIndex=transMet-(comps-1)*nMets; -0461 -0462 %Then get the indexes of that metabolite in all -0463 %compartments -0464 allIndexes=dcIndex; -0465 for k=1:nComps-1 -0466 allIndexes=[allIndexes;dcIndex+nMets*k]; -0467 end -0468 -0469 %It could be that some of these are not used in any -0470 %reaction. Get only the ones which are -0471 I=sum(newS(allIndexes,:)~=0,2)>0; -0472 -0473 %Then get the ones that are used but not in unconnected. -0474 %These are metabolites that could potentially be -0475 %transported to connect transMet -0476 connectedUsed=setdiff(allIndexes(I),unconnected); -0477 -0478 %This may be an error but leave it for now. It seems to -0479 %happen if nothing can be connected in one step -0480 if isempty(connectedUsed) -0481 break; -0482 end +0437 %Score which gene would be the best to move. The highest +0438 %deltaScore is 1.0. 
It should be possible to move a gene from +0439 %worst to best compartment even if it disconnects, say, 1.5 +0440 %more metabolites +0441 [score, I]=max(1.5*deltaScore+deltaConnected); +0442 +0443 %Check if it has to add a transport or if there is a gene that +0444 %could be moved order to have a more connected network +0445 hasToAddTransport=true; +0446 if ~isempty(deltaConnected) +0447 if score>0 +0448 hasToAddTransport=false; +0449 end +0450 end +0451 +0452 %If it is possible to move any gene in order to have a more +0453 %connected network, then move the best one +0454 if hasToAddTransport==false +0455 [newS, newg2c]=moveGene(newS,model,g2c,geneIndex(I),moveTo(I),nRxns,nMets); +0456 else +0457 %Choose a random unconnected metabolite that should be +0458 %connected +0459 transMet=unconnected(randsample(numel(unconnected),1)); +0460 +0461 %First get where the metabolite is now +0462 comps=ceil((transMet-nEM)/((size(S,1)-nEM)/nComps)); +0463 +0464 %Find the corresponding metabolite index if it were in the +0465 %default compartment +0466 dcIndex=transMet-(comps-1)*nMets; +0467 +0468 %Then get the indexes of that metabolite in all +0469 %compartments +0470 allIndexes=dcIndex; +0471 for k=1:nComps-1 +0472 allIndexes=[allIndexes;dcIndex+nMets*k]; +0473 end +0474 +0475 %It could be that some of these are not used in any +0476 %reaction. Get only the ones which are +0477 I=sum(newS(allIndexes,:)~=0,2)>0; +0478 +0479 %Then get the ones that are used but not in unconnected. 
+0480 %These are metabolites that could potentially be +0481 %transported to connect transMet +0482 connectedUsed=setdiff(allIndexes(I),unconnected); 0483 -0484 %If transMet is in the default compartment then everything -0485 %is fine, just connect it to a random one -0486 if transMet==dcIndex -0487 newS=addTransport(newS,nRxns,nER,nMets,nEM,nComps,transMet,connectedUsed(randsample(numel(connectedUsed),1))); -0488 else -0489 %If one of the connectedUsed is in the default -0490 %compartment then connect to that one -0491 I=connectedUsed(connectedUsed<(nMets+nEM)); -0492 if any(I) -0493 newS=addTransport(newS,nRxns,nER,nMets,nEM,nComps,transMet,I(randsample(numel(I),1))); -0494 else -0495 %This is if the only way to connect it is by adding -0496 %two transport reactions, going via the default -0497 %compartment -0498 break; -0499 end -0500 end -0501 end -0502 else -0503 wasConnected=true; -0504 break; -0505 end -0506 end -0507 -0508 %If the network was connected in a new way, it is possible that some -0509 %transport reactions are no longer needed. They should be removed -0510 if wasConnected==true -0511 %These are the metabolites that are being transported -0512 activeTransport=find(sum(newS(:,nER+nRxns*nComps+1:end),2)); -0513 -0514 %Get the metabolites that are unconnected if transport was not used -0515 unconnected=findUnconnected(newS(:,1:nER+nRxns*nComps),nEM); -0516 -0517 %Find the transport reactions that are not needed and delete them -0518 I=setdiff(activeTransport,unconnected); +0484 %This may be an error but leave it for now. 
It seems to +0485 %happen if nothing can be connected in one step +0486 if isempty(connectedUsed) +0487 break; +0488 end +0489 +0490 %If transMet is in the default compartment then everything +0491 %is fine, just connect it to a random one +0492 if transMet==dcIndex +0493 newS=addTransport(newS,nRxns,nER,nMets,nEM,nComps,transMet,connectedUsed(randsample(numel(connectedUsed),1))); +0494 else +0495 %If one of the connectedUsed is in the default +0496 %compartment then connect to that one +0497 I=connectedUsed(connectedUsed<(nMets+nEM)); +0498 if any(I) +0499 newS=addTransport(newS,nRxns,nER,nMets,nEM,nComps,transMet,I(randsample(numel(I),1))); +0500 else +0501 %This is if the only way to connect it is by adding +0502 %two transport reactions, going via the default +0503 %compartment +0504 break; +0505 end +0506 end +0507 end +0508 else +0509 wasConnected=true; +0510 break; +0511 end +0512 end +0513 +0514 %If the network was connected in a new way, it is possible that some +0515 %transport reactions are no longer needed. 
They should be removed +0516 if wasConnected==true +0517 %These are the metabolites that are being transported +0518 activeTransport=find(sum(newS(:,nER+nRxns*nComps+1:end),2)); 0519 -0520 %Since both metabolites in a transport rxns must be connected for -0521 %the reaction to be deleted, the sum over the colums should be 4 -0522 newS(:,find(sum(newS(I,nER+nRxns*nComps+1:end))==4)+nER+nRxns*nComps)=0; -0523 -0524 %Score the solution and determine whether to keep it as a new -0525 %solution -0526 [score, geneScore, trCost]=scoreModel(newS,newg2c,GSS,transportCost); -0527 -0528 %If it was the best solution so far, keep it -0529 if score>bestScore -0530 bestScore=score; -0531 bestS=newS; -0532 bestg2c=newg2c; -0533 end -0534 -0535 %This should not be steepest descent later -0536 if score>=bestScore% || exp((score-bestScore)*7)>rand() -0537 plotScore=[plotScore;geneScore]; -0538 nTrans=[nTrans;trCost]; -0539 totScore=[totScore;score]; -0540 S=newS; -0541 g2c=newg2c; -0542 -0543 if plotResults==true -0544 subplot(3,2,1); -0545 spy(S); -0546 subplot(3,2,2); -0547 plot(plotScore,'r'); -0548 xlabel('Gene score'); -0549 subplot(3,2,3); -0550 plot((plotScore-minScore)/(maxScore-minScore),'r'); -0551 xlabel('Gene score relative to predictions'); -0552 subplot(3,2,4); -0553 plot(nTrans,'g'); -0554 xlabel('Transport cost'); -0555 subplot(3,2,5); -0556 plot(totScore,'b'); -0557 xlabel('Total score'); -0558 subplot(3,2,6); -0559 pause(0.2); -0560 end -0561 end -0562 end -0563 end -0564 scores.totScore=score; -0565 scores.geneScore=geneScore; -0566 scores.transCost=trCost; -0567 -0568 %Find which metabolites are transported and to where -0569 [I, J]=find(bestS(nEM+1:nEM+nMets,end-nMets*(nComps-1)+1:end)); -0570 J=ceil(J/nMets+1); -0571 transportStruct.mets=model.metNames(I+nEM); -0572 transportStruct.toComp=GSS.compartments(J); +0520 %Get the metabolites that are unconnected if transport was not used +0521 unconnected=findUnconnected(newS(:,1:nER+nRxns*nComps),nEM); +0522 +0523 
%Find the transport reactions that are not needed and delete them +0524 I=setdiff(activeTransport,unconnected); +0525 +0526 %Since both metabolites in a transport rxns must be connected for +0527 %the reaction to be deleted, the sum over the colums should be 4 +0528 newS(:,find(sum(newS(I,nER+nRxns*nComps+1:end))==4)+nER+nRxns*nComps)=0; +0529 +0530 %Score the solution and determine whether to keep it as a new +0531 %solution +0532 [score, geneScore, trCost]=scoreModel(newS,newg2c,GSS,transportCost); +0533 +0534 %If it was the best solution so far, keep it +0535 if score>bestScore +0536 bestScore=score; +0537 bestS=newS; +0538 bestg2c=newg2c; +0539 end +0540 +0541 %This should not be steepest descent later +0542 if score>=bestScore% || exp((score-bestScore)*7)>rand() +0543 plotScore=[plotScore;geneScore]; +0544 nTrans=[nTrans;trCost]; +0545 totScore=[totScore;score]; +0546 S=newS; +0547 g2c=newg2c; +0548 +0549 if plotResults==true +0550 subplot(3,2,1); +0551 spy(S); +0552 subplot(3,2,2); +0553 plot(plotScore,'r'); +0554 xlabel('Gene score'); +0555 subplot(3,2,3); +0556 plot((plotScore-minScore)/(maxScore-minScore),'r'); +0557 xlabel('Gene score relative to predictions'); +0558 subplot(3,2,4); +0559 plot(nTrans,'g'); +0560 xlabel('Transport cost'); +0561 subplot(3,2,5); +0562 plot(totScore,'b'); +0563 xlabel('Total score'); +0564 subplot(3,2,6); +0565 pause(0.2); +0566 end +0567 end +0568 end +0569 end +0570 scores.totScore=score; +0571 scores.geneScore=geneScore; +0572 scores.transCost=trCost; 0573 -0574 [I, J]=find(bestg2c); -0575 geneLocalization.genes=GSS.genes(I); -0576 geneLocalization.comps=GSS.compartments(J); -0577 -0578 %Resort the gene names -0579 [~, I]=sort(geneLocalization.genes); -0580 geneLocalization.genes=geneLocalization.genes(I); -0581 geneLocalization.comps=geneLocalization.comps(I); -0582 -0583 %Remove the fake genes -0584 I=strncmp('&&FAKE&&',geneLocalization.genes,8); -0585 geneLocalization.genes(I)=[]; -0586 geneLocalization.comps(I)=[]; 
-0587 -0588 %Put together the model. This is done by first duplicating the S matrix -0589 %into the different compartments. Then the transport reactions are added -0590 %based on transportStruct. By now model.S should have the same size as the -0591 %S matrix used in the optimization, but with conserved stoichiometry. In -0592 %the final step all reactions and metabolites that are not used in the S -0593 %matrix from the optimization are deleted from the model -0594 outModel=model; -0595 outModel.S=oldS; -0596 -0597 %This is the S matrix without exchange rxns or metabolites -0598 copyPart=outModel.S(nEM+1:end,nER+1:end); -0599 -0600 %Replicate to give the rxnGeneMat for the full system -0601 copyRxnGeneMat=outModel.rxnGeneMat(nER+1:end,:); -0602 outModel.rxnGeneMat=[outModel.rxnGeneMat;repmat(copyRxnGeneMat,nComps-1,1)]; -0603 -0604 %First fix the compartments. The model is already ordered with the exchange -0605 %metabolites first. The original model may contain one or two compartments, -0606 %depending on whether any exchange metabolites are defined -0607 nStartComps=numel(outModel.comps); -0608 if nStartComps==1 -0609 outModel.comps={'1'}; -0610 outModel.compNames=GSS.compartments(1); -0611 else -0612 if model.metComps(1)==1 -0613 outModel.compNames(1)=GSS.compartments(1); -0614 else -0615 outModel.compNames(2)=GSS.compartments(1); -0616 end -0617 end -0618 outModel.compNames=[outModel.compNames;GSS.compartments(2:end)']; -0619 -0620 %Ugly little loop -0621 for i=1:numel(GSS.compartments)-1 -0622 outModel.comps=[outModel.comps;num2str(numel(outModel.comps)+1)]; +0574 %Find which metabolites are transported and to where +0575 [I, J]=find(bestS(nEM+1:nEM+nMets,end-nMets*(nComps-1)+1:end)); +0576 J=ceil(J/nMets+1); +0577 transportStruct.mets=model.metNames(I+nEM); +0578 transportStruct.toComp=GSS.compartments(J); +0579 +0580 [I, J]=find(bestg2c); +0581 geneLocalization.genes=GSS.genes(I); +0582 geneLocalization.comps=GSS.compartments(J); +0583 +0584 %Resort the 
gene names +0585 [~, I]=sort(geneLocalization.genes); +0586 geneLocalization.genes=geneLocalization.genes(I); +0587 geneLocalization.comps=geneLocalization.comps(I); +0588 +0589 %Remove the fake genes +0590 I=strncmp('&&FAKE&&',geneLocalization.genes,8); +0591 geneLocalization.genes(I)=[]; +0592 geneLocalization.comps(I)=[]; +0593 +0594 %Put together the model. This is done by first duplicating the S matrix +0595 %into the different compartments. Then the transport reactions are added +0596 %based on transportStruct. By now model.S should have the same size as the +0597 %S matrix used in the optimization, but with conserved stoichiometry. In +0598 %the final step all reactions and metabolites that are not used in the S +0599 %matrix from the optimization are deleted from the model +0600 outModel=model; +0601 outModel.S=oldS; +0602 +0603 %This is the S matrix without exchange rxns or metabolites +0604 copyPart=outModel.S(nEM+1:end,nER+1:end); +0605 +0606 %Replicate to give the rxnGeneMat for the full system +0607 copyRxnGeneMat=outModel.rxnGeneMat(nER+1:end,:); +0608 outModel.rxnGeneMat=[outModel.rxnGeneMat;repmat(copyRxnGeneMat,nComps-1,1)]; +0609 +0610 %First fix the compartments. The model is already ordered with the exchange +0611 %metabolites first. 
The original model may contain one or two compartments, +0612 %depending on whether any exchange metabolites are defined +0613 nStartComps=numel(outModel.comps); +0614 if nStartComps==1 +0615 outModel.comps={'1'}; +0616 outModel.compNames=GSS.compartments(1); +0617 else +0618 if model.metComps(1)==1 +0619 outModel.compNames(1)=GSS.compartments(1); +0620 else +0621 outModel.compNames(2)=GSS.compartments(1); +0622 end 0623 end -0624 %This information is not known from the data, so empty fields are added -0625 outModel.compOutside=cell(numel(outModel.comps),1); -0626 outModel.compOutside(:)={''}; -0627 -0628 for i=1:nComps-1 -0629 outModel.S=[outModel.S sparse(size(outModel.S,1),nRxns)]; -0630 outModel.S=[outModel.S;[sparse(nMets,nRxns*i+nER) copyPart]]; -0631 outModel.rxns=[outModel.rxns;strcat(outModel.rxns(nER+1:nER+nRxns),'_',GSS.compartments{i+1})]; -0632 outModel.rxnNames=[outModel.rxnNames;strcat(outModel.rxnNames(nER+1:nER+nRxns),' (',GSS.compartments{i+1},')')]; -0633 outModel.lb=[outModel.lb;outModel.lb(nER+1:nER+nRxns)]; -0634 outModel.ub=[outModel.ub;outModel.ub(nER+1:nER+nRxns)]; -0635 outModel.rev=[outModel.rev;outModel.rev(nER+1:nER+nRxns)]; -0636 outModel.c=[outModel.c;outModel.c(nER+1:nER+nRxns)]; -0637 if isfield(outModel,'grRules') -0638 outModel.grRules=[outModel.grRules;outModel.grRules(nER+1:nER+nRxns)]; -0639 end -0640 if isfield(outModel,'subSystems') -0641 outModel.subSystems=[outModel.subSystems;outModel.subSystems(nER+1:nER+nRxns)]; -0642 end -0643 if isfield(outModel,'eccodes') -0644 outModel.eccodes=[outModel.eccodes;outModel.eccodes(nER+1:nER+nRxns)]; +0624 outModel.compNames=[outModel.compNames;GSS.compartments(2:end)']; +0625 +0626 %Ugly little loop +0627 for i=1:numel(GSS.compartments)-1 +0628 outModel.comps=[outModel.comps;num2str(numel(outModel.comps)+1)]; +0629 end +0630 %This information is not known from the data, so empty fields are added +0631 outModel.compOutside=cell(numel(outModel.comps),1); +0632 
outModel.compOutside(:)={''}; +0633 +0634 for i=1:nComps-1 +0635 outModel.S=[outModel.S sparse(size(outModel.S,1),nRxns)]; +0636 outModel.S=[outModel.S;[sparse(nMets,nRxns*i+nER) copyPart]]; +0637 outModel.rxns=[outModel.rxns;strcat(outModel.rxns(nER+1:nER+nRxns),'_',GSS.compartments{i+1})]; +0638 outModel.rxnNames=[outModel.rxnNames;strcat(outModel.rxnNames(nER+1:nER+nRxns),' (',GSS.compartments{i+1},')')]; +0639 outModel.lb=[outModel.lb;outModel.lb(nER+1:nER+nRxns)]; +0640 outModel.ub=[outModel.ub;outModel.ub(nER+1:nER+nRxns)]; +0641 outModel.rev=[outModel.rev;outModel.rev(nER+1:nER+nRxns)]; +0642 outModel.c=[outModel.c;outModel.c(nER+1:nER+nRxns)]; +0643 if isfield(outModel,'grRules') +0644 outModel.grRules=[outModel.grRules;outModel.grRules(nER+1:nER+nRxns)]; 0645 end -0646 if isfield(outModel,'rxnFrom') -0647 outModel.rxnFrom=[outModel.rxnFrom;outModel.rxnFrom(nER+1:nER+nRxns)]; +0646 if isfield(outModel,'subSystems') +0647 outModel.subSystems=[outModel.subSystems;outModel.subSystems(nER+1:nER+nRxns)]; 0648 end -0649 if isfield(outModel,'rxnMiriams') -0650 outModel.rxnMiriams=[outModel.rxnMiriams;outModel.rxnMiriams(nER+1:nER+nRxns)]; +0649 if isfield(outModel,'eccodes') +0650 outModel.eccodes=[outModel.eccodes;outModel.eccodes(nER+1:nER+nRxns)]; 0651 end -0652 if isfield(outModel,'rxnNotes') -0653 outModel.rxnNotes=[outModel.rxnNotes;outModel.rxnNotes(nER+1:nER+nRxns)]; +0652 if isfield(outModel,'rxnFrom') +0653 outModel.rxnFrom=[outModel.rxnFrom;outModel.rxnFrom(nER+1:nER+nRxns)]; 0654 end -0655 if isfield(outModel,'rxnReferences') -0656 outModel.rxnReferences=[outModel.rxnReferences;outModel.rxnReferences(nER+1:nER+nRxns)]; +0655 if isfield(outModel,'rxnMiriams') +0656 outModel.rxnMiriams=[outModel.rxnMiriams;outModel.rxnMiriams(nER+1:nER+nRxns)]; 0657 end -0658 if isfield(outModel,'rxnConfidenceScores') -0659 outModel.rxnConfidenceScores=[outModel.rxnConfidenceScores;outModel.rxnConfidenceScores(nER+1:nER+nRxns)]; +0658 if isfield(outModel,'rxnNotes') 
+0659 outModel.rxnNotes=[outModel.rxnNotes;outModel.rxnNotes(nER+1:nER+nRxns)]; 0660 end -0661 if isfield(outModel,'rxnDeltaG') -0662 outModel.rxnDeltaG=[outModel.rxnDeltaG;outModel.rxnDeltaG(nER+1:nER+nRxns)]; +0661 if isfield(outModel,'rxnReferences') +0662 outModel.rxnReferences=[outModel.rxnReferences;outModel.rxnReferences(nER+1:nER+nRxns)]; 0663 end -0664 outModel.mets=[outModel.mets;strcat(outModel.mets(nEM+1:nEM+nMets),'_',GSS.compartments{i+1})]; -0665 outModel.metNames=[outModel.metNames;outModel.metNames(nEM+1:nEM+nMets)]; -0666 outModel.b=[outModel.b;outModel.b(nEM+1:nEM+nMets,:)]; -0667 I=ones(nMets,1)*nStartComps+i; -0668 outModel.metComps=[outModel.metComps;I]; -0669 if isfield(outModel,'inchis') -0670 outModel.inchis=[outModel.inchis;outModel.inchis(nEM+1:nEM+nMets)]; -0671 end -0672 if isfield(outModel,'metSmiles') -0673 outModel.metSmiles=[outModel.metSmiles;outModel.metSmiles(nEM+1:nEM+nMets)]; -0674 end -0675 if isfield(outModel,'unconstrained') -0676 outModel.unconstrained=[outModel.unconstrained;outModel.unconstrained(nEM+1:nEM+nMets)]; +0664 if isfield(outModel,'rxnConfidenceScores') +0665 outModel.rxnConfidenceScores=[outModel.rxnConfidenceScores;outModel.rxnConfidenceScores(nER+1:nER+nRxns)]; +0666 end +0667 if isfield(outModel,'rxnDeltaG') +0668 outModel.rxnDeltaG=[outModel.rxnDeltaG;outModel.rxnDeltaG(nER+1:nER+nRxns)]; +0669 end +0670 outModel.mets=[outModel.mets;strcat(outModel.mets(nEM+1:nEM+nMets),'_',GSS.compartments{i+1})]; +0671 outModel.metNames=[outModel.metNames;outModel.metNames(nEM+1:nEM+nMets)]; +0672 outModel.b=[outModel.b;outModel.b(nEM+1:nEM+nMets,:)]; +0673 I=ones(nMets,1)*nStartComps+i; +0674 outModel.metComps=[outModel.metComps;I]; +0675 if isfield(outModel,'inchis') +0676 outModel.inchis=[outModel.inchis;outModel.inchis(nEM+1:nEM+nMets)]; 0677 end -0678 if isfield(outModel,'metMiriams') -0679 outModel.metMiriams=[outModel.metMiriams;outModel.metMiriams(nEM+1:nEM+nMets)]; +0678 if isfield(outModel,'metSmiles') +0679 
outModel.metSmiles=[outModel.metSmiles;outModel.metSmiles(nEM+1:nEM+nMets)]; 0680 end -0681 if isfield(outModel,'metFormulas') -0682 outModel.metFormulas=[outModel.metFormulas;outModel.metFormulas(nEM+1:nEM+nMets)]; +0681 if isfield(outModel,'unconstrained') +0682 outModel.unconstrained=[outModel.unconstrained;outModel.unconstrained(nEM+1:nEM+nMets)]; 0683 end -0684 if isfield(outModel,'metFrom') -0685 outModel.metFrom=[outModel.metFrom;outModel.metFrom(nEM+1:nEM+nMets)]; +0684 if isfield(outModel,'metMiriams') +0685 outModel.metMiriams=[outModel.metMiriams;outModel.metMiriams(nEM+1:nEM+nMets)]; 0686 end -0687 if isfield(outModel,'metCharges') -0688 outModel.metCharges=[outModel.metCharges;outModel.metCharges(nEM+1:nEM+nMets)]; +0687 if isfield(outModel,'metFormulas') +0688 outModel.metFormulas=[outModel.metFormulas;outModel.metFormulas(nEM+1:nEM+nMets)]; 0689 end -0690 if isfield(outModel,'metDeltaG') -0691 outModel.metDeltaG=[outModel.metDeltaG;outModel.metDeltaG(nEM+1:nEM+nMets)]; +0690 if isfield(outModel,'metFrom') +0691 outModel.metFrom=[outModel.metFrom;outModel.metFrom(nEM+1:nEM+nMets)]; 0692 end -0693 end -0694 -0695 %Add the transport reactions -0696 transS=bestS(:,numel(outModel.rxns)+1:end); -0697 J=sum(transS)>0; %Active rxns -0698 -0699 %Transport reactions are written in a different way compared to a "real" -0700 %stoichimetric matrix. 
This is to fix that -0701 transS(transS~=0)=1; -0702 transS(1:nEM+nMets,:)=transS(1:nEM+nMets,:)*-1; -0703 I=find(sum(transS>0,2)); -0704 nTransRxns=numel(I); -0705 outModel.S=[outModel.S transS(:,J)]; -0706 filler=ones(nTransRxns,1); -0707 outModel.lb=[outModel.lb;filler*-1000]; -0708 outModel.ub=[outModel.ub;filler*1000]; -0709 outModel.rev=[outModel.rev;filler]; -0710 outModel.c=[outModel.c;filler*0]; -0711 outModel.rxnGeneMat=[outModel.rxnGeneMat;sparse(nTransRxns,numel(outModel.genes))]; -0712 -0713 for i=1:numel(I) -0714 outModel.rxns=[outModel.rxns;strcat('transport',num2str(i))]; -0715 outModel.rxnNames=[outModel.rxnNames;['Transport of ',outModel.metNames{I(i)}]]; -0716 if isfield(outModel,'grRules') -0717 outModel.grRules=[outModel.grRules;{''}]; -0718 end -0719 if isfield(outModel,'rxnMiriams') -0720 outModel.rxnMiriams=[outModel.rxnMiriams;{[]}]; -0721 end -0722 if isfield(outModel,'subSystems') -0723 outModel.subSystems=[outModel.subSystems;{{'Inferred transport reactions'}}]; +0693 if isfield(outModel,'metCharges') +0694 outModel.metCharges=[outModel.metCharges;outModel.metCharges(nEM+1:nEM+nMets)]; +0695 end +0696 if isfield(outModel,'metDeltaG') +0697 outModel.metDeltaG=[outModel.metDeltaG;outModel.metDeltaG(nEM+1:nEM+nMets)]; +0698 end +0699 end +0700 +0701 %Add the transport reactions +0702 transS=bestS(:,numel(outModel.rxns)+1:end); +0703 J=sum(transS)>0; %Active rxns +0704 +0705 %Transport reactions are written in a different way compared to a "real" +0706 %stoichimetric matrix. 
This is to fix that +0707 transS(transS~=0)=1; +0708 transS(1:nEM+nMets,:)=transS(1:nEM+nMets,:)*-1; +0709 I=find(sum(transS>0,2)); +0710 nTransRxns=numel(I); +0711 outModel.S=[outModel.S transS(:,J)]; +0712 filler=ones(nTransRxns,1); +0713 outModel.lb=[outModel.lb;filler*-1000]; +0714 outModel.ub=[outModel.ub;filler*1000]; +0715 outModel.rev=[outModel.rev;filler]; +0716 outModel.c=[outModel.c;filler*0]; +0717 outModel.rxnGeneMat=[outModel.rxnGeneMat;sparse(nTransRxns,numel(outModel.genes))]; +0718 +0719 for i=1:numel(I) +0720 outModel.rxns=[outModel.rxns;strcat('transport',num2str(i))]; +0721 outModel.rxnNames=[outModel.rxnNames;['Transport of ',outModel.metNames{I(i)}]]; +0722 if isfield(outModel,'grRules') +0723 outModel.grRules=[outModel.grRules;{''}]; 0724 end -0725 if isfield(outModel,'eccodes') -0726 outModel.eccodes=[outModel.eccodes;{''}]; +0725 if isfield(outModel,'rxnMiriams') +0726 outModel.rxnMiriams=[outModel.rxnMiriams;{[]}]; 0727 end -0728 if isfield(outModel,'rxnFrom') -0729 outModel.rxnFrom=[outModel.rxnFrom;{''}]; +0728 if isfield(outModel,'subSystems') +0729 outModel.subSystems=[outModel.subSystems;{{'Inferred transport reactions'}}]; 0730 end -0731 if isfield(outModel,'rxnNotes') -0732 outModel.rxnNotes=[outModel.rxnNotes;{''}]; +0731 if isfield(outModel,'eccodes') +0732 outModel.eccodes=[outModel.eccodes;{''}]; 0733 end -0734 if isfield(outModel,'rxnReferences') -0735 outModel.rxnReferences=[outModel.rxnReferences;{''}]; +0734 if isfield(outModel,'rxnFrom') +0735 outModel.rxnFrom=[outModel.rxnFrom;{''}]; 0736 end -0737 if isfield(outModel,'rxnConfidenceScores') -0738 outModel.rxnConfidenceScores=[outModel.rxnConfidenceScores;NaN]; +0737 if isfield(outModel,'rxnNotes') +0738 outModel.rxnNotes=[outModel.rxnNotes;{''}]; 0739 end -0740 if isfield(outModel,'rxnDeltaG') -0741 outModel.rxnDeltaG=[outModel.rxnDeltaG;NaN]; +0740 if isfield(outModel,'rxnReferences') +0741 outModel.rxnReferences=[outModel.rxnReferences;{''}]; 0742 end -0743 end -0744 
-0745 %Then remove all reactions and metabolites that aren't used in the final -0746 %solution from the optimization -0747 [~, J]=find(bestS(:,1:nER+nComps*nRxns)); -0748 K=true(numel(outModel.rxns),1); -0749 K(J)=false; -0750 K(end-nTransRxns+1:end)=false; -0751 outModel=removeReactions(outModel,K,true); -0752 -0753 %Remove all fake genes -0754 I=strncmp('&&FAKE&&',outModel.genes,8); -0755 outModel.genes(I)=[]; -0756 if isfield(outModel,'geneMiriams') -0757 outModel.geneMiriams(I)=[]; -0758 end -0759 if isfield(outModel,'geneShortNames') -0760 outModel.geneShortNames(I)=[]; -0761 end -0762 outModel.rxnGeneMat(:,I)=[]; -0763 -0764 %Fix grRules and reconstruct rxnGeneMat -0765 [grRules,rxnGeneMat] = standardizeGrRules(outModel,true); -0766 outModel.grRules = grRules; -0767 outModel.rxnGeneMat = rxnGeneMat; -0768 end -0769 -0770 %Moves a gene and all associated reactions from one compartment to another -0771 function [S, g2c]=moveGene(S,model,g2c,geneToMove,toComp,nRxns,nMets) -0772 %Find the current compartment and update to the new one -0773 currentComp=find(g2c(geneToMove,:)); -0774 g2c(geneToMove,:)=false; -0775 g2c(geneToMove,toComp)=true; -0776 -0777 %Find the reactions in the original model that the gene controls -0778 [I, ~]=find(model.rxnGeneMat(:,geneToMove)); -0779 -0780 %Calculate their current positions in the S matrix -0781 oldRxns=I+(currentComp-1)*nRxns; -0782 -0783 %And their new positions -0784 newRxns=I+(toComp-1)*nRxns; +0743 if isfield(outModel,'rxnConfidenceScores') +0744 outModel.rxnConfidenceScores=[outModel.rxnConfidenceScores;NaN]; +0745 end +0746 if isfield(outModel,'rxnDeltaG') +0747 outModel.rxnDeltaG=[outModel.rxnDeltaG;NaN]; +0748 end +0749 end +0750 +0751 %Then remove all reactions and metabolites that aren't used in the final +0752 %solution from the optimization +0753 [~, J]=find(bestS(:,1:nER+nComps*nRxns)); +0754 K=true(numel(outModel.rxns),1); +0755 K(J)=false; +0756 K(end-nTransRxns+1:end)=false; +0757 
outModel=removeReactions(outModel,K,true); +0758 +0759 %Remove all fake genes +0760 I=strncmp('&&FAKE&&',outModel.genes,8); +0761 outModel.genes(I)=[]; +0762 if isfield(outModel,'geneMiriams') +0763 outModel.geneMiriams(I)=[]; +0764 end +0765 if isfield(outModel,'geneShortNames') +0766 outModel.geneShortNames(I)=[]; +0767 end +0768 if isfield(outModel,'proteinNames') +0769 outModel.proteinNames(I)=[]; +0770 end +0771 outModel.rxnGeneMat(:,I)=[]; +0772 +0773 %Fix grRules and reconstruct rxnGeneMat +0774 [grRules,rxnGeneMat] = standardizeGrRules(outModel,true); +0775 outModel.grRules = grRules; +0776 outModel.rxnGeneMat = rxnGeneMat; +0777 end +0778 +0779 %Moves a gene and all associated reactions from one compartment to another +0780 function [S, g2c]=moveGene(S,model,g2c,geneToMove,toComp,nRxns,nMets) +0781 %Find the current compartment and update to the new one +0782 currentComp=find(g2c(geneToMove,:)); +0783 g2c(geneToMove,:)=false; +0784 g2c(geneToMove,toComp)=true; 0785 -0786 %The metabolite ids also have to be changed in order to match the new -0787 %compartment -0788 metChange=nMets*(toComp-currentComp); -0789 -0790 %Update the reactions -0791 [I, J, K]=find(S(:,oldRxns)); -0792 I=I+metChange; -0793 -0794 %Move the reactions -0795 S(:,oldRxns)=0; -0796 S(sub2ind(size(S),I,newRxns(J)))=K; -0797 end +0786 %Find the reactions in the original model that the gene controls +0787 [I, ~]=find(model.rxnGeneMat(:,geneToMove)); +0788 +0789 %Calculate their current positions in the S matrix +0790 oldRxns=I+(currentComp-1)*nRxns; +0791 +0792 %And their new positions +0793 newRxns=I+(toComp-1)*nRxns; +0794 +0795 %The metabolite ids also have to be changed in order to match the new +0796 %compartment +0797 metChange=nMets*(toComp-currentComp); 0798 -0799 %Finds which metabolites are unconnected, in the sense that they are never -0800 %a product or only a product in a reversible reaction where one reactant is -0801 %only a product in the opposite direction of that reaction. 
This function -0802 %ignores exchange metabolites. Returns a vector of metabolite indexes. -0803 %metsToCheck is an array of metabolite indexes to check for connectivity. -0804 %If not supplied then all metabolites are checked -0805 function unconnected=findUnconnected(S,nEM,metsToCheck) -0806 if nargin>2 -0807 %Do this by deleting everything from the network that is not in -0808 %metsToCheck and that is not exchange metabolites -0809 I=false(size(S,1),1); -0810 I(1:nEM)=true; -0811 I(metsToCheck)=true; -0812 S=S(I,:); -0813 end -0814 -0815 em=false(size(S,1),1); -0816 em(1:nEM)=true; -0817 -0818 %Construct a matrix in which the reversible reactions are inverted -0819 I=sum(S>2,1) | sum(S>2,1); -0820 revS=S; -0821 revS(:,I)=revS(:,I)*-1; -0822 -0823 %First calculate the ones that are ok -0824 %Produced in 2 rxns, is exchange, is not used at all, is produced in -0825 %non-reversible, involved in more than 1 reversible reactions -0826 connected=sum(S>0,2)>1 | em | sum(S~=0,2)==0 | sum(S(:,~I)>0,2)>0 | sum(S(:,I)~=0,2)>1; -0827 -0828 %Then get the ones that are unconnected because they are never produced -0829 unconnected=sum(S>0 | revS>0,2)==0 & connected==false; -0830 -0831 %Then get the ones that are potentially unconnected -0832 maybeUnconnected=~connected & ~unconnected; -0833 %maybeUnconnected=find(maybeUnconnectedS); -0834 -0835 %The metabolites in maybeUnconnected are involved in one reversible -0836 %reaction and not produced in any other reaction. This means that the -0837 %reactions which have at least one met in maybeUnconnected as reactant and -0838 %one as product are unconnected. 
The metabolites in maybeUnconnected that -0839 %are present in those reactions are then dead ends -0840 deadRxns=any(S(maybeUnconnected,:)>0) & any(S(maybeUnconnected,:)<0); -0841 -0842 %Get the mets involved in any of those reactions -0843 problematic=any(S(:,deadRxns)~=0,2); -0844 -0845 %If any of these are in the maybeUnconnected list then the metabolite is -0846 %unconnected -0847 unconnected(problematic & maybeUnconnected)=true; -0848 -0849 %Map back to metsToCheck -0850 if nargin>2 -0851 unconnected=metsToCheck(unconnected(nEM+1:end)); -0852 else -0853 unconnected=find(unconnected); -0854 end -0855 end -0856 -0857 %Given a set of unconnected metabolites, this function tries to move each -0858 %gene that could connect any of them, calculates the number of newly -0859 %connected metabolites minus the number of newly disconnected metabolites. -0860 %As some metabolites are very connected, only 25 genes are checked. Genes -0861 %that have a low score in their current compartment are more likely to be -0862 %moved -0863 function [geneIndex, moveTo, deltaConnected, deltaScore]=selectGenes(S,nEM,nMets,nER,nRxns,model,unconnected,g2c,GSS) -0864 %If moveTo is 0 then the gene cannot connect any of the metabolites -0865 moveTo=zeros(numel(model.genes),1); -0866 deltaConnected=zeros(numel(model.genes),1); -0867 -0868 %First get where the metabolites are now -0869 nComps=size(g2c,2); -0870 comps=ceil((unconnected-nEM)/((size(S,1)-nEM)/nComps)); -0871 -0872 %Find the corresponding metabolite indexes if they all were in the default -0873 %compartment -0874 dcIndexes=unique(unconnected-(comps-1)*nMets); -0875 -0876 %Then find them if they were in any other compartment -0877 allIndexes=dcIndexes; -0878 for i=1:nComps-1 -0879 allIndexes=[allIndexes;dcIndexes+nMets*i]; -0880 end -0881 -0882 %Also check which reversible reactions that could be used -0883 I=sum(S>2,1) | sum(S>2,1); -0884 revS=S; -0885 revS(:,I)=revS(:,I)*-1; -0886 -0887 %Find all reactions that could make any of 
the unconnected metabolites in -0888 %some other compartment -0889 newMets=setdiff(allIndexes,unconnected); -0890 [~, potential]=find(S(newMets,:)>0 | revS(newMets,:)>0); -0891 potential(potential<=nER | potential>nER+nRxns*nComps)=[]; %No exchange rxns or transport rxns -0892 -0893 %Map J to the real metabolic reactions in model -0894 rxnComps=ceil((potential-nER)/(nRxns)); +0799 %Update the reactions +0800 [I, J, K]=find(S(:,oldRxns)); +0801 I=I+metChange; +0802 +0803 %Move the reactions +0804 S(:,oldRxns)=0; +0805 S(sub2ind(size(S),I,newRxns(J)))=K; +0806 end +0807 +0808 %Finds which metabolites are unconnected, in the sense that they are never +0809 %a product or only a product in a reversible reaction where one reactant is +0810 %only a product in the opposite direction of that reaction. This function +0811 %ignores exchange metabolites. Returns a vector of metabolite indexes. +0812 %metsToCheck is an array of metabolite indexes to check for connectivity. +0813 %If not supplied then all metabolites are checked +0814 function unconnected=findUnconnected(S,nEM,metsToCheck) +0815 if nargin>2 +0816 %Do this by deleting everything from the network that is not in +0817 %metsToCheck and that is not exchange metabolites +0818 I=false(size(S,1),1); +0819 I(1:nEM)=true; +0820 I(metsToCheck)=true; +0821 S=S(I,:); +0822 end +0823 +0824 em=false(size(S,1),1); +0825 em(1:nEM)=true; +0826 +0827 %Construct a matrix in which the reversible reactions are inverted +0828 I=sum(S>2,1) | sum(S>2,1); +0829 revS=S; +0830 revS(:,I)=revS(:,I)*-1; +0831 +0832 %First calculate the ones that are ok +0833 %Produced in 2 rxns, is exchange, is not used at all, is produced in +0834 %non-reversible, involved in more than 1 reversible reactions +0835 connected=sum(S>0,2)>1 | em | sum(S~=0,2)==0 | sum(S(:,~I)>0,2)>0 | sum(S(:,I)~=0,2)>1; +0836 +0837 %Then get the ones that are unconnected because they are never produced +0838 unconnected=sum(S>0 | revS>0,2)==0 & connected==false; +0839 +0840 
%Then get the ones that are potentially unconnected +0841 maybeUnconnected=~connected & ~unconnected; +0842 %maybeUnconnected=find(maybeUnconnectedS); +0843 +0844 %The metabolites in maybeUnconnected are involved in one reversible +0845 %reaction and not produced in any other reaction. This means that the +0846 %reactions which have at least one met in maybeUnconnected as reactant and +0847 %one as product are unconnected. The metabolites in maybeUnconnected that +0848 %are present in those reactions are then dead ends +0849 deadRxns=any(S(maybeUnconnected,:)>0) & any(S(maybeUnconnected,:)<0); +0850 +0851 %Get the mets involved in any of those reactions +0852 problematic=any(S(:,deadRxns)~=0,2); +0853 +0854 %If any of these are in the maybeUnconnected list then the metabolite is +0855 %unconnected +0856 unconnected(problematic & maybeUnconnected)=true; +0857 +0858 %Map back to metsToCheck +0859 if nargin>2 +0860 unconnected=metsToCheck(unconnected(nEM+1:end)); +0861 else +0862 unconnected=find(unconnected); +0863 end +0864 end +0865 +0866 %Given a set of unconnected metabolites, this function tries to move each +0867 %gene that could connect any of them, calculates the number of newly +0868 %connected metabolites minus the number of newly disconnected metabolites. +0869 %As some metabolites are very connected, only 25 genes are checked. 
Genes +0870 %that have a low score in their current compartment are more likely to be +0871 %moved +0872 function [geneIndex, moveTo, deltaConnected, deltaScore]=selectGenes(S,nEM,nMets,nER,nRxns,model,unconnected,g2c,GSS) +0873 %If moveTo is 0 then the gene cannot connect any of the metabolites +0874 moveTo=zeros(numel(model.genes),1); +0875 deltaConnected=zeros(numel(model.genes),1); +0876 +0877 %First get where the metabolites are now +0878 nComps=size(g2c,2); +0879 comps=ceil((unconnected-nEM)/((size(S,1)-nEM)/nComps)); +0880 +0881 %Find the corresponding metabolite indexes if they all were in the default +0882 %compartment +0883 dcIndexes=unique(unconnected-(comps-1)*nMets); +0884 +0885 %Then find them if they were in any other compartment +0886 allIndexes=dcIndexes; +0887 for i=1:nComps-1 +0888 allIndexes=[allIndexes;dcIndexes+nMets*i]; +0889 end +0890 +0891 %Also check which reversible reactions that could be used +0892 I=sum(S>2,1) | sum(S>2,1); +0893 revS=S; +0894 revS(:,I)=revS(:,I)*-1; 0895 -0896 %Find the corresponding reaction indexes if they all were in the default -0897 %compartment -0898 dcRxnIndexes=potential-(rxnComps-1)*nRxns; -0899 -0900 %Get the genes for those reactions -0901 genes=find(sum(model.rxnGeneMat(dcRxnIndexes,:)>0,1)); -0902 -0903 %For some cases there can be very many reactions to connect something. This -0904 %is in particular true in the beginning of the optimization if, say, ATP is -0905 %unconnected. Therefore limit the number of genes to be checked to 25. -0906 %Weigh so that genes with bad scores in their current compartment are more -0907 %likely to be moved. 
+0896 %Find all reactions that could make any of the unconnected metabolites in +0897 %some other compartment +0898 newMets=setdiff(allIndexes,unconnected); +0899 [~, potential]=find(S(newMets,:)>0 | revS(newMets,:)>0); +0900 potential(potential<=nER | potential>nER+nRxns*nComps)=[]; %No exchange rxns or transport rxns +0901 +0902 %Map J to the real metabolic reactions in model +0903 rxnComps=ceil((potential-nER)/(nRxns)); +0904 +0905 %Find the corresponding reaction indexes if they all were in the default +0906 %compartment +0907 dcRxnIndexes=potential-(rxnComps-1)*nRxns; 0908 -0909 %Get scores for these genes -0910 [~, J]=find(g2c(genes,:)); +0909 %Get the genes for those reactions +0910 genes=find(sum(model.rxnGeneMat(dcRxnIndexes,:)>0,1)); 0911 -0912 %Add a small weight so that genes in their best compartment could be moved -0913 %as well -0914 geneScores=GSS.scores(sub2ind(size(g2c),genes(:),J)); -0915 modGeneScores=1.1-geneScores; -0916 if numel(genes)>25 -0917 rGenes=genes(randsample(numel(genes),min(numel(genes),25),true,modGeneScores)); -0918 -0919 %The sampling with weights could give duplicates -0920 rGenes=unique(rGenes); -0921 -0922 %Reorder the geneScores to match -0923 [~, I]=ismember(rGenes,genes); -0924 geneScores=geneScores(I); -0925 genes=rGenes; -0926 end -0927 for i=1:numel(genes) -0928 %Since one gene is moved at a time, only metabolites involved in any of -0929 %the reactions for that gene can become unconnected. This helps to -0930 %speed up the algorithm. First get all involved reactions in the -0931 %default compartment -0932 rxns=find(model.rxnGeneMat(:,genes(i))); -0933 -0934 %Then get their mets -0935 mets=find(sum(model.S(:,rxns)~=0,2)>0); -0936 -0937 %Then get their indexes in all compartments -0938 allIndexes=mets; -0939 for j=1:nComps-1 -0940 allIndexes=[allIndexes;mets+nMets*j]; -0941 end +0912 %For some cases there can be very many reactions to connect something. 
This +0913 %is in particular true in the beginning of the optimization if, say, ATP is +0914 %unconnected. Therefore limit the number of genes to be checked to 25. +0915 %Weigh so that genes with bad scores in their current compartment are more +0916 %likely to be moved. +0917 +0918 %Get scores for these genes +0919 [~, J]=find(g2c(genes,:)); +0920 +0921 %Add a small weight so that genes in their best compartment could be moved +0922 %as well +0923 geneScores=GSS.scores(sub2ind(size(g2c),genes(:),J)); +0924 modGeneScores=1.1-geneScores; +0925 if numel(genes)>25 +0926 rGenes=genes(randsample(numel(genes),min(numel(genes),25),true,modGeneScores)); +0927 +0928 %The sampling with weights could give duplicates +0929 rGenes=unique(rGenes); +0930 +0931 %Reorder the geneScores to match +0932 [~, I]=ismember(rGenes,genes); +0933 geneScores=geneScores(I); +0934 genes=rGenes; +0935 end +0936 for i=1:numel(genes) +0937 %Since one gene is moved at a time, only metabolites involved in any of +0938 %the reactions for that gene can become unconnected. This helps to +0939 %speed up the algorithm. First get all involved reactions in the +0940 %default compartment +0941 rxns=find(model.rxnGeneMat(:,genes(i))); 0942 -0943 %Check which of the unconnected metabolites that these reactions -0944 %correspond to. This could have been done earlier, but it is fast. The -0945 %reversibility check is skipped because it is unlikely to be an issue -0946 %here. 
Worst case is that the gene is tested once to much -0947 [I, ~]=find(model.S(:,rxns)); -0948 moveToComps=unique(comps(ismember(dcIndexes,I))); -0949 -0950 %Try to move the gene to each of the compartments -0951 bestMove=-inf; -0952 bestComp=[]; -0953 for j=1:numel(moveToComps) -0954 newS=moveGene(S,model,g2c,genes(i),moveToComps(j),nRxns,nMets); -0955 -0956 %Check how many metabolites that are unconnected after moving the -0957 %gene -0958 dConnected=numel(unconnected)-numel(findUnconnected(newS,nEM,[allIndexes;unconnected])); -0959 if dConnected>bestMove -0960 bestMove=dConnected; -0961 bestComp=moveToComps(j); -0962 end -0963 end -0964 -0965 %Add the difference in connectivity and where the genes should be moved -0966 moveTo(genes(i))=bestComp; -0967 deltaConnected(genes(i))=bestMove; -0968 end -0969 -0970 %Finish up -0971 geneIndex=genes(:); -0972 moveTo=moveTo(geneIndex); -0973 deltaConnected=deltaConnected(geneIndex); -0974 deltaScore=GSS.scores(sub2ind(size(g2c),geneIndex(:),moveTo))-geneScores; -0975 end -0976 -0977 %Small function to add a transport reactions between two metabolites. -0978 %Transport reactions are written as having a coefficient 2.0 for both -0979 %reactant and product. 
This is not a "real" reaction, but since all normal -0980 %reactions have coefficient -1/1 or -10/10 it is a compact way of writing -0981 %it -0982 function S=addTransport(S,nRxns,nER,nMets,nEM,nComps,metA,metB) -0983 mets=[metA;metB]; -0984 %Find the current compartments for the metabolites -0985 comps=ceil((mets-nEM)/((size(S,1)-nEM)/nComps)); -0986 -0987 if sum(comps==1)~=1 -0988 EM='Tried to create a transport reaction from a non-default compartment'; -0989 dispEM(EM); -0990 end -0991 -0992 %Calculate the reaction index -0993 rIndex=(nER+nRxns*nComps)+mets(comps~=1)-nEM-nMets; -0994 -0995 S(mets,rIndex)=2; -0996 end -0997 -0998 %Scores a network based on the localization of the genes and the number of -0999 %transporter reactions used -1000 function [score, geneScore, transportCost]=scoreModel(S,g2c,GSS,transportCost) -1001 [I, J]=find(g2c); -1002 geneScore=sum(GSS.scores(sub2ind(size(g2c),I,J))); -1003 [I, ~]=find(S==2); -1004 I=unique(I); -1005 transportCost=sum(transportCost(I)); -1006 score=geneScore-transportCost; -1007 end -1008 -1009 % To avoid dependency on stats toolbox, use this alternative implementation -1010 % of randsample, source: -1011 % https://github.com/gpeyre/numerical-tours/blob/dacee30081c04ef5f67b26b387ead85f2b193af9/matlab/toolbox_signal/randsample.m -1012 function y = randsample(n, k, replace, w) -1013 %RANDSAMPLE Random sample, with or without replacement. -1014 % Y = RANDSAMPLE(N,K) returns Y as a vector of K values sampled uniformly -1015 % at random, without replacement, from the integers 1:N. -1016 % -1017 % Y = RANDSAMPLE(POPULATION,K) returns K values sampled uniformly at -1018 % random, without replacement, from the values in the vector POPULATION. -1019 % -1020 % Y = RANDSAMPLE(...,REPLACE) returns a sample taken with replacement if -1021 % REPLACE is true, or without replacement if REPLACE is false (the default). 
-1022 % -1023 % Y = RANDSAMPLE(...,true,W) returns a weighted sample, using positive -1024 % weights W, taken with replacement. W is often a vector of probabilities. -1025 % This function does not support weighted sampling without replacement. -1026 % -1027 % Example: Generate a random sequence of the characters ACGT, with -1028 % replacement, according to specified probabilities. -1029 % -1030 % R = randsample('ACGT',48,true,[0.15 0.35 0.35 0.15]) +0943 %Then get their mets +0944 mets=find(sum(model.S(:,rxns)~=0,2)>0); +0945 +0946 %Then get their indexes in all compartments +0947 allIndexes=mets; +0948 for j=1:nComps-1 +0949 allIndexes=[allIndexes;mets+nMets*j]; +0950 end +0951 +0952 %Check which of the unconnected metabolites that these reactions +0953 %correspond to. This could have been done earlier, but it is fast. The +0954 %reversibility check is skipped because it is unlikely to be an issue +0955 %here. Worst case is that the gene is tested once to much +0956 [I, ~]=find(model.S(:,rxns)); +0957 moveToComps=unique(comps(ismember(dcIndexes,I))); +0958 +0959 %Try to move the gene to each of the compartments +0960 bestMove=-inf; +0961 bestComp=[]; +0962 for j=1:numel(moveToComps) +0963 newS=moveGene(S,model,g2c,genes(i),moveToComps(j),nRxns,nMets); +0964 +0965 %Check how many metabolites that are unconnected after moving the +0966 %gene +0967 dConnected=numel(unconnected)-numel(findUnconnected(newS,nEM,[allIndexes;unconnected])); +0968 if dConnected>bestMove +0969 bestMove=dConnected; +0970 bestComp=moveToComps(j); +0971 end +0972 end +0973 +0974 %Add the difference in connectivity and where the genes should be moved +0975 moveTo(genes(i))=bestComp; +0976 deltaConnected(genes(i))=bestMove; +0977 end +0978 +0979 %Finish up +0980 geneIndex=genes(:); +0981 moveTo=moveTo(geneIndex); +0982 deltaConnected=deltaConnected(geneIndex); +0983 deltaScore=GSS.scores(sub2ind(size(g2c),geneIndex(:),moveTo))-geneScores; +0984 end +0985 +0986 %Small function to add a transport 
reactions between two metabolites. +0987 %Transport reactions are written as having a coefficient 2.0 for both +0988 %reactant and product. This is not a "real" reaction, but since all normal +0989 %reactions have coefficient -1/1 or -10/10 it is a compact way of writing +0990 %it +0991 function S=addTransport(S,nRxns,nER,nMets,nEM,nComps,metA,metB) +0992 mets=[metA;metB]; +0993 %Find the current compartments for the metabolites +0994 comps=ceil((mets-nEM)/((size(S,1)-nEM)/nComps)); +0995 +0996 if sum(comps==1)~=1 +0997 EM='Tried to create a transport reaction from a non-default compartment'; +0998 dispEM(EM); +0999 end +1000 +1001 %Calculate the reaction index +1002 rIndex=(nER+nRxns*nComps)+mets(comps~=1)-nEM-nMets; +1003 +1004 S(mets,rIndex)=2; +1005 end +1006 +1007 %Scores a network based on the localization of the genes and the number of +1008 %transporter reactions used +1009 function [score, geneScore, transportCost]=scoreModel(S,g2c,GSS,transportCost) +1010 [I, J]=find(g2c); +1011 geneScore=sum(GSS.scores(sub2ind(size(g2c),I,J))); +1012 [I, ~]=find(S==2); +1013 I=unique(I); +1014 transportCost=sum(transportCost(I)); +1015 score=geneScore-transportCost; +1016 end +1017 +1018 % To avoid dependency on stats toolbox, use this alternative implementation +1019 % of randsample, source: +1020 % https://github.com/gpeyre/numerical-tours/blob/dacee30081c04ef5f67b26b387ead85f2b193af9/matlab/toolbox_signal/randsample.m +1021 function y = randsample(n, k, replace, w) +1022 %RANDSAMPLE Random sample, with or without replacement. +1023 % Y = RANDSAMPLE(N,K) returns Y as a vector of K values sampled uniformly +1024 % at random, without replacement, from the integers 1:N. +1025 % +1026 % Y = RANDSAMPLE(POPULATION,K) returns K values sampled uniformly at +1027 % random, without replacement, from the values in the vector POPULATION. 
+1028 % +1029 % Y = RANDSAMPLE(...,REPLACE) returns a sample taken with replacement if +1030 % REPLACE is true, or without replacement if REPLACE is false (the default). 1031 % -1032 % See also RAND, RANDPERM. -1033 -1034 % Copyright 1993-2008 The MathWorks, Inc. -1035 % $Revision: 1.1.4.3 $ $Date: 2008/12/01 08:09:34 $ -1036 -1037 if nargin < 2 -1038 error('stats:randsample:TooFewInputs','Requires two input arguments.'); -1039 elseif numel(n) == 1 -1040 population = []; -1041 else -1042 population = n; -1043 n = numel(population); -1044 if length(population)~=n -1045 error('stats:randsample:BadPopulation','POPULATION must be a vector.'); -1046 end -1047 end -1048 -1049 if nargin < 3 -1050 replace = false; -1051 end -1052 -1053 if nargin < 4 -1054 w = []; -1055 elseif ~isempty(w) -1056 if length(w) ~= n -1057 if isempty(population) -1058 error('stats:randsample:InputSizeMismatch',... -1059 'W must have length equal to N.'); -1060 else -1061 error('stats:randsample:InputSizeMismatch',... -1062 'W must have the same length as the population.'); -1063 end -1064 else -1065 p = w(:)' / sum(w); -1066 end -1067 end -1068 -1069 switch replace -1070 -1071 % Sample with replacement -1072 case {true, 'true', 1} -1073 if isempty(w) -1074 y = ceil(n .* rand(k,1)); -1075 else -1076 [dum, y] = histc(rand(k,1),[0 cumsum(p)]); -1077 end -1078 -1079 % Sample without replacement -1080 case {false, 'false', 0} -1081 if k > n -1082 if isempty(population) -1083 error('stats:randsample:SampleTooLarge',... -1084 'K must be less than or equal to N for sampling without replacement.'); -1085 else -1086 error('stats:randsample:SampleTooLarge',... -1087 'K must be less than or equal to the population size.'); -1088 end -1089 end -1090 -1091 if isempty(w) -1092 % If the sample is a sizeable fraction of the population, -1093 % just randomize the whole population (which involves a full -1094 % sort of n random values), and take the first k. 
-1095 if 4*k > n -1096 rp = randperm(n); -1097 y = rp(1:k); -1098 -1099 % If the sample is a small fraction of the population, a full sort -1100 % is wasteful. Repeatedly sample with replacement until there are -1101 % k unique values. -1102 else -1103 x = zeros(1,n); % flags -1104 sumx = 0; -1105 while sumx < k -1106 x(ceil(n * rand(1,k-sumx))) = 1; % sample w/replacement -1107 sumx = sum(x); % count how many unique elements so far -1108 end -1109 y = find(x > 0); -1110 y = y(randperm(k)); -1111 end -1112 else -1113 error('stats:randsample:NoWeighting',... -1114 'Weighted sampling without replacement is not supported.'); -1115 end -1116 otherwise -1117 error('stats:randsample:BadReplaceValue',... -1118 'REPLACE must be either true or false.'); -1119 end -1120 -1121 if ~isempty(population) -1122 y = population(y); -1123 else -1124 y = y(:); -1125 end -1126 end +1032 % Y = RANDSAMPLE(...,true,W) returns a weighted sample, using positive +1033 % weights W, taken with replacement. W is often a vector of probabilities. +1034 % This function does not support weighted sampling without replacement. +1035 % +1036 % Example: Generate a random sequence of the characters ACGT, with +1037 % replacement, according to specified probabilities. +1038 % +1039 % R = randsample('ACGT',48,true,[0.15 0.35 0.35 0.15]) +1040 % +1041 % See also RAND, RANDPERM. +1042 +1043 % Copyright 1993-2008 The MathWorks, Inc. 
+1044 % $Revision: 1.1.4.3 $ $Date: 2008/12/01 08:09:34 $ +1045 +1046 if nargin < 2 +1047 error('stats:randsample:TooFewInputs','Requires two input arguments.'); +1048 elseif numel(n) == 1 +1049 population = []; +1050 else +1051 population = n; +1052 n = numel(population); +1053 if length(population)~=n +1054 error('stats:randsample:BadPopulation','POPULATION must be a vector.'); +1055 end +1056 end +1057 +1058 if nargin < 3 +1059 replace = false; +1060 end +1061 +1062 if nargin < 4 +1063 w = []; +1064 elseif ~isempty(w) +1065 if length(w) ~= n +1066 if isempty(population) +1067 error('stats:randsample:InputSizeMismatch',... +1068 'W must have length equal to N.'); +1069 else +1070 error('stats:randsample:InputSizeMismatch',... +1071 'W must have the same length as the population.'); +1072 end +1073 else +1074 p = w(:)' / sum(w); +1075 end +1076 end +1077 +1078 switch replace +1079 +1080 % Sample with replacement +1081 case {true, 'true', 1} +1082 if isempty(w) +1083 y = ceil(n .* rand(k,1)); +1084 else +1085 [dum, y] = histc(rand(k,1),[0 cumsum(p)]); +1086 end +1087 +1088 % Sample without replacement +1089 case {false, 'false', 0} +1090 if k > n +1091 if isempty(population) +1092 error('stats:randsample:SampleTooLarge',... +1093 'K must be less than or equal to N for sampling without replacement.'); +1094 else +1095 error('stats:randsample:SampleTooLarge',... +1096 'K must be less than or equal to the population size.'); +1097 end +1098 end +1099 +1100 if isempty(w) +1101 % If the sample is a sizeable fraction of the population, +1102 % just randomize the whole population (which involves a full +1103 % sort of n random values), and take the first k. +1104 if 4*k > n +1105 rp = randperm(n); +1106 y = rp(1:k); +1107 +1108 % If the sample is a small fraction of the population, a full sort +1109 % is wasteful. Repeatedly sample with replacement until there are +1110 % k unique values. 
+1111 else +1112 x = zeros(1,n); % flags +1113 sumx = 0; +1114 while sumx < k +1115 x(ceil(n * rand(1,k-sumx))) = 1; % sample w/replacement +1116 sumx = sum(x); % count how many unique elements so far +1117 end +1118 y = find(x > 0); +1119 y = y(randperm(k)); +1120 end +1121 else +1122 error('stats:randsample:NoWeighting',... +1123 'Weighted sampling without replacement is not supported.'); +1124 end +1125 otherwise +1126 error('stats:randsample:BadReplaceValue',... +1127 'REPLACE must be either true or false.'); +1128 end +1129 +1130 if ~isempty(population) +1131 y = population(y); +1132 else +1133 y = y(:); +1134 end +1135 end
Generated by m2html © 2005
\ No newline at end of file diff --git a/doc/core/removeReactions.html b/doc/core/removeReactions.html index e74f0152..e9cc84ac 100644 --- a/doc/core/removeReactions.html +++ b/doc/core/removeReactions.html @@ -192,23 +192,27 @@

SOURCE CODE ^if isfield(reducedModel,'geneShortNames') 0130 reducedModel.geneShortNames=reducedModel.geneShortNames(toKeep); 0131 end -0132 -0133 if isfield(reducedModel,'geneMiriams') -0134 reducedModel.geneMiriams=reducedModel.geneMiriams(toKeep); +0132 +0133 if isfield(reducedModel,'proteinNames') +0134 reducedModel.proteinNames=reducedModel.proteinNames(toKeep); 0135 end 0136 -0137 if isfield(reducedModel,'geneFrom') -0138 reducedModel.geneFrom=reducedModel.geneFrom(toKeep); +0137 if isfield(reducedModel,'geneMiriams') +0138 reducedModel.geneMiriams=reducedModel.geneMiriams(toKeep); 0139 end 0140 -0141 if isfield(reducedModel,'geneComps') -0142 reducedModel.geneComps=reducedModel.geneComps(toKeep); +0141 if isfield(reducedModel,'geneFrom') +0142 reducedModel.geneFrom=reducedModel.geneFrom(toKeep); 0143 end -0144 end -0145 else -0146 reducedModel=model; -0147 end -0148 end +0144 +0145 if isfield(reducedModel,'geneComps') +0146 reducedModel.geneComps=reducedModel.geneComps(toKeep); +0147 end +0148 end +0149 else +0150 reducedModel=model; +0151 end +0152 end
Generated by m2html © 2005
\ No newline at end of file diff --git a/doc/core/simplifyModel.html b/doc/core/simplifyModel.html index 01d16cc8..f7ae25a4 100644 --- a/doc/core/simplifyModel.html +++ b/doc/core/simplifyModel.html @@ -299,102 +299,105 @@

SOURCE CODE ^if isfield(reducedModel,'geneShortNames') 0222 reducedModel.geneShortNames={}; 0223 end -0224 if isfield(reducedModel,'geneMiriams') -0225 reducedModel.geneMiriams={}; +0224 if isfield(reducedModel,'proteinNames') +0225 reducedModel.proteinNames={}; 0226 end -0227 if isfield(reducedModel,'geneComps') -0228 reducedModel.geneComps=[]; +0227 if isfield(reducedModel,'geneMiriams') +0228 reducedModel.geneMiriams={}; 0229 end -0230 -0231 %Convert the model to irreversible -0232 irrevModel=convertToIrrev(reducedModel); +0230 if isfield(reducedModel,'geneComps') +0231 reducedModel.geneComps=[]; +0232 end 0233 -0234 %Loop through and iteratively group linear reactions -0235 while 1 -0236 %Get the banned reaction indexes. Note that the indexes will change -0237 %in each iteration, but the names will not as they won't be merged -0238 %with any other reaction -0239 bannedIndexes=getIndexes(irrevModel,reservedRxns,'rxns'); -0240 -0241 %Select all metabolites that are only present as reactants/products -0242 %in one reaction -0243 singleNegative=find(sum(irrevModel.S'<0)==1); -0244 singlePositive=find(sum(irrevModel.S'>0)==1); -0245 -0246 %Retrieve the common metabolites -0247 common=intersect(singleNegative,singlePositive); +0234 %Convert the model to irreversible +0235 irrevModel=convertToIrrev(reducedModel); +0236 +0237 %Loop through and iteratively group linear reactions +0238 while 1 +0239 %Get the banned reaction indexes. 
Note that the indexes will change +0240 %in each iteration, but the names will not as they won't be merged +0241 %with any other reaction +0242 bannedIndexes=getIndexes(irrevModel,reservedRxns,'rxns'); +0243 +0244 %Select all metabolites that are only present as reactants/products +0245 %in one reaction +0246 singleNegative=find(sum(irrevModel.S'<0)==1); +0247 singlePositive=find(sum(irrevModel.S'>0)==1); 0248 -0249 mergedSome=false; -0250 -0251 %Loop through each of them and see if the reactions should be -0252 %merged -0253 for i=1:numel(common) -0254 involvedRxns=find(irrevModel.S(common(i),:)); -0255 -0256 %Check so that one or both of the reactions haven't been merged -0257 %already -0258 if numel(involvedRxns)==2 && isempty(intersect(bannedIndexes,involvedRxns)) -0259 %Calculate how many times the second reaction has to be -0260 %multiplied before being merged with the first -0261 stoichRatio=abs(irrevModel.S(common(i),involvedRxns(1))/irrevModel.S(common(i),involvedRxns(2))); -0262 -0263 %Add the second to the first -0264 irrevModel.S(:,involvedRxns(1))=irrevModel.S(:,involvedRxns(1))+irrevModel.S(:,involvedRxns(2))*stoichRatio; +0249 %Retrieve the common metabolites +0250 common=intersect(singleNegative,singlePositive); +0251 +0252 mergedSome=false; +0253 +0254 %Loop through each of them and see if the reactions should be +0255 %merged +0256 for i=1:numel(common) +0257 involvedRxns=find(irrevModel.S(common(i),:)); +0258 +0259 %Check so that one or both of the reactions haven't been merged +0260 %already +0261 if numel(involvedRxns)==2 && isempty(intersect(bannedIndexes,involvedRxns)) +0262 %Calculate how many times the second reaction has to be +0263 %multiplied before being merged with the first +0264 stoichRatio=abs(irrevModel.S(common(i),involvedRxns(1))/irrevModel.S(common(i),involvedRxns(2))); 0265 -0266 %Clear the second reaction -0267 irrevModel.S(:,involvedRxns(2))=0; +0266 %Add the second to the first +0267 
irrevModel.S(:,involvedRxns(1))=irrevModel.S(:,involvedRxns(1))+irrevModel.S(:,involvedRxns(2))*stoichRatio; 0268 -0269 %This is to prevent numerical issues. It should be 0 -0270 %already -0271 irrevModel.S(common(i),involvedRxns(1))=0; -0272 -0273 %At this point the second reaction is certain to be deleted -0274 %in a later step and can therefore be ignored +0269 %Clear the second reaction +0270 irrevModel.S(:,involvedRxns(2))=0; +0271 +0272 %This is to prevent numerical issues. It should be 0 +0273 %already +0274 irrevModel.S(common(i),involvedRxns(1))=0; 0275 -0276 %Recalculate the bounds for the new reaction. This can be -0277 %problematic since the scale of the bounds may change -0278 %dramatically. Let the most constraining reaction determine -0279 %the new bound -0280 lb1=irrevModel.lb(involvedRxns(1)); -0281 lb2=irrevModel.lb(involvedRxns(2)); -0282 ub1=irrevModel.ub(involvedRxns(1)); -0283 ub2=irrevModel.ub(involvedRxns(2)); -0284 -0285 if lb2~=-inf -0286 irrevModel.lb(involvedRxns(1))=max(lb1,lb2/stoichRatio); -0287 end -0288 if ub2~=inf -0289 irrevModel.ub(involvedRxns(1))=min(ub1,ub2/stoichRatio); +0276 %At this point the second reaction is certain to be deleted +0277 %in a later step and can therefore be ignored +0278 +0279 %Recalculate the bounds for the new reaction. This can be +0280 %problematic since the scale of the bounds may change +0281 %dramatically. Let the most constraining reaction determine +0282 %the new bound +0283 lb1=irrevModel.lb(involvedRxns(1)); +0284 lb2=irrevModel.lb(involvedRxns(2)); +0285 ub1=irrevModel.ub(involvedRxns(1)); +0286 ub2=irrevModel.ub(involvedRxns(2)); +0287 +0288 if lb2~=-inf +0289 irrevModel.lb(involvedRxns(1))=max(lb1,lb2/stoichRatio); 0290 end -0291 -0292 %Then recalculate the objective coefficient. 
The resulting -0293 %coefficient is the weighted sum of the previous -0294 irrevModel.c(involvedRxns(1))=irrevModel.c(involvedRxns(1))+irrevModel.c(involvedRxns(2))*stoichRatio; -0295 -0296 %Iterate again -0297 mergedSome=true; -0298 end -0299 end -0300 -0301 %All possible reactions merged -0302 if mergedSome==false -0303 break; -0304 end -0305 -0306 %Now delete all reactions that involve no metabolites -0307 I=find(sum(irrevModel.S~=0)==0); +0291 if ub2~=inf +0292 irrevModel.ub(involvedRxns(1))=min(ub1,ub2/stoichRatio); +0293 end +0294 +0295 %Then recalculate the objective coefficient. The resulting +0296 %coefficient is the weighted sum of the previous +0297 irrevModel.c(involvedRxns(1))=irrevModel.c(involvedRxns(1))+irrevModel.c(involvedRxns(2))*stoichRatio; +0298 +0299 %Iterate again +0300 mergedSome=true; +0301 end +0302 end +0303 +0304 %All possible reactions merged +0305 if mergedSome==false +0306 break; +0307 end 0308 -0309 %Remove reactions -0310 irrevModel=removeReactions(irrevModel,I); +0309 %Now delete all reactions that involve no metabolites +0310 I=find(sum(irrevModel.S~=0)==0); 0311 -0312 %Remove metabolites -0313 notInUse=sum(irrevModel.S~=0,2)==0; -0314 irrevModel=removeMets(irrevModel,notInUse); -0315 end -0316 -0317 reducedModel=irrevModel; -0318 end -0319 end +0312 %Remove reactions +0313 irrevModel=removeReactions(irrevModel,I); +0314 +0315 %Remove metabolites +0316 notInUse=sum(irrevModel.S~=0,2)==0; +0317 irrevModel=removeMets(irrevModel,notInUse); +0318 end +0319 +0320 reducedModel=irrevModel; +0321 end +0322 end
Generated by m2html © 2005
\ No newline at end of file diff --git a/doc/io/exportModel.html b/doc/io/exportModel.html index 8e175fff..4524a780 100644 --- a/doc/io/exportModel.html +++ b/doc/io/exportModel.html @@ -161,716 +161,724 @@

SOURCE CODE ^if ~isfield(model,'geneShortNames') && isfield(model,'genes') 0100 model.geneShortNames=cell(numel(model.genes),1); 0101 end -0102 if ~isfield(model,'subSystems') -0103 model.subSystems=cell(numel(model.rxns),1); +0102 if ~isfield(model,'proteinNames') && isfield(model,'genes') +0103 model.proteinNames=cell(numel(model.genes),1); 0104 end -0105 if ~isfield(model,'eccodes') -0106 model.eccodes=cell(numel(model.rxns),1); +0105 if ~isfield(model,'subSystems') +0106 model.subSystems=cell(numel(model.rxns),1); 0107 end -0108 if ~isfield(model,'rxnReferences') -0109 model.rxnReferences=cell(numel(model.rxns),1); +0108 if ~isfield(model,'eccodes') +0109 model.eccodes=cell(numel(model.rxns),1); 0110 end -0111 if ~isfield(model,'rxnConfidenceScores') -0112 model.rxnConfidenceScores=NaN(numel(model.rxns),1); +0111 if ~isfield(model,'rxnReferences') +0112 model.rxnReferences=cell(numel(model.rxns),1); 0113 end -0114 if ~isfield(model,'rxnNotes') -0115 model.rxnNotes=cell(numel(model.rxns),1); +0114 if ~isfield(model,'rxnConfidenceScores') +0115 model.rxnConfidenceScores=NaN(numel(model.rxns),1); 0116 end -0117 if ~isfield(model,'rxnMiriams') -0118 model.rxnMiriams=cell(numel(model.rxns),1); +0117 if ~isfield(model,'rxnNotes') +0118 model.rxnNotes=cell(numel(model.rxns),1); 0119 end -0120 -0121 if sbmlLevel<3 -0122 %Check if genes have associated compartments -0123 if ~isfield(model,'geneComps') && isfield(model,'genes') -0124 if supressWarnings==false -0125 EM='There are no compartments specified for genes. All genes will be assigned to the first compartment. This is because the SBML structure requires all elements to be assigned to a compartment'; -0126 dispEM(EM,false); -0127 end -0128 model.geneComps=ones(numel(model.genes),1); -0129 end -0130 end -0131 -0132 %Convert ids to SBML-convenient format. This is to avoid the data loss when -0133 %unsupported characters are included in ids. 
Here we are using part from -0134 %convertSBMLID, originating from the COBRA Toolbox -0135 model.rxns=regexprep(model.rxns,'([^0-9_a-zA-Z])','__${num2str($1+0)}__'); -0136 model.mets=regexprep(model.mets,'([^0-9_a-zA-Z])','__${num2str($1+0)}__'); -0137 model.comps=regexprep(model.comps,'([^0-9_a-zA-Z])','__${num2str($1+0)}__'); -0138 if isfield(model,'genes') -0139 problemGenes=find(~cellfun('isempty',regexp(model.genes,'([^0-9_a-zA-Z])'))); -0140 originalGenes=model.genes(problemGenes); -0141 replacedGenes=regexprep(model.genes(problemGenes),'([^0-9_a-zA-Z])','__${num2str($1+0)}__'); -0142 model.genes(problemGenes)=replacedGenes; -0143 for i=1:numel(problemGenes) -0144 model.grRules = regexprep(model.grRules, ['(^|\s|\()' originalGenes{i} '($|\s|\))'], ['$1' replacedGenes{i} '$2']); -0145 end -0146 end -0147 -0148 %Generate an empty SBML structure -0149 modelSBML=getSBMLStructure(sbmlLevel,sbmlVersion,sbmlPackages,sbmlPackageVersions); -0150 modelSBML.metaid=model.id; -0151 modelSBML.id=regexprep(model.id,'([^0-9_a-zA-Z])','__${num2str($1+0)}__'); -0152 modelSBML.name=model.name; -0153 -0154 if isfield(model,'annotation') -0155 if isfield(model.annotation,'note') -0156 modelSBML.notes=['<notes><body xmlns="http://www.w3.org/1999/xhtml"><p>',regexprep(model.annotation.note,'<p>|</p>',''),'</p></body></notes>']; -0157 end -0158 else -0159 modelSBML.notes='<notes><body xmlns="http://www.w3.org/1999/xhtml"><p>This file was generated using the exportModel function in RAVEN Toolbox 2 and OutputSBML in libSBML </p></body></notes>'; -0160 end -0161 -0162 if isfield(model,'annotation') -0163 nameString=''; -0164 if isfield(model.annotation,'familyName') -0165 if ~isempty(model.annotation.familyName) -0166 nameString=['<vCard:Family>' model.annotation.familyName '</vCard:Family>']; -0167 end -0168 end -0169 if isfield(model.annotation,'givenName') -0170 if ~isempty(model.annotation.givenName) -0171 nameString=[nameString '<vCard:Given>' model.annotation.givenName 
'</vCard:Given>']; -0172 end -0173 end -0174 email=''; -0175 if isfield(model.annotation,'email') -0176 if ~isempty(model.annotation.email) -0177 email=['<vCard:EMAIL>' model.annotation.email '</vCard:EMAIL>']; -0178 end -0179 end -0180 org=''; -0181 if isfield(model.annotation,'organization') -0182 if ~isempty(model.annotation.organization) -0183 org=['<vCard:ORG rdf:parseType="Resource"><vCard:Orgname>' model.annotation.organization '</vCard:Orgname></vCard:ORG>']; -0184 end -0185 end -0186 if ~isempty(nameString) || ~isempty(email) || ~isempty(org) % Only fill .annotation if ownership data is provided -0187 modelSBML.annotation=['<annotation><rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:vCard="http://www.w3.org/2001/vcard-rdf/3.0#" xmlns:bqbiol="http://biomodels.net/biology-qualifiers/" xmlns:bqmodel="http://biomodels.net/model-qualifiers/"><rdf:Description rdf:about="#meta_' model.id '">']; -0188 modelSBML.annotation=[modelSBML.annotation '<dc:creator><rdf:Bag><rdf:li rdf:parseType="Resource">']; -0189 if ~isempty(nameString) -0190 modelSBML.annotation=[modelSBML.annotation '<vCard:N rdf:parseType="Resource">' nameString '</vCard:N>']; -0191 end -0192 modelSBML.annotation=[modelSBML.annotation email org '</rdf:li></rdf:Bag></dc:creator>']; -0193 modelSBML.annotation=[modelSBML.annotation '<dcterms:created rdf:parseType="Resource">'... -0194 '<dcterms:W3CDTF>' datestr(now,'yyyy-mm-ddTHH:MM:SSZ') '</dcterms:W3CDTF></dcterms:created><dcterms:modified rdf:parseType="Resource">'... 
-0195 '<dcterms:W3CDTF>' datestr(now,'yyyy-mm-ddTHH:MM:SSZ') '</dcterms:W3CDTF></dcterms:modified>']; -0196 if isfield(model.annotation,'taxonomy') -0197 modelSBML.annotation=[modelSBML.annotation '<bqbiol:is><rdf:Bag><rdf:li rdf:resource="https://identifiers.org/taxonomy/' regexprep(model.annotation.taxonomy,'taxonomy/','') '"/></rdf:Bag></bqbiol:is>']; -0198 end -0199 modelSBML.annotation=[modelSBML.annotation '</rdf:Description></rdf:RDF></annotation>']; -0200 end -0201 end -0202 -0203 %Prepare compartments -0204 for i=1:numel(model.comps) -0205 %Add the default values, as these will be the same in all entries -0206 if i==1 -0207 if isfield(modelSBML.compartment, 'sboTerm') -0208 modelSBML.compartment(i).sboTerm=290; -0209 end -0210 if isfield(modelSBML.compartment, 'spatialDimensions') -0211 modelSBML.compartment(i).spatialDimensions=3; +0120 if ~isfield(model,'rxnMiriams') +0121 model.rxnMiriams=cell(numel(model.rxns),1); +0122 end +0123 +0124 if sbmlLevel<3 +0125 %Check if genes have associated compartments +0126 if ~isfield(model,'geneComps') && isfield(model,'genes') +0127 if supressWarnings==false +0128 EM='There are no compartments specified for genes. All genes will be assigned to the first compartment. This is because the SBML structure requires all elements to be assigned to a compartment'; +0129 dispEM(EM,false); +0130 end +0131 model.geneComps=ones(numel(model.genes),1); +0132 end +0133 end +0134 +0135 %Convert ids to SBML-convenient format. This is to avoid the data loss when +0136 %unsupported characters are included in ids. 
Here we are using part from +0137 %convertSBMLID, originating from the COBRA Toolbox +0138 model.rxns=regexprep(model.rxns,'([^0-9_a-zA-Z])','__${num2str($1+0)}__'); +0139 model.mets=regexprep(model.mets,'([^0-9_a-zA-Z])','__${num2str($1+0)}__'); +0140 model.comps=regexprep(model.comps,'([^0-9_a-zA-Z])','__${num2str($1+0)}__'); +0141 if isfield(model,'genes') +0142 problemGenes=find(~cellfun('isempty',regexp(model.genes,'([^0-9_a-zA-Z])'))); +0143 originalGenes=model.genes(problemGenes); +0144 replacedGenes=regexprep(model.genes(problemGenes),'([^0-9_a-zA-Z])','__${num2str($1+0)}__'); +0145 model.genes(problemGenes)=replacedGenes; +0146 for i=1:numel(problemGenes) +0147 model.grRules = regexprep(model.grRules, ['(^|\s|\()' originalGenes{i} '($|\s|\))'], ['$1' replacedGenes{i} '$2']); +0148 end +0149 end +0150 +0151 %Generate an empty SBML structure +0152 modelSBML=getSBMLStructure(sbmlLevel,sbmlVersion,sbmlPackages,sbmlPackageVersions); +0153 modelSBML.metaid=model.id; +0154 modelSBML.id=regexprep(model.id,'([^0-9_a-zA-Z])','__${num2str($1+0)}__'); +0155 modelSBML.name=model.name; +0156 +0157 if isfield(model,'annotation') +0158 if isfield(model.annotation,'note') +0159 modelSBML.notes=['<notes><body xmlns="http://www.w3.org/1999/xhtml"><p>',regexprep(model.annotation.note,'<p>|</p>',''),'</p></body></notes>']; +0160 end +0161 else +0162 modelSBML.notes='<notes><body xmlns="http://www.w3.org/1999/xhtml"><p>This file was generated using the exportModel function in RAVEN Toolbox 2 and OutputSBML in libSBML </p></body></notes>'; +0163 end +0164 +0165 if isfield(model,'annotation') +0166 nameString=''; +0167 if isfield(model.annotation,'familyName') +0168 if ~isempty(model.annotation.familyName) +0169 nameString=['<vCard:Family>' model.annotation.familyName '</vCard:Family>']; +0170 end +0171 end +0172 if isfield(model.annotation,'givenName') +0173 if ~isempty(model.annotation.givenName) +0174 nameString=[nameString '<vCard:Given>' model.annotation.givenName 
'</vCard:Given>']; +0175 end +0176 end +0177 email=''; +0178 if isfield(model.annotation,'email') +0179 if ~isempty(model.annotation.email) +0180 email=['<vCard:EMAIL>' model.annotation.email '</vCard:EMAIL>']; +0181 end +0182 end +0183 org=''; +0184 if isfield(model.annotation,'organization') +0185 if ~isempty(model.annotation.organization) +0186 org=['<vCard:ORG rdf:parseType="Resource"><vCard:Orgname>' model.annotation.organization '</vCard:Orgname></vCard:ORG>']; +0187 end +0188 end +0189 if ~isempty(nameString) || ~isempty(email) || ~isempty(org) % Only fill .annotation if ownership data is provided +0190 modelSBML.annotation=['<annotation><rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:vCard="http://www.w3.org/2001/vcard-rdf/3.0#" xmlns:bqbiol="http://biomodels.net/biology-qualifiers/" xmlns:bqmodel="http://biomodels.net/model-qualifiers/"><rdf:Description rdf:about="#meta_' model.id '">']; +0191 modelSBML.annotation=[modelSBML.annotation '<dc:creator><rdf:Bag><rdf:li rdf:parseType="Resource">']; +0192 if ~isempty(nameString) +0193 modelSBML.annotation=[modelSBML.annotation '<vCard:N rdf:parseType="Resource">' nameString '</vCard:N>']; +0194 end +0195 modelSBML.annotation=[modelSBML.annotation email org '</rdf:li></rdf:Bag></dc:creator>']; +0196 modelSBML.annotation=[modelSBML.annotation '<dcterms:created rdf:parseType="Resource">'... +0197 '<dcterms:W3CDTF>' datestr(now,'yyyy-mm-ddTHH:MM:SSZ') '</dcterms:W3CDTF></dcterms:created><dcterms:modified rdf:parseType="Resource">'... 
+0198 '<dcterms:W3CDTF>' datestr(now,'yyyy-mm-ddTHH:MM:SSZ') '</dcterms:W3CDTF></dcterms:modified>']; +0199 if isfield(model.annotation,'taxonomy') +0200 modelSBML.annotation=[modelSBML.annotation '<bqbiol:is><rdf:Bag><rdf:li rdf:resource="https://identifiers.org/taxonomy/' regexprep(model.annotation.taxonomy,'taxonomy/','') '"/></rdf:Bag></bqbiol:is>']; +0201 end +0202 modelSBML.annotation=[modelSBML.annotation '</rdf:Description></rdf:RDF></annotation>']; +0203 end +0204 end +0205 +0206 %Prepare compartments +0207 for i=1:numel(model.comps) +0208 %Add the default values, as these will be the same in all entries +0209 if i==1 +0210 if isfield(modelSBML.compartment, 'sboTerm') +0211 modelSBML.compartment(i).sboTerm=290; 0212 end -0213 if isfield(modelSBML.compartment, 'size') -0214 modelSBML.compartment(i).size=1; +0213 if isfield(modelSBML.compartment, 'spatialDimensions') +0214 modelSBML.compartment(i).spatialDimensions=3; 0215 end -0216 if isfield(modelSBML.compartment, 'constant') -0217 modelSBML.compartment(i).constant=1; +0216 if isfield(modelSBML.compartment, 'size') +0217 modelSBML.compartment(i).size=1; 0218 end -0219 if isfield(modelSBML.compartment, 'isSetSize') -0220 modelSBML.compartment(i).isSetSize=1; +0219 if isfield(modelSBML.compartment, 'constant') +0220 modelSBML.compartment(i).constant=1; 0221 end -0222 if isfield(modelSBML.compartment, 'isSetSpatialDimensions') -0223 modelSBML.compartment(i).isSetSpatialDimensions=1; +0222 if isfield(modelSBML.compartment, 'isSetSize') +0223 modelSBML.compartment(i).isSetSize=1; 0224 end -0225 end -0226 %Copy the default values to the next entry as long as it is not the -0227 %last one -0228 if i<numel(model.comps) -0229 modelSBML.compartment(i+1)=modelSBML.compartment(i); -0230 end -0231 -0232 if isfield(modelSBML.compartment,'metaid') -0233 if regexp(model.comps{i},'^[^a-zA-Z_]') -0234 EM='The compartment IDs are in numeric format. 
For the compliance with SBML specifications, compartment IDs will be preceded with "c_" string'; -0235 dispEM(EM,false); -0236 model.comps(i)=strcat('c_',model.comps(i)); -0237 end -0238 modelSBML.compartment(i).metaid=model.comps{i}; -0239 end -0240 %Prepare Miriam strings -0241 if ~isempty(model.compMiriams{i}) -0242 [~,sbo_ind] = ismember('sbo',model.compMiriams{i}.name); -0243 if sbo_ind > 0 -0244 modelSBML.compartment(i).sboTerm=str2double(regexprep(model.compMiriams{i}.value{sbo_ind},'SBO:','','ignorecase')); -0245 % remove the SBO term from compMiriams so the information is -0246 % not duplicated in the "annotation" field later on -0247 model.compMiriams{i}.name(sbo_ind) = []; -0248 model.compMiriams{i}.value(sbo_ind) = []; -0249 end -0250 end -0251 if ~isempty(model.compMiriams{i}) && isfield(modelSBML.compartment(i),'annotation') -0252 modelSBML.compartment(i).annotation=['<annotation><rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:vCard="http://www.w3.org/2001/vcard-rdf/3.0#" xmlns:bqbiol="http://biomodels.net/biology-qualifiers/" xmlns:bqmodel="http://biomodels.net/model-qualifiers/"><rdf:Description rdf:about="#meta_' model.comps{i} '">']; -0253 modelSBML.compartment(i).annotation=[modelSBML.compartment(i).annotation '<bqbiol:is><rdf:Bag>']; -0254 modelSBML.compartment(i).annotation=[modelSBML.compartment(i).annotation getMiriam(model.compMiriams{i}) '</rdf:Bag></bqbiol:is></rdf:Description></rdf:RDF></annotation>']; -0255 end -0256 if isfield(modelSBML.compartment, 'name') -0257 modelSBML.compartment(i).name=model.compNames{i}; +0225 if isfield(modelSBML.compartment, 'isSetSpatialDimensions') +0226 modelSBML.compartment(i).isSetSpatialDimensions=1; +0227 end +0228 end +0229 %Copy the default values to the next entry as long as it is not the +0230 %last one +0231 if i<numel(model.comps) +0232 modelSBML.compartment(i+1)=modelSBML.compartment(i); 
+0233 end +0234 +0235 if isfield(modelSBML.compartment,'metaid') +0236 if regexp(model.comps{i},'^[^a-zA-Z_]') +0237 EM='The compartment IDs are in numeric format. For the compliance with SBML specifications, compartment IDs will be preceded with "c_" string'; +0238 dispEM(EM,false); +0239 model.comps(i)=strcat('c_',model.comps(i)); +0240 end +0241 modelSBML.compartment(i).metaid=model.comps{i}; +0242 end +0243 %Prepare Miriam strings +0244 if ~isempty(model.compMiriams{i}) +0245 [~,sbo_ind] = ismember('sbo',model.compMiriams{i}.name); +0246 if sbo_ind > 0 +0247 modelSBML.compartment(i).sboTerm=str2double(regexprep(model.compMiriams{i}.value{sbo_ind},'SBO:','','ignorecase')); +0248 % remove the SBO term from compMiriams so the information is +0249 % not duplicated in the "annotation" field later on +0250 model.compMiriams{i}.name(sbo_ind) = []; +0251 model.compMiriams{i}.value(sbo_ind) = []; +0252 end +0253 end +0254 if ~isempty(model.compMiriams{i}) && isfield(modelSBML.compartment(i),'annotation') +0255 modelSBML.compartment(i).annotation=['<annotation><rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:vCard="http://www.w3.org/2001/vcard-rdf/3.0#" xmlns:bqbiol="http://biomodels.net/biology-qualifiers/" xmlns:bqmodel="http://biomodels.net/model-qualifiers/"><rdf:Description rdf:about="#meta_' model.comps{i} '">']; +0256 modelSBML.compartment(i).annotation=[modelSBML.compartment(i).annotation '<bqbiol:is><rdf:Bag>']; +0257 modelSBML.compartment(i).annotation=[modelSBML.compartment(i).annotation getMiriam(model.compMiriams{i}) '</rdf:Bag></bqbiol:is></rdf:Description></rdf:RDF></annotation>']; 0258 end -0259 if isfield(modelSBML.compartment, 'id') -0260 modelSBML.compartment(i).id=model.comps{i}; +0259 if isfield(modelSBML.compartment, 'name') +0260 modelSBML.compartment(i).name=model.compNames{i}; 0261 end -0262 -0263 end -0264 -0265 %Begin writing species 
-0266 for i=1:numel(model.mets) -0267 %Add the default values, as these will be the same in all entries -0268 if i==1 -0269 if isfield(modelSBML.species, 'sboTerm') -0270 modelSBML.species(i).sboTerm=247; -0271 end -0272 if isfield(modelSBML.species, 'initialAmount') -0273 modelSBML.species(i).initialAmount=1; +0262 if isfield(modelSBML.compartment, 'id') +0263 modelSBML.compartment(i).id=model.comps{i}; +0264 end +0265 +0266 end +0267 +0268 %Begin writing species +0269 for i=1:numel(model.mets) +0270 %Add the default values, as these will be the same in all entries +0271 if i==1 +0272 if isfield(modelSBML.species, 'sboTerm') +0273 modelSBML.species(i).sboTerm=247; 0274 end -0275 if isfield(modelSBML.species, 'initialConcentration') -0276 modelSBML.species(i).initialConcentration=0; +0275 if isfield(modelSBML.species, 'initialAmount') +0276 modelSBML.species(i).initialAmount=1; 0277 end -0278 if isfield(modelSBML.species, 'isSetInitialAmount') -0279 modelSBML.species(i).isSetInitialAmount=1; +0278 if isfield(modelSBML.species, 'initialConcentration') +0279 modelSBML.species(i).initialConcentration=0; 0280 end -0281 if isfield(modelSBML.species, 'isSetInitialConcentration') -0282 modelSBML.species(i).isSetInitialConcentration=1; +0281 if isfield(modelSBML.species, 'isSetInitialAmount') +0282 modelSBML.species(i).isSetInitialAmount=1; 0283 end -0284 end -0285 %Copy the default values to the next entry as long as it is not the -0286 %last one -0287 if i<numel(model.mets) -0288 modelSBML.species(i+1)=modelSBML.species(i); -0289 end -0290 -0291 if isfield(modelSBML.species,'metaid') -0292 modelSBML.species(i).metaid=['M_' model.mets{i}]; -0293 end -0294 if isfield(modelSBML.species, 'name') -0295 modelSBML.species(i).name=model.metNames{i}; +0284 if isfield(modelSBML.species, 'isSetInitialConcentration') +0285 modelSBML.species(i).isSetInitialConcentration=1; +0286 end +0287 end +0288 %Copy the default values to the next entry as long as it is not the +0289 %last one 
+0290 if i<numel(model.mets) +0291 modelSBML.species(i+1)=modelSBML.species(i); +0292 end +0293 +0294 if isfield(modelSBML.species,'metaid') +0295 modelSBML.species(i).metaid=['M_' model.mets{i}]; 0296 end -0297 if isfield(modelSBML.species, 'id') -0298 modelSBML.species(i).id=['M_' model.mets{i}]; +0297 if isfield(modelSBML.species, 'name') +0298 modelSBML.species(i).name=model.metNames{i}; 0299 end -0300 if isfield(modelSBML.species, 'compartment') -0301 modelSBML.species(i).compartment=model.comps{model.metComps(i)}; +0300 if isfield(modelSBML.species, 'id') +0301 modelSBML.species(i).id=['M_' model.mets{i}]; 0302 end -0303 if isfield(model,'unconstrained') -0304 if model.unconstrained(i) -0305 modelSBML.species(i).boundaryCondition=1; -0306 end -0307 end -0308 if isfield(modelSBML.species, 'fbc_charge') && isfield(model,'metCharges') -0309 if ~isnan(model.metCharges(i)) -0310 modelSBML.species(i).fbc_charge=model.metCharges(i); -0311 modelSBML.species(i).isSetfbc_charge=1; -0312 else -0313 modelSBML.species(i).isSetfbc_charge=0; -0314 end -0315 end -0316 if ~isempty(model.metMiriams{i}) -0317 [~,sbo_ind] = ismember('sbo',model.metMiriams{i}.name); -0318 if sbo_ind > 0 -0319 modelSBML.species(i).sboTerm=str2double(regexprep(model.metMiriams{i}.value{sbo_ind},'SBO:','','ignorecase')); -0320 % remove the SBO term from metMiriams so the information is -0321 % not duplicated in the "annotation" field later on -0322 model.metMiriams{i}.name(sbo_ind) = []; -0323 model.metMiriams{i}.value(sbo_ind) = []; -0324 end -0325 end -0326 if isfield(modelSBML.species,'annotation') -0327 if ~isempty(model.metMiriams{i}) || ~isempty(model.metFormulas{i}) -0328 hasInchi=false; -0329 if ~isempty(model.metFormulas{i}) -0330 %Only export formula if there is no InChI. 
This is because -0331 %the metFormulas field is populated by InChIs if available -0332 if ~isempty(model.inchis{i}) -0333 hasInchi=true; -0334 end -0335 if hasInchi==false -0336 modelSBML.species(i).fbc_chemicalFormula=model.metFormulas{i}; +0303 if isfield(modelSBML.species, 'compartment') +0304 modelSBML.species(i).compartment=model.comps{model.metComps(i)}; +0305 end +0306 if isfield(model,'unconstrained') +0307 if model.unconstrained(i) +0308 modelSBML.species(i).boundaryCondition=1; +0309 end +0310 end +0311 if isfield(modelSBML.species, 'fbc_charge') && isfield(model,'metCharges') +0312 if ~isnan(model.metCharges(i)) +0313 modelSBML.species(i).fbc_charge=model.metCharges(i); +0314 modelSBML.species(i).isSetfbc_charge=1; +0315 else +0316 modelSBML.species(i).isSetfbc_charge=0; +0317 end +0318 end +0319 if ~isempty(model.metMiriams{i}) +0320 [~,sbo_ind] = ismember('sbo',model.metMiriams{i}.name); +0321 if sbo_ind > 0 +0322 modelSBML.species(i).sboTerm=str2double(regexprep(model.metMiriams{i}.value{sbo_ind},'SBO:','','ignorecase')); +0323 % remove the SBO term from metMiriams so the information is +0324 % not duplicated in the "annotation" field later on +0325 model.metMiriams{i}.name(sbo_ind) = []; +0326 model.metMiriams{i}.value(sbo_ind) = []; +0327 end +0328 end +0329 if isfield(modelSBML.species,'annotation') +0330 if ~isempty(model.metMiriams{i}) || ~isempty(model.metFormulas{i}) +0331 hasInchi=false; +0332 if ~isempty(model.metFormulas{i}) +0333 %Only export formula if there is no InChI. 
This is because +0334 %the metFormulas field is populated by InChIs if available +0335 if ~isempty(model.inchis{i}) +0336 hasInchi=true; 0337 end -0338 end -0339 if ~isempty(model.metMiriams{i}) || hasInchi==true -0340 modelSBML.species(i).annotation=['<annotation><rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:vCard="http://www.w3.org/2001/vcard-rdf/3.0#" xmlns:bqbiol="http://biomodels.net/biology-qualifiers/" xmlns:bqmodel="http://biomodels.net/model-qualifiers/"><rdf:Description rdf:about="#meta_M_' model.mets{i} '">']; -0341 modelSBML.species(i).annotation=[modelSBML.species(i).annotation '<bqbiol:is><rdf:Bag>']; -0342 if ~isempty(model.metMiriams{i}) -0343 modelSBML.species(i).annotation=[modelSBML.species(i).annotation getMiriam(model.metMiriams{i})]; -0344 end -0345 if hasInchi==true -0346 modelSBML.species(i).annotation=[modelSBML.species(i).annotation '<rdf:li rdf:resource="https://identifiers.org/inchi/InChI=' regexprep(model.inchis{i},'^InChI=','') '"/>']; -0347 modelSBML.species(i).fbc_chemicalFormula=char(regexp(model.inchis{i}, '/(\w+)/', 'tokens', 'once')); -0348 end -0349 modelSBML.species(i).annotation=[modelSBML.species(i).annotation '</rdf:Bag></bqbiol:is></rdf:Description></rdf:RDF></annotation>']; -0350 end -0351 end -0352 end -0353 end -0354 -0355 if isfield(model,'genes') -0356 for i=1:numel(model.genes) -0357 %Add the default values, as these will be the same in all entries -0358 if i==1 -0359 if isfield(modelSBML.fbc_geneProduct, 'sboTerm') -0360 modelSBML.fbc_geneProduct(i).sboTerm=243; -0361 end -0362 end -0363 %Copy the default values to the next index as long as it is not the -0364 %last one -0365 if i<numel(model.genes) -0366 modelSBML.fbc_geneProduct(i+1)=modelSBML.fbc_geneProduct(i); -0367 end -0368 -0369 if isfield(modelSBML.fbc_geneProduct,'metaid') -0370 modelSBML.fbc_geneProduct(i).metaid=model.genes{i}; -0371 end -0372 if 
~isempty(model.geneMiriams{i}) -0373 [~,sbo_ind] = ismember('sbo',model.geneMiriams{i}.name); -0374 if sbo_ind > 0 -0375 modelSBML.fbc_geneProduct(i).sboTerm=str2double(regexprep(model.geneMiriams{i}.value{sbo_ind},'SBO:','','ignorecase')); -0376 % remove the SBO term from compMiriams so the information is -0377 % not duplicated in the "annotation" field later on -0378 model.geneMiriams{i}.name(sbo_ind) = []; -0379 model.geneMiriams{i}.value(sbo_ind) = []; -0380 end -0381 end -0382 if ~isempty(model.geneMiriams{i}) && isfield(modelSBML.fbc_geneProduct(i),'annotation') -0383 modelSBML.fbc_geneProduct(i).annotation=['<annotation><rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:vCard="http://www.w3.org/2001/vcard-rdf/3.0#" xmlns:bqbiol="http://biomodels.net/biology-qualifiers/" xmlns:bqmodel="http://biomodels.net/model-qualifiers/"><rdf:Description rdf:about="#meta_' model.genes{i} '">']; -0384 modelSBML.fbc_geneProduct(i).annotation=[modelSBML.fbc_geneProduct(i).annotation '<bqbiol:is><rdf:Bag>']; -0385 modelSBML.fbc_geneProduct(i).annotation=[modelSBML.fbc_geneProduct(i).annotation getMiriam(model.geneMiriams{i}) '</rdf:Bag></bqbiol:is></rdf:Description></rdf:RDF></annotation>']; -0386 end -0387 if isfield(modelSBML.fbc_geneProduct, 'fbc_id') -0388 modelSBML.fbc_geneProduct(i).fbc_id=model.genes{i}; +0338 if hasInchi==false +0339 modelSBML.species(i).fbc_chemicalFormula=model.metFormulas{i}; +0340 end +0341 end +0342 if ~isempty(model.metMiriams{i}) || hasInchi==true +0343 modelSBML.species(i).annotation=['<annotation><rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:vCard="http://www.w3.org/2001/vcard-rdf/3.0#" xmlns:bqbiol="http://biomodels.net/biology-qualifiers/" xmlns:bqmodel="http://biomodels.net/model-qualifiers/"><rdf:Description 
rdf:about="#meta_M_' model.mets{i} '">']; +0344 modelSBML.species(i).annotation=[modelSBML.species(i).annotation '<bqbiol:is><rdf:Bag>']; +0345 if ~isempty(model.metMiriams{i}) +0346 modelSBML.species(i).annotation=[modelSBML.species(i).annotation getMiriam(model.metMiriams{i})]; +0347 end +0348 if hasInchi==true +0349 modelSBML.species(i).annotation=[modelSBML.species(i).annotation '<rdf:li rdf:resource="https://identifiers.org/inchi/InChI=' regexprep(model.inchis{i},'^InChI=','') '"/>']; +0350 modelSBML.species(i).fbc_chemicalFormula=char(regexp(model.inchis{i}, '/(\w+)/', 'tokens', 'once')); +0351 end +0352 modelSBML.species(i).annotation=[modelSBML.species(i).annotation '</rdf:Bag></bqbiol:is></rdf:Description></rdf:RDF></annotation>']; +0353 end +0354 end +0355 end +0356 end +0357 +0358 if isfield(model,'genes') +0359 for i=1:numel(model.genes) +0360 %Add the default values, as these will be the same in all entries +0361 if i==1 +0362 if isfield(modelSBML.fbc_geneProduct, 'sboTerm') +0363 modelSBML.fbc_geneProduct(i).sboTerm=243; +0364 end +0365 end +0366 %Copy the default values to the next index as long as it is not the +0367 %last one +0368 if i<numel(model.genes) +0369 modelSBML.fbc_geneProduct(i+1)=modelSBML.fbc_geneProduct(i); +0370 end +0371 +0372 if isfield(modelSBML.fbc_geneProduct,'metaid') +0373 modelSBML.fbc_geneProduct(i).metaid=model.genes{i}; +0374 end +0375 if ~isempty(model.geneMiriams{i}) +0376 [~,sbo_ind] = ismember('sbo',model.geneMiriams{i}.name); +0377 if sbo_ind > 0 +0378 modelSBML.fbc_geneProduct(i).sboTerm=str2double(regexprep(model.geneMiriams{i}.value{sbo_ind},'SBO:','','ignorecase')); +0379 % remove the SBO term from compMiriams so the information is +0380 % not duplicated in the "annotation" field later on +0381 model.geneMiriams{i}.name(sbo_ind) = []; +0382 model.geneMiriams{i}.value(sbo_ind) = []; +0383 end +0384 end +0385 if ~isempty(model.geneMiriams{i}) && isfield(modelSBML.fbc_geneProduct(i),'annotation') +0386 
modelSBML.fbc_geneProduct(i).annotation=['<annotation><rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:vCard="http://www.w3.org/2001/vcard-rdf/3.0#" xmlns:bqbiol="http://biomodels.net/biology-qualifiers/" xmlns:bqmodel="http://biomodels.net/model-qualifiers/"><rdf:Description rdf:about="#meta_' model.genes{i} '">']; +0387 modelSBML.fbc_geneProduct(i).annotation=[modelSBML.fbc_geneProduct(i).annotation '<bqbiol:is><rdf:Bag>']; +0388 modelSBML.fbc_geneProduct(i).annotation=[modelSBML.fbc_geneProduct(i).annotation getMiriam(model.geneMiriams{i}) '</rdf:Bag></bqbiol:is></rdf:Description></rdf:RDF></annotation>']; 0389 end -0390 if isfield(modelSBML.fbc_geneProduct, 'fbc_label') && isfield(model,'geneShortNames') -0391 if isempty(model.geneShortNames{i}) -0392 modelSBML.fbc_geneProduct(i).fbc_label=model.genes{i}; -0393 else -0394 modelSBML.fbc_geneProduct(i).fbc_label=model.geneShortNames{i}; -0395 end -0396 end -0397 end -0398 if exportGeneComplexes==true -0399 %Also add the complexes as genes. This is done by splitting grRules -0400 %on "or" and adding the ones which contain several genes -0401 geneComplexes={}; -0402 if isfield(model,'grRules') -0403 %Only grRules which contain " and " can be complexes -0404 uniqueRules=unique(model.grRules); -0405 I=cellfun(@any,strfind(uniqueRules,' and ')); -0406 uniqueRules(~I)=[]; -0407 uniqueRules=strrep(uniqueRules,'(',''); -0408 uniqueRules=strrep(uniqueRules,')',''); -0409 uniqueRules=strrep(uniqueRules,' and ',':'); -0410 for i=1:numel(uniqueRules) -0411 genes=regexp(uniqueRules(i),' or ','split'); -0412 genes=genes{1}(:); -0413 %Check which ones are complexes -0414 I=cellfun(@any,strfind(genes,':')); -0415 geneComplexes=[geneComplexes;genes(I)]; -0416 end -0417 end -0418 geneComplexes=unique(geneComplexes); -0419 if ~isempty(geneComplexes) -0420 %Then add them as genes. 
There is a possiblity that a complex -0421 %A&B is added as separate from B&A. This is not really an issue -0422 %so this is not dealt with -0423 for i=1:numel(geneComplexes) -0424 modelSBML.fbc_geneProduct(numel(model.genes)+i)=modelSBML.fbc_geneProduct(1); -0425 if isfield(modelSBML.fbc_geneProduct,'metaid') -0426 modelSBML.fbc_geneProduct(numel(model.genes)+i).metaid=geneComplexes{i}; -0427 end -0428 if isfield(modelSBML.fbc_geneProduct,'fbc_id') -0429 modelSBML.fbc_geneProduct(numel(model.genes)+i).fbc_id=geneComplexes{i}; -0430 else -0431 modelSBML.fbc_geneProduct(i).fbc_label=modelSBML.fbc_geneProduct(i).fbc_id; -0432 end -0433 end -0434 end -0435 end -0436 end -0437 -0438 %Generate a list of unique fbc_bound names -0439 totalValues=[model.lb; model.ub]; -0440 totalNames=cell(size(totalValues,1),1); -0441 -0442 listUniqueValues=unique(totalValues); -0443 -0444 for i=1:length(listUniqueValues) -0445 listUniqueNames{i,1}=['FB',num2str(i),'N',num2str(abs(round(listUniqueValues(i))))]; % create unique flux bound IDs. -0446 ind=find(ismember(totalValues,listUniqueValues(i))); -0447 totalNames(ind)=listUniqueNames(i,1); -0448 end +0390 if isfield(modelSBML.fbc_geneProduct, 'fbc_id') +0391 modelSBML.fbc_geneProduct(i).fbc_id=model.genes{i}; +0392 end +0393 if isfield(modelSBML.fbc_geneProduct, 'fbc_label') && isfield(model,'geneShortNames') +0394 if isempty(model.geneShortNames{i}) +0395 modelSBML.fbc_geneProduct(i).fbc_label=model.genes{i}; +0396 else +0397 modelSBML.fbc_geneProduct(i).fbc_label=model.geneShortNames{i}; +0398 end +0399 end +0400 if isfield(modelSBML.fbc_geneProduct, 'fbc_name') && isfield(model,'proteinNames') +0401 if ~isempty(model.proteinNames{i}) +0402 modelSBML.fbc_geneProduct(i).fbc_name=model.proteinNames{i}; +0403 end +0404 end +0405 end +0406 if exportGeneComplexes==true +0407 %Also add the complexes as genes. 
This is done by splitting grRules +0408 %on "or" and adding the ones which contain several genes +0409 geneComplexes={}; +0410 if isfield(model,'grRules') +0411 %Only grRules which contain " and " can be complexes +0412 uniqueRules=unique(model.grRules); +0413 I=cellfun(@any,strfind(uniqueRules,' and ')); +0414 uniqueRules(~I)=[]; +0415 uniqueRules=strrep(uniqueRules,'(',''); +0416 uniqueRules=strrep(uniqueRules,')',''); +0417 uniqueRules=strrep(uniqueRules,' and ',':'); +0418 for i=1:numel(uniqueRules) +0419 genes=regexp(uniqueRules(i),' or ','split'); +0420 genes=genes{1}(:); +0421 %Check which ones are complexes +0422 I=cellfun(@any,strfind(genes,':')); +0423 geneComplexes=[geneComplexes;genes(I)]; +0424 end +0425 end +0426 geneComplexes=unique(geneComplexes); +0427 if ~isempty(geneComplexes) +0428 %Then add them as genes. There is a possiblity that a complex +0429 %A&B is added as separate from B&A. This is not really an issue +0430 %so this is not dealt with +0431 for i=1:numel(geneComplexes) +0432 modelSBML.fbc_geneProduct(numel(model.genes)+i)=modelSBML.fbc_geneProduct(1); +0433 if isfield(modelSBML.fbc_geneProduct,'metaid') +0434 modelSBML.fbc_geneProduct(numel(model.genes)+i).metaid=geneComplexes{i}; +0435 end +0436 if isfield(modelSBML.fbc_geneProduct,'fbc_id') +0437 modelSBML.fbc_geneProduct(numel(model.genes)+i).fbc_id=geneComplexes{i}; +0438 else +0439 modelSBML.fbc_geneProduct(i).fbc_label=modelSBML.fbc_geneProduct(i).fbc_id; +0440 end +0441 end +0442 end +0443 end +0444 end +0445 +0446 %Generate a list of unique fbc_bound names +0447 totalValues=[model.lb; model.ub]; +0448 totalNames=cell(size(totalValues,1),1); 0449 -0450 for i=1:length(listUniqueNames) -0451 %Add the default values, as these will be the same in all entries -0452 if i==1 -0453 if isfield(modelSBML.parameter, 'constant') -0454 modelSBML.parameter(i).constant=1; -0455 end -0456 if isfield(modelSBML.parameter, 'isSetValue') -0457 modelSBML.parameter(i).isSetValue=1; -0458 end -0459 end 
-0460 %Copy the default values to the next index as long as it is not the -0461 %last one -0462 if i<numel(listUniqueNames) -0463 modelSBML.parameter(i+1)=modelSBML.parameter(i); -0464 end -0465 modelSBML.parameter(i).id=listUniqueNames{i}; -0466 modelSBML.parameter(i).value=listUniqueValues(i); -0467 end -0468 -0469 for i=1:numel(model.rxns) -0470 %Add the default values, as these will be the same in all entries -0471 if i==1 -0472 if isfield(modelSBML.reaction, 'sboTerm') -0473 modelSBML.reaction(i).sboTerm=176; -0474 end -0475 if isfield(modelSBML.reaction, 'isSetFast') -0476 modelSBML.reaction(i).isSetFast=1; -0477 end -0478 end -0479 %Copy the default values to the next index as long as it is not the -0480 %last one -0481 if i<numel(model.rxns) -0482 modelSBML.reaction(i+1)=modelSBML.reaction(i); -0483 end -0484 -0485 if isfield(modelSBML.reaction,'metaid') -0486 modelSBML.reaction(i).metaid=['R_' model.rxns{i}]; -0487 end -0488 -0489 %Export notes information -0490 if (~isnan(model.rxnConfidenceScores(i)) || ~isempty(model.rxnReferences{i}) || ~isempty(model.rxnNotes{i})) -0491 modelSBML.reaction(i).notes='<notes><body xmlns="http://www.w3.org/1999/xhtml">'; -0492 if ~isnan(model.rxnConfidenceScores(i)) -0493 modelSBML.reaction(i).notes=[modelSBML.reaction(i).notes '<p>Confidence Level: ' num2str(model.rxnConfidenceScores(i)) '</p>']; -0494 end -0495 if ~isempty(model.rxnReferences{i}) -0496 modelSBML.reaction(i).notes=[modelSBML.reaction(i).notes '<p>AUTHORS: ' model.rxnReferences{i} '</p>']; -0497 end -0498 if ~isempty(model.rxnNotes{i}) -0499 modelSBML.reaction(i).notes=[modelSBML.reaction(i).notes '<p>NOTES: ' model.rxnNotes{i} '</p>']; -0500 end -0501 modelSBML.reaction(i).notes=[modelSBML.reaction(i).notes '</body></notes>']; -0502 end -0503 -0504 % Export SBO terms from rxnMiriams -0505 if ~isempty(model.rxnMiriams{i}) -0506 [~,sbo_ind] = ismember('sbo',model.rxnMiriams{i}.name); -0507 if sbo_ind > 0 -0508 
modelSBML.reaction(i).sboTerm=str2double(regexprep(model.rxnMiriams{i}.value{sbo_ind},'SBO:','','ignorecase')); -0509 % remove the SBO term from rxnMiriams so the information is not -0510 % duplicated in the "annotation" field later on -0511 model.rxnMiriams{i}.name(sbo_ind) = []; -0512 model.rxnMiriams{i}.value(sbo_ind) = []; -0513 end -0514 end -0515 -0516 %Export annotation information from rxnMiriams -0517 if (~isempty(model.rxnMiriams{i}) && isfield(modelSBML.reaction(i),'annotation')) || ~isempty(model.eccodes{i}) -0518 modelSBML.reaction(i).annotation=['<annotation><rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:vCard="http://www.w3.org/2001/vcard-rdf/3.0#" xmlns:bqbiol="http://biomodels.net/biology-qualifiers/" xmlns:bqmodel="http://biomodels.net/model-qualifiers/"><rdf:Description rdf:about="#meta_R_' model.rxns{i} '">']; -0519 modelSBML.reaction(i).annotation=[modelSBML.reaction(i).annotation '<bqbiol:is><rdf:Bag>']; -0520 if ~isempty(model.eccodes{i}) -0521 eccodes=regexp(model.eccodes{i},';','split'); -0522 for j=1:numel(eccodes) -0523 modelSBML.reaction(i).annotation=[modelSBML.reaction(i).annotation '<rdf:li rdf:resource="https://identifiers.org/ec-code/' regexprep(eccodes{j},'ec-code/|EC','') '"/>']; -0524 end -0525 end -0526 modelSBML.reaction(i).annotation=[modelSBML.reaction(i).annotation getMiriam(model.rxnMiriams{i}) '</rdf:Bag></bqbiol:is></rdf:Description></rdf:RDF></annotation>']; -0527 end -0528 -0529 if isfield(modelSBML.reaction, 'name') -0530 modelSBML.reaction(i).name=model.rxnNames{i}; -0531 end -0532 if isfield(modelSBML.reaction, 'id') -0533 modelSBML.reaction(i).id=['R_' model.rxns{i}]; -0534 end -0535 -0536 %Add the information about reactants and products -0537 involvedMets=addReactantsProducts(model,modelSBML,i); -0538 for j=1:numel(involvedMets.reactant) -0539 if j<numel(involvedMets.reactant) -0540 
modelSBML.reaction(i).reactant(j+1)=modelSBML.reaction(i).reactant(j); -0541 end -0542 modelSBML.reaction(i).reactant(j).species=involvedMets.reactant(j).species; -0543 modelSBML.reaction(i).reactant(j).stoichiometry=involvedMets.reactant(j).stoichiometry; -0544 modelSBML.reaction(i).reactant(j).isSetStoichiometry=involvedMets.reactant(j).isSetStoichiometry; -0545 modelSBML.reaction(i).reactant(j).constant=involvedMets.reactant(j).constant; -0546 end -0547 if numel(involvedMets.reactant)==0 -0548 modelSBML.reaction(i).reactant=''; -0549 end -0550 for j=1:numel(involvedMets.product) -0551 if j<numel(involvedMets.product) -0552 modelSBML.reaction(i).product(j+1)=modelSBML.reaction(i).product(j); -0553 end -0554 modelSBML.reaction(i).product(j).species=involvedMets.product(j).species; -0555 modelSBML.reaction(i).product(j).stoichiometry=involvedMets.product(j).stoichiometry; -0556 modelSBML.reaction(i).product(j).isSetStoichiometry=involvedMets.product(j).isSetStoichiometry; -0557 modelSBML.reaction(i).product(j).constant=involvedMets.product(j).constant; -0558 end -0559 if numel(involvedMets.product)==0 -0560 modelSBML.reaction(i).product=''; -0561 end -0562 %Export reversibility information. Reactions are irreversible by -0563 %default -0564 if model.rev(i)==1 -0565 modelSBML.reaction(i).reversible=1; +0450 listUniqueValues=unique(totalValues); +0451 +0452 for i=1:length(listUniqueValues) +0453 listUniqueNames{i,1}=['FB',num2str(i),'N',num2str(abs(round(listUniqueValues(i))))]; % create unique flux bound IDs. 
+0454 ind=find(ismember(totalValues,listUniqueValues(i))); +0455 totalNames(ind)=listUniqueNames(i,1); +0456 end +0457 +0458 for i=1:length(listUniqueNames) +0459 %Add the default values, as these will be the same in all entries +0460 if i==1 +0461 if isfield(modelSBML.parameter, 'constant') +0462 modelSBML.parameter(i).constant=1; +0463 end +0464 if isfield(modelSBML.parameter, 'isSetValue') +0465 modelSBML.parameter(i).isSetValue=1; +0466 end +0467 end +0468 %Copy the default values to the next index as long as it is not the +0469 %last one +0470 if i<numel(listUniqueNames) +0471 modelSBML.parameter(i+1)=modelSBML.parameter(i); +0472 end +0473 modelSBML.parameter(i).id=listUniqueNames{i}; +0474 modelSBML.parameter(i).value=listUniqueValues(i); +0475 end +0476 +0477 for i=1:numel(model.rxns) +0478 %Add the default values, as these will be the same in all entries +0479 if i==1 +0480 if isfield(modelSBML.reaction, 'sboTerm') +0481 modelSBML.reaction(i).sboTerm=176; +0482 end +0483 if isfield(modelSBML.reaction, 'isSetFast') +0484 modelSBML.reaction(i).isSetFast=1; +0485 end +0486 end +0487 %Copy the default values to the next index as long as it is not the +0488 %last one +0489 if i<numel(model.rxns) +0490 modelSBML.reaction(i+1)=modelSBML.reaction(i); +0491 end +0492 +0493 if isfield(modelSBML.reaction,'metaid') +0494 modelSBML.reaction(i).metaid=['R_' model.rxns{i}]; +0495 end +0496 +0497 %Export notes information +0498 if (~isnan(model.rxnConfidenceScores(i)) || ~isempty(model.rxnReferences{i}) || ~isempty(model.rxnNotes{i})) +0499 modelSBML.reaction(i).notes='<notes><body xmlns="http://www.w3.org/1999/xhtml">'; +0500 if ~isnan(model.rxnConfidenceScores(i)) +0501 modelSBML.reaction(i).notes=[modelSBML.reaction(i).notes '<p>Confidence Level: ' num2str(model.rxnConfidenceScores(i)) '</p>']; +0502 end +0503 if ~isempty(model.rxnReferences{i}) +0504 modelSBML.reaction(i).notes=[modelSBML.reaction(i).notes '<p>AUTHORS: ' model.rxnReferences{i} '</p>']; +0505 end +0506 
if ~isempty(model.rxnNotes{i}) +0507 modelSBML.reaction(i).notes=[modelSBML.reaction(i).notes '<p>NOTES: ' model.rxnNotes{i} '</p>']; +0508 end +0509 modelSBML.reaction(i).notes=[modelSBML.reaction(i).notes '</body></notes>']; +0510 end +0511 +0512 % Export SBO terms from rxnMiriams +0513 if ~isempty(model.rxnMiriams{i}) +0514 [~,sbo_ind] = ismember('sbo',model.rxnMiriams{i}.name); +0515 if sbo_ind > 0 +0516 modelSBML.reaction(i).sboTerm=str2double(regexprep(model.rxnMiriams{i}.value{sbo_ind},'SBO:','','ignorecase')); +0517 % remove the SBO term from rxnMiriams so the information is not +0518 % duplicated in the "annotation" field later on +0519 model.rxnMiriams{i}.name(sbo_ind) = []; +0520 model.rxnMiriams{i}.value(sbo_ind) = []; +0521 end +0522 end +0523 +0524 %Export annotation information from rxnMiriams +0525 if (~isempty(model.rxnMiriams{i}) && isfield(modelSBML.reaction(i),'annotation')) || ~isempty(model.eccodes{i}) +0526 modelSBML.reaction(i).annotation=['<annotation><rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:vCard="http://www.w3.org/2001/vcard-rdf/3.0#" xmlns:bqbiol="http://biomodels.net/biology-qualifiers/" xmlns:bqmodel="http://biomodels.net/model-qualifiers/"><rdf:Description rdf:about="#meta_R_' model.rxns{i} '">']; +0527 modelSBML.reaction(i).annotation=[modelSBML.reaction(i).annotation '<bqbiol:is><rdf:Bag>']; +0528 if ~isempty(model.eccodes{i}) +0529 eccodes=regexp(model.eccodes{i},';','split'); +0530 for j=1:numel(eccodes) +0531 modelSBML.reaction(i).annotation=[modelSBML.reaction(i).annotation '<rdf:li rdf:resource="https://identifiers.org/ec-code/' regexprep(eccodes{j},'ec-code/|EC','') '"/>']; +0532 end +0533 end +0534 modelSBML.reaction(i).annotation=[modelSBML.reaction(i).annotation getMiriam(model.rxnMiriams{i}) '</rdf:Bag></bqbiol:is></rdf:Description></rdf:RDF></annotation>']; +0535 end +0536 +0537 if 
isfield(modelSBML.reaction, 'name') +0538 modelSBML.reaction(i).name=model.rxnNames{i}; +0539 end +0540 if isfield(modelSBML.reaction, 'id') +0541 modelSBML.reaction(i).id=['R_' model.rxns{i}]; +0542 end +0543 +0544 %Add the information about reactants and products +0545 involvedMets=addReactantsProducts(model,modelSBML,i); +0546 for j=1:numel(involvedMets.reactant) +0547 if j<numel(involvedMets.reactant) +0548 modelSBML.reaction(i).reactant(j+1)=modelSBML.reaction(i).reactant(j); +0549 end +0550 modelSBML.reaction(i).reactant(j).species=involvedMets.reactant(j).species; +0551 modelSBML.reaction(i).reactant(j).stoichiometry=involvedMets.reactant(j).stoichiometry; +0552 modelSBML.reaction(i).reactant(j).isSetStoichiometry=involvedMets.reactant(j).isSetStoichiometry; +0553 modelSBML.reaction(i).reactant(j).constant=involvedMets.reactant(j).constant; +0554 end +0555 if numel(involvedMets.reactant)==0 +0556 modelSBML.reaction(i).reactant=''; +0557 end +0558 for j=1:numel(involvedMets.product) +0559 if j<numel(involvedMets.product) +0560 modelSBML.reaction(i).product(j+1)=modelSBML.reaction(i).product(j); +0561 end +0562 modelSBML.reaction(i).product(j).species=involvedMets.product(j).species; +0563 modelSBML.reaction(i).product(j).stoichiometry=involvedMets.product(j).stoichiometry; +0564 modelSBML.reaction(i).product(j).isSetStoichiometry=involvedMets.product(j).isSetStoichiometry; +0565 modelSBML.reaction(i).product(j).constant=involvedMets.product(j).constant; 0566 end -0567 if isfield(model, 'rxnComps') -0568 modelSBML.reaction(i).compartment=model.comps{model.rxnComps(i)}; +0567 if numel(involvedMets.product)==0 +0568 modelSBML.reaction(i).product=''; 0569 end -0570 if isfield(model, 'grRules') -0571 modelSBML.reaction(i).fbc_geneProductAssociation.fbc_association.fbc_association=model.grRules{i}; -0572 end -0573 modelSBML.reaction(i).fbc_lowerFluxBound=totalNames{i}; -0574 modelSBML.reaction(i).fbc_upperFluxBound=totalNames{length(model.lb)+i}; -0575 end -0576 
-0577 %Prepare subSystems Code taken from COBRA functions getModelSubSystems, -0578 %writeSBML, findRxnsFromSubSystem under GNU General Public License v3.0, -0579 %license file in readme/GPL.MD. Code modified for RAVEN -0580 if modelHasSubsystems -0581 modelSBML.groups_group.groups_kind = 'partonomy'; -0582 modelSBML.groups_group.sboTerm = 633; -0583 tmpStruct=modelSBML.groups_group; +0570 %Export reversibility information. Reactions are irreversible by +0571 %default +0572 if model.rev(i)==1 +0573 modelSBML.reaction(i).reversible=1; +0574 end +0575 if isfield(model, 'rxnComps') +0576 modelSBML.reaction(i).compartment=model.comps{model.rxnComps(i)}; +0577 end +0578 if isfield(model, 'grRules') +0579 modelSBML.reaction(i).fbc_geneProductAssociation.fbc_association.fbc_association=model.grRules{i}; +0580 end +0581 modelSBML.reaction(i).fbc_lowerFluxBound=totalNames{i}; +0582 modelSBML.reaction(i).fbc_upperFluxBound=totalNames{length(model.lb)+i}; +0583 end 0584 -0585 rxns=strcat('R_',model.rxns); -0586 if ~any(cellfun(@iscell,model.subSystems)) -0587 if ~any(~cellfun(@isempty,model.subSystems)) -0588 subSystems = {}; -0589 else -0590 subSystems = setdiff(model.subSystems,''); -0591 end -0592 else -0593 orderedSubs = cellfun(@(x) columnVector(x),model.subSystems,'UniformOUtput',false); -0594 subSystems = setdiff(vertcat(orderedSubs{:}),''); -0595 end -0596 if isempty(subSystems) -0597 subSystems = {}; -0598 end -0599 if ~isempty(subSystems) -0600 %Build the groups for the group package -0601 groupIDs = strcat('group',cellfun(@num2str, num2cell(1:length(subSystems)),'UniformOutput',false)); -0602 for i = 1:length(subSystems) -0603 cgroup = tmpStruct; -0604 if ~any(cellfun(@iscell,model.subSystems)) -0605 present = ismember(model.subSystems,subSystems{i}); -0606 else -0607 present = cellfun(@(x) any(ismember(x,subSystems{i})),model.subSystems); -0608 end -0609 groupMembers = rxns(present); -0610 for j = 1:numel(groupMembers) -0611 cMember = tmpStruct.groups_member; 
-0612 cMember.groups_idRef = groupMembers{j}; -0613 if j == 1 -0614 cgroup.groups_member = cMember; -0615 else -0616 cgroup.groups_member(j) = cMember; -0617 end -0618 end -0619 cgroup.groups_id = groupIDs{i}; -0620 cgroup.groups_name = subSystems{i}; -0621 if i == 1 -0622 modelSBML.groups_group = cgroup; -0623 else -0624 modelSBML.groups_group(i) = cgroup; -0625 end -0626 end -0627 end -0628 end -0629 -0630 %Prepare fbc_objective subfield -0631 -0632 modelSBML.fbc_objective.fbc_type='maximize'; -0633 modelSBML.fbc_objective.fbc_id='obj'; -0634 -0635 ind=find(model.c); -0636 -0637 if isempty(ind) -0638 modelSBML.fbc_objective.fbc_fluxObjective.fbc_coefficient=0; -0639 else -0640 for i=1:length(ind) -0641 %Copy the default values to the next index as long as it is not the -0642 %last one -0643 if i<numel(ind) -0644 modelSBML.reaction(i+1)=modelSBML.reaction(i); -0645 end -0646 values=model.c(model.c~=0); -0647 modelSBML.fbc_objective(i).fbc_fluxObjective.fbc_reaction=modelSBML.reaction(ind(i)).id; -0648 modelSBML.fbc_objective(i).fbc_fluxObjective.fbc_coefficient=values(i); -0649 modelSBML.fbc_objective(i).fbc_fluxObjective.isSetfbc_coefficient=1; -0650 end -0651 end -0652 -0653 modelSBML.fbc_activeObjective=modelSBML.fbc_objective.fbc_id; -0654 -0655 fbcStr=['http://www.sbml.org/sbml/level', num2str(sbmlLevel), '/version', num2str(sbmlVersion), '/fbc/version',num2str(sbmlPackageVersions(1))]; -0656 if modelHasSubsystems -0657 groupStr=['http://www.sbml.org/sbml/level', num2str(sbmlLevel), '/version', num2str(sbmlVersion), '/groups/version',num2str(sbmlPackageVersions(2))]; -0658 modelSBML.namespaces=struct('prefix',{'','fbc','groups'},... -0659 'uri',{['http://www.sbml.org/sbml/level', num2str(sbmlLevel), '/version', num2str(sbmlVersion), '/core'],... -0660 fbcStr,groupStr}); -0661 else -0662 modelSBML.namespaces=struct('prefix',{'','fbc'},... -0663 'uri',{['http://www.sbml.org/sbml/level', num2str(sbmlLevel), '/version', num2str(sbmlVersion), '/core'],... 
-0664 fbcStr}); -0665 end -0666 -0667 if sbmlPackageVersions(1) == 2 -0668 modelSBML.fbc_strict=1; -0669 modelSBML.isSetfbc_strict = 1; -0670 end -0671 -0672 modelSBML.rule=[]; -0673 modelSBML.constraint=[]; +0585 %Prepare subSystems Code taken from COBRA functions getModelSubSystems, +0586 %writeSBML, findRxnsFromSubSystem under GNU General Public License v3.0, +0587 %license file in readme/GPL.MD. Code modified for RAVEN +0588 if modelHasSubsystems +0589 modelSBML.groups_group.groups_kind = 'partonomy'; +0590 modelSBML.groups_group.sboTerm = 633; +0591 tmpStruct=modelSBML.groups_group; +0592 +0593 rxns=strcat('R_',model.rxns); +0594 if ~any(cellfun(@iscell,model.subSystems)) +0595 if ~any(~cellfun(@isempty,model.subSystems)) +0596 subSystems = {}; +0597 else +0598 subSystems = setdiff(model.subSystems,''); +0599 end +0600 else +0601 orderedSubs = cellfun(@(x) columnVector(x),model.subSystems,'UniformOUtput',false); +0602 subSystems = setdiff(vertcat(orderedSubs{:}),''); +0603 end +0604 if isempty(subSystems) +0605 subSystems = {}; +0606 end +0607 if ~isempty(subSystems) +0608 %Build the groups for the group package +0609 groupIDs = strcat('group',cellfun(@num2str, num2cell(1:length(subSystems)),'UniformOutput',false)); +0610 for i = 1:length(subSystems) +0611 cgroup = tmpStruct; +0612 if ~any(cellfun(@iscell,model.subSystems)) +0613 present = ismember(model.subSystems,subSystems{i}); +0614 else +0615 present = cellfun(@(x) any(ismember(x,subSystems{i})),model.subSystems); +0616 end +0617 groupMembers = rxns(present); +0618 for j = 1:numel(groupMembers) +0619 cMember = tmpStruct.groups_member; +0620 cMember.groups_idRef = groupMembers{j}; +0621 if j == 1 +0622 cgroup.groups_member = cMember; +0623 else +0624 cgroup.groups_member(j) = cMember; +0625 end +0626 end +0627 cgroup.groups_id = groupIDs{i}; +0628 cgroup.groups_name = subSystems{i}; +0629 if i == 1 +0630 modelSBML.groups_group = cgroup; +0631 else +0632 modelSBML.groups_group(i) = cgroup; +0633 end +0634 
end +0635 end +0636 end +0637 +0638 %Prepare fbc_objective subfield +0639 +0640 modelSBML.fbc_objective.fbc_type='maximize'; +0641 modelSBML.fbc_objective.fbc_id='obj'; +0642 +0643 ind=find(model.c); +0644 +0645 if isempty(ind) +0646 modelSBML.fbc_objective.fbc_fluxObjective.fbc_coefficient=0; +0647 else +0648 for i=1:length(ind) +0649 %Copy the default values to the next index as long as it is not the +0650 %last one +0651 if i<numel(ind) +0652 modelSBML.reaction(i+1)=modelSBML.reaction(i); +0653 end +0654 values=model.c(model.c~=0); +0655 modelSBML.fbc_objective(i).fbc_fluxObjective.fbc_reaction=modelSBML.reaction(ind(i)).id; +0656 modelSBML.fbc_objective(i).fbc_fluxObjective.fbc_coefficient=values(i); +0657 modelSBML.fbc_objective(i).fbc_fluxObjective.isSetfbc_coefficient=1; +0658 end +0659 end +0660 +0661 modelSBML.fbc_activeObjective=modelSBML.fbc_objective.fbc_id; +0662 +0663 fbcStr=['http://www.sbml.org/sbml/level', num2str(sbmlLevel), '/version', num2str(sbmlVersion), '/fbc/version',num2str(sbmlPackageVersions(1))]; +0664 if modelHasSubsystems +0665 groupStr=['http://www.sbml.org/sbml/level', num2str(sbmlLevel), '/version', num2str(sbmlVersion), '/groups/version',num2str(sbmlPackageVersions(2))]; +0666 modelSBML.namespaces=struct('prefix',{'','fbc','groups'},... +0667 'uri',{['http://www.sbml.org/sbml/level', num2str(sbmlLevel), '/version', num2str(sbmlVersion), '/core'],... +0668 fbcStr,groupStr}); +0669 else +0670 modelSBML.namespaces=struct('prefix',{'','fbc'},... +0671 'uri',{['http://www.sbml.org/sbml/level', num2str(sbmlLevel), '/version', num2str(sbmlVersion), '/core'],... 
+0672 fbcStr}); +0673 end 0674 -0675 [ravenDir,prevDir]=findRAVENroot(); -0676 fileName=checkFileExistence(fileName,1,true,false); -0677 -0678 OutputSBML_RAVEN(modelSBML,fileName,1,0,[1,0]); -0679 end -0680 -0681 -0682 function modelSBML=getSBMLStructure(sbmlLevel,sbmlVersion,sbmlPackages,sbmlPackageVersions) -0683 %Returns the blank SBML model structure by using appropriate libSBML -0684 %functions. This creates structure by considering three levels +0675 if sbmlPackageVersions(1) == 2 +0676 modelSBML.fbc_strict=1; +0677 modelSBML.isSetfbc_strict = 1; +0678 end +0679 +0680 modelSBML.rule=[]; +0681 modelSBML.constraint=[]; +0682 +0683 [ravenDir,prevDir]=findRAVENroot(); +0684 fileName=checkFileExistence(fileName,1,true,false); 0685 -0686 sbmlFieldNames=getStructureFieldnames('model',sbmlLevel,sbmlVersion,sbmlPackages,sbmlPackageVersions); -0687 sbmlDefaultValues=getDefaultValues('model',sbmlLevel,sbmlVersion,sbmlPackages,sbmlPackageVersions); +0686 OutputSBML_RAVEN(modelSBML,fileName,1,0,[1,0]); +0687 end 0688 -0689 for i=1:numel(sbmlFieldNames) -0690 modelSBML.(sbmlFieldNames{1,i})=sbmlDefaultValues{1,i}; -0691 sbmlSubfieldNames=getStructureFieldnames(sbmlFieldNames{1,i},sbmlLevel,sbmlVersion,sbmlPackages,sbmlPackageVersions); -0692 sbmlSubfieldValues=getDefaultValues(sbmlFieldNames{1,i},sbmlLevel,sbmlVersion,sbmlPackages,sbmlPackageVersions); -0693 if ~strcmp(sbmlFieldNames{1,i},'event') && ~strcmp(sbmlFieldNames{1,i},'functionDefinition') && ~strcmp(sbmlFieldNames{1,i},'initialAssignment') -0694 for j=1:numel(sbmlSubfieldNames) -0695 modelSBML.(sbmlFieldNames{1,i}).(sbmlSubfieldNames{1,j})=sbmlSubfieldValues{1,j}; -0696 sbmlSubsubfieldNames=getStructureFieldnames(sbmlSubfieldNames{1,j},sbmlLevel,sbmlVersion,sbmlPackages,sbmlPackageVersions); -0697 sbmlSubsubfieldValues=getDefaultValues(sbmlSubfieldNames{1,j},sbmlLevel,sbmlVersion,sbmlPackages,sbmlPackageVersions); -0698 if ~strcmp(sbmlSubfieldNames{1,j},'modifier') && ~strcmp(sbmlSubfieldNames{1,j},'kineticLaw') 
-0699 for k=1:numel(sbmlSubsubfieldNames) -0700 %'compartment' and 'species' fields are not supposed to -0701 %have their standalone structures if they are subfields -0702 %or subsubfields -0703 if ~strcmp(sbmlSubfieldNames{1,j},'compartment') && ~strcmp(sbmlSubfieldNames{1,j},'species') -0704 modelSBML.(sbmlFieldNames{1,i}).(sbmlSubfieldNames{1,j}).(sbmlSubsubfieldNames{1,k})=sbmlSubsubfieldValues{1,k}; -0705 end -0706 %If it is fbc_association in the third level, we need -0707 %to establish the fourth level, since libSBML requires -0708 %it -0709 if strcmp(sbmlSubsubfieldNames{1,k},'fbc_association') -0710 fbc_associationFieldNames=getStructureFieldnames('fbc_association',sbmlLevel,sbmlVersion,sbmlPackages,sbmlPackageVersions); -0711 fbc_associationFieldValues=getDefaultValues('fbc_association',sbmlLevel,sbmlVersion,sbmlPackages,sbmlPackageVersions); -0712 for l=1:numel(fbc_associationFieldNames) -0713 modelSBML.(sbmlFieldNames{1,i}).(sbmlSubfieldNames{1,j}).(sbmlSubsubfieldNames{1,k}).(fbc_associationFieldNames{1,l})=fbc_associationFieldValues{1,l}; -0714 end -0715 end -0716 end -0717 end -0718 end -0719 end -0720 if ~isstruct(modelSBML.(sbmlFieldNames{1,i})) -0721 modelSBML.(sbmlFieldNames{1,i})=sbmlDefaultValues{1,i}; -0722 end -0723 end -0724 -0725 modelSBML.unitDefinition.id='mmol_per_gDW_per_hr'; -0726 -0727 unitFieldNames=getStructureFieldnames('unit',sbmlLevel,sbmlVersion,sbmlPackages,sbmlPackageVersions); -0728 unitDefaultValues=getDefaultValues('unit',sbmlLevel,sbmlVersion,sbmlPackages,sbmlPackageVersions); -0729 -0730 kinds={'mole','gram','second'}; -0731 exponents=[1 -1 -1]; -0732 scales=[-3 0 0]; -0733 multipliers=[1 1 1*60*60]; +0689 +0690 function modelSBML=getSBMLStructure(sbmlLevel,sbmlVersion,sbmlPackages,sbmlPackageVersions) +0691 %Returns the blank SBML model structure by using appropriate libSBML +0692 %functions. 
This creates structure by considering three levels +0693 +0694 sbmlFieldNames=getStructureFieldnames('model',sbmlLevel,sbmlVersion,sbmlPackages,sbmlPackageVersions); +0695 sbmlDefaultValues=getDefaultValues('model',sbmlLevel,sbmlVersion,sbmlPackages,sbmlPackageVersions); +0696 +0697 for i=1:numel(sbmlFieldNames) +0698 modelSBML.(sbmlFieldNames{1,i})=sbmlDefaultValues{1,i}; +0699 sbmlSubfieldNames=getStructureFieldnames(sbmlFieldNames{1,i},sbmlLevel,sbmlVersion,sbmlPackages,sbmlPackageVersions); +0700 sbmlSubfieldValues=getDefaultValues(sbmlFieldNames{1,i},sbmlLevel,sbmlVersion,sbmlPackages,sbmlPackageVersions); +0701 if ~strcmp(sbmlFieldNames{1,i},'event') && ~strcmp(sbmlFieldNames{1,i},'functionDefinition') && ~strcmp(sbmlFieldNames{1,i},'initialAssignment') +0702 for j=1:numel(sbmlSubfieldNames) +0703 modelSBML.(sbmlFieldNames{1,i}).(sbmlSubfieldNames{1,j})=sbmlSubfieldValues{1,j}; +0704 sbmlSubsubfieldNames=getStructureFieldnames(sbmlSubfieldNames{1,j},sbmlLevel,sbmlVersion,sbmlPackages,sbmlPackageVersions); +0705 sbmlSubsubfieldValues=getDefaultValues(sbmlSubfieldNames{1,j},sbmlLevel,sbmlVersion,sbmlPackages,sbmlPackageVersions); +0706 if ~strcmp(sbmlSubfieldNames{1,j},'modifier') && ~strcmp(sbmlSubfieldNames{1,j},'kineticLaw') +0707 for k=1:numel(sbmlSubsubfieldNames) +0708 %'compartment' and 'species' fields are not supposed to +0709 %have their standalone structures if they are subfields +0710 %or subsubfields +0711 if ~strcmp(sbmlSubfieldNames{1,j},'compartment') && ~strcmp(sbmlSubfieldNames{1,j},'species') +0712 modelSBML.(sbmlFieldNames{1,i}).(sbmlSubfieldNames{1,j}).(sbmlSubsubfieldNames{1,k})=sbmlSubsubfieldValues{1,k}; +0713 end +0714 %If it is fbc_association in the third level, we need +0715 %to establish the fourth level, since libSBML requires +0716 %it +0717 if strcmp(sbmlSubsubfieldNames{1,k},'fbc_association') +0718 fbc_associationFieldNames=getStructureFieldnames('fbc_association',sbmlLevel,sbmlVersion,sbmlPackages,sbmlPackageVersions); +0719 
fbc_associationFieldValues=getDefaultValues('fbc_association',sbmlLevel,sbmlVersion,sbmlPackages,sbmlPackageVersions); +0720 for l=1:numel(fbc_associationFieldNames) +0721 modelSBML.(sbmlFieldNames{1,i}).(sbmlSubfieldNames{1,j}).(sbmlSubsubfieldNames{1,k}).(fbc_associationFieldNames{1,l})=fbc_associationFieldValues{1,l}; +0722 end +0723 end +0724 end +0725 end +0726 end +0727 end +0728 if ~isstruct(modelSBML.(sbmlFieldNames{1,i})) +0729 modelSBML.(sbmlFieldNames{1,i})=sbmlDefaultValues{1,i}; +0730 end +0731 end +0732 +0733 modelSBML.unitDefinition.id='mmol_per_gDW_per_hr'; 0734 -0735 for i=1:numel(unitFieldNames) -0736 modelSBML.unitDefinition.unit(1).(unitFieldNames{1,i})=unitDefaultValues{1,i}; -0737 for j=1:3 -0738 modelSBML.unitDefinition.unit(j).(unitFieldNames{1,i})=unitDefaultValues{1,i}; -0739 if strcmp(unitFieldNames{1,i},'kind') -0740 modelSBML.unitDefinition.unit(j).(unitFieldNames{1,i})=kinds{j}; -0741 elseif strcmp(unitFieldNames{1,i},'exponent') -0742 modelSBML.unitDefinition.unit(j).(unitFieldNames{1,i})=exponents(j); -0743 elseif strcmp(unitFieldNames{1,i},'scale') -0744 modelSBML.unitDefinition.unit(j).(unitFieldNames{1,i})=scales(j); -0745 elseif strcmp(unitFieldNames{1,i},'multiplier') -0746 modelSBML.unitDefinition.unit(j).(unitFieldNames{1,i})=multipliers(j); -0747 end -0748 end -0749 end -0750 end -0751 -0752 function miriamString=getMiriam(miriamStruct) -0753 %Returns a string with list elements for a miriam structure ('<rdf:li -0754 %rdf:resource="https://identifiers.org/go/GO:0005739"/>' for example). 
This -0755 %is just to speed up things since this is done many times during the -0756 %exporting -0757 -0758 miriamString=''; -0759 if isfield(miriamStruct,'name') -0760 for i=1:numel(miriamStruct.name) -0761 miriamString=[miriamString '<rdf:li rdf:resource="https://identifiers.org/' miriamStruct.name{i} '/' miriamStruct.value{i} '"/>']; -0762 end -0763 end -0764 end +0735 unitFieldNames=getStructureFieldnames('unit',sbmlLevel,sbmlVersion,sbmlPackages,sbmlPackageVersions); +0736 unitDefaultValues=getDefaultValues('unit',sbmlLevel,sbmlVersion,sbmlPackages,sbmlPackageVersions); +0737 +0738 kinds={'mole','gram','second'}; +0739 exponents=[1 -1 -1]; +0740 scales=[-3 0 0]; +0741 multipliers=[1 1 1*60*60]; +0742 +0743 for i=1:numel(unitFieldNames) +0744 modelSBML.unitDefinition.unit(1).(unitFieldNames{1,i})=unitDefaultValues{1,i}; +0745 for j=1:3 +0746 modelSBML.unitDefinition.unit(j).(unitFieldNames{1,i})=unitDefaultValues{1,i}; +0747 if strcmp(unitFieldNames{1,i},'kind') +0748 modelSBML.unitDefinition.unit(j).(unitFieldNames{1,i})=kinds{j}; +0749 elseif strcmp(unitFieldNames{1,i},'exponent') +0750 modelSBML.unitDefinition.unit(j).(unitFieldNames{1,i})=exponents(j); +0751 elseif strcmp(unitFieldNames{1,i},'scale') +0752 modelSBML.unitDefinition.unit(j).(unitFieldNames{1,i})=scales(j); +0753 elseif strcmp(unitFieldNames{1,i},'multiplier') +0754 modelSBML.unitDefinition.unit(j).(unitFieldNames{1,i})=multipliers(j); +0755 end +0756 end +0757 end +0758 end +0759 +0760 function miriamString=getMiriam(miriamStruct) +0761 %Returns a string with list elements for a miriam structure ('<rdf:li +0762 %rdf:resource="https://identifiers.org/go/GO:0005739"/>' for example). This +0763 %is just to speed up things since this is done many times during the +0764 %exporting 0765 -0766 function [tmp_Rxn]=addReactantsProducts(model,sbmlModel,i) -0767 %This function provides reactants and products for particular reaction. 
The -0768 %function was 'borrowed' from writeSBML in COBRA toolbox, lines 663-679 -0769 -0770 met_idx = find(model.S(:, i)); -0771 tmp_Rxn.product=[]; -0772 tmp_Rxn.reactant=[]; -0773 for j_met=1:size(met_idx,1) -0774 tmp_idx = met_idx(j_met,1); -0775 sbml_tmp_species_ref.species = sbmlModel.species(tmp_idx).id; -0776 met_stoich = model.S(tmp_idx, i); -0777 sbml_tmp_species_ref.stoichiometry = abs(met_stoich); -0778 sbml_tmp_species_ref.isSetStoichiometry=1; -0779 sbml_tmp_species_ref.constant=1; -0780 if (met_stoich > 0) -0781 tmp_Rxn.product = [ tmp_Rxn.product, sbml_tmp_species_ref ]; -0782 else -0783 tmp_Rxn.reactant = [ tmp_Rxn.reactant, sbml_tmp_species_ref]; -0784 end -0785 end -0786 end -0787 -0788 function vecT = columnVector(vec) -0789 % Code below taken from COBRA Toolbox under GNU General Public License v3.0 -0790 % license file in readme/GPL.MD. -0791 % -0792 % Converts a vector to a column vector -0793 % -0794 % USAGE: -0795 % -0796 % vecT = columnVector(vec) -0797 % -0798 % INPUT: -0799 % vec: a vector -0800 % -0801 % OUTPUT: -0802 % vecT: a column vector -0803 -0804 [n, m] = size(vec); -0805 -0806 if n < m -0807 vecT = vec'; -0808 else -0809 vecT = vec; -0810 end -0811 end +0766 miriamString=''; +0767 if isfield(miriamStruct,'name') +0768 for i=1:numel(miriamStruct.name) +0769 miriamString=[miriamString '<rdf:li rdf:resource="https://identifiers.org/' miriamStruct.name{i} '/' miriamStruct.value{i} '"/>']; +0770 end +0771 end +0772 end +0773 +0774 function [tmp_Rxn]=addReactantsProducts(model,sbmlModel,i) +0775 %This function provides reactants and products for particular reaction. 
The +0776 %function was 'borrowed' from writeSBML in COBRA toolbox, lines 663-679 +0777 +0778 met_idx = find(model.S(:, i)); +0779 tmp_Rxn.product=[]; +0780 tmp_Rxn.reactant=[]; +0781 for j_met=1:size(met_idx,1) +0782 tmp_idx = met_idx(j_met,1); +0783 sbml_tmp_species_ref.species = sbmlModel.species(tmp_idx).id; +0784 met_stoich = model.S(tmp_idx, i); +0785 sbml_tmp_species_ref.stoichiometry = abs(met_stoich); +0786 sbml_tmp_species_ref.isSetStoichiometry=1; +0787 sbml_tmp_species_ref.constant=1; +0788 if (met_stoich > 0) +0789 tmp_Rxn.product = [ tmp_Rxn.product, sbml_tmp_species_ref ]; +0790 else +0791 tmp_Rxn.reactant = [ tmp_Rxn.reactant, sbml_tmp_species_ref]; +0792 end +0793 end +0794 end +0795 +0796 function vecT = columnVector(vec) +0797 % Code below taken from COBRA Toolbox under GNU General Public License v3.0 +0798 % license file in readme/GPL.MD. +0799 % +0800 % Converts a vector to a column vector +0801 % +0802 % USAGE: +0803 % +0804 % vecT = columnVector(vec) +0805 % +0806 % INPUT: +0807 % vec: a vector +0808 % +0809 % OUTPUT: +0810 % vecT: a column vector +0811 +0812 [n, m] = size(vec); +0813 +0814 if n < m +0815 vecT = vec'; +0816 else +0817 vecT = vec; +0818 end +0819 end
Generated by m2html © 2005
\ No newline at end of file diff --git a/doc/io/importModel.html b/doc/io/importModel.html index 7773dd1a..8bad98ec 100644 --- a/doc/io/importModel.html +++ b/doc/io/importModel.html @@ -75,6 +75,7 @@

DESCRIPTION ^SOURCE CODE ^% geneComps compartments for genes 0048 % geneMiriams structure with MIRIAM information about the genes 0049 % geneShortNames gene alternative names (e.g. ERG10) -0050 % metNames metabolite description -0051 % metComps compartments for metabolites -0052 % inchis InChI-codes for metabolites -0053 % metFormulas metabolite chemical formula -0054 % metMiriams structure with MIRIAM information about the metabolites -0055 % metCharges metabolite charge -0056 % unconstrained true if the metabolite is an exchange metabolite -0057 % -0058 % A number of consistency checks are performed in order to ensure that the -0059 % model is valid. Take these warnings seriously and modify the model -0060 % structure to solve them. -0061 % -0062 % Usage: model = importModel(fileName, removeExcMets, isSBML2COBRA, supressWarnings) -0063 -0064 if nargin<1 || isempty(fileName) -0065 [fileName, pathName] = uigetfile({'*.xml;*.sbml'}, 'Please select the model file'); -0066 if fileName == 0 -0067 error('You should select a model file') -0068 else -0069 fileName = fullfile(pathName,fileName); -0070 end -0071 end -0072 fileName=char(fileName); -0073 if nargin<2 || isempty(removeExcMets) -0074 removeExcMets=true; -0075 end -0076 -0077 if nargin<3 || isempty(isSBML2COBRA) -0078 isSBML2COBRA=false; -0079 end -0080 -0081 if nargin<4 -0082 supressWarnings=false; -0083 end -0084 -0085 if ~isfile(fileName) -0086 error('SBML file %s cannot be found',string(fileName)); -0087 end -0088 -0089 %This is to match the order of the fields to those you get from importing -0090 %from Excel -0091 model=[]; -0092 model.id=[]; -0093 model.name=[]; -0094 model.annotation=[]; -0095 model.rxns={}; -0096 model.mets={}; -0097 model.S=[]; -0098 model.lb=[]; -0099 model.ub=[]; -0100 model.rev=[]; -0101 model.c=[]; -0102 model.b=[]; -0103 model.comps={}; -0104 model.compNames={}; -0105 model.compOutside={}; -0106 model.compMiriams={}; -0107 model.rxnNames={}; -0108 model.rxnComps=[]; -0109 
model.grRules={}; -0110 model.rxnGeneMat=[]; -0111 model.subSystems={}; -0112 model.eccodes={}; -0113 model.rxnMiriams={}; -0114 model.rxnNotes={}; -0115 model.rxnReferences={}; -0116 model.rxnConfidenceScores=[]; -0117 model.genes={}; -0118 model.geneComps=[]; -0119 model.geneMiriams={}; -0120 model.geneShortNames={}; -0121 model.metNames={}; -0122 model.metComps=[]; -0123 model.inchis={}; -0124 model.metFormulas={}; -0125 model.metMiriams={}; -0126 model.metCharges=[]; -0127 model.unconstrained=[]; -0128 -0129 %Load the model using libSBML -0130 [ravenDir,prevDir]=findRAVENroot(); -0131 fileName=checkFileExistence(fileName,1); -0132 modelSBML = TranslateSBML_RAVEN(fileName,0,0,[1 1]); -0133 -0134 if isempty(modelSBML) -0135 EM='There is a problem with the SBML file. Try using the SBML Validator at http://sbml.org/Facilities/Validator'; -0136 dispEM(EM); -0137 end -0138 -0139 %Remove the preceding strings for reactions, compartments and -0140 %reactants/products in 'reaction' field. The strings for metabolites, genes -0141 %and complexes are not removed, as we will need them later to identify them -0142 %from 'species' field -0143 for i=1:numel(modelSBML.reaction) -0144 modelSBML.reaction(i).name=regexprep(modelSBML.reaction(i).name,'^R_',''); -0145 modelSBML.reaction(i).id=regexprep(modelSBML.reaction(i).id,'^R_',''); -0146 if isfield(modelSBML.reaction(i),'compartment') -0147 modelSBML.reaction(i).compartment=regexprep(modelSBML.reaction(i).compartment,'^C_',''); -0148 end -0149 for j=1:numel(modelSBML.reaction(i).reactant) -0150 modelSBML.reaction(i).reactant(j).species=regexprep(modelSBML.reaction(i).reactant(j).species,'^M_',''); -0151 end -0152 for j=1:numel(modelSBML.reaction(i).product) -0153 modelSBML.reaction(i).product(j).species=regexprep(modelSBML.reaction(i).product(j).species,'^M_',''); -0154 end -0155 end -0156 -0157 %Retrieve compartment names and IDs -0158 compartmentNames=cell(numel(modelSBML.compartment),1); -0159 
compartmentIDs=cell(numel(modelSBML.compartment),1); -0160 compartmentOutside=cell(numel(modelSBML.compartment),1); -0161 compartmentMiriams=cell(numel(modelSBML.compartment),1); -0162 -0163 if isfield(modelSBML.compartment,'sboTerm') && numel(unique([modelSBML.compartment.sboTerm])) == 1 -0164 %If all the SBO terms are identical, don't add them to compMiriams -0165 modelSBML.compartment = rmfield(modelSBML.compartment,'sboTerm'); -0166 end -0167 -0168 for i=1:numel(modelSBML.compartment) -0169 compartmentNames{i}=modelSBML.compartment(i).name; -0170 compartmentIDs{i}=regexprep(modelSBML.compartment(i).id,'^C_',''); -0171 if isfield(modelSBML.compartment(i),'outside') -0172 if ~isempty(modelSBML.compartment(i).outside) -0173 compartmentOutside{i}=regexprep(modelSBML.compartment(i).outside,'^C_',''); -0174 else -0175 compartmentOutside{i}=''; -0176 end -0177 else -0178 compartmentOutside{i}=[]; -0179 end -0180 -0181 if isfield(modelSBML.compartment(i),'annotation') -0182 compartmentMiriams{i}=parseMiriam(modelSBML.compartment(i).annotation); -0183 else -0184 compartmentMiriams{i}=[]; -0185 end -0186 -0187 if isfield(modelSBML.compartment(i),'sboTerm') && ~(modelSBML.compartment(i).sboTerm==-1) -0188 compartmentMiriams{i} = addSBOtoMiriam(compartmentMiriams{i},modelSBML.compartment(i).sboTerm); -0189 end -0190 end -0191 -0192 %If there are no compartment names then use compartment id as name -0193 if all(cellfun(@isempty,compartmentNames)) -0194 compartmentNames=compartmentIDs; -0195 end -0196 -0197 %Retrieve info on metabolites, genes, complexes -0198 metaboliteNames={}; -0199 metaboliteIDs={}; -0200 metaboliteCompartments={}; -0201 metaboliteUnconstrained=[]; -0202 metaboliteFormula={}; -0203 metaboliteInChI={}; -0204 metaboliteMiriams={}; -0205 metaboliteCharges=[]; -0206 -0207 geneNames={}; -0208 geneIDs={}; -0209 geneMiriams={}; -0210 geneShortNames={}; -0211 geneCompartments={}; -0212 complexIDs={}; -0213 complexNames={}; -0214 -0215 %If the file is not a COBRA 
Toolbox model. According to the format -0216 %specified in the yeast consensus model both metabolites and genes are a -0217 %type of 'species'. The metabolites have names starting with 'M_' and genes -0218 %with 'E_' -0219 geneSBOs = []; -0220 metSBOs = []; -0221 %Regex of compartment names, later to be used to remove from metabolite -0222 %names if present as suffix. -0223 regexCompNames = ['\s?\[((' strjoin({modelSBML.compartment.name},')|(') '))\]$']; -0224 for i=1:numel(modelSBML.species) -0225 if ~isSBML2COBRA -0226 if length(modelSBML.species(i).id)>=2 && strcmpi(modelSBML.species(i).id(1:2),'E_') -0227 geneNames{numel(geneNames)+1,1}=modelSBML.species(i).name; -0228 -0229 %The "E_" is included in the ID. This is because it's only used -0230 %internally in this file and it makes the matching a little -0231 %smoother -0232 geneIDs{numel(geneIDs)+1,1}=modelSBML.species(i).id; -0233 geneCompartments{numel(geneCompartments)+1,1}=regexprep(modelSBML.species(i).compartment,'^C_',''); -0234 -0235 %Get Miriam structure -0236 if isfield(modelSBML.species(i),'annotation') -0237 %Get Miriam info -0238 geneMiriam=parseMiriam(modelSBML.species(i).annotation); -0239 geneMiriams{numel(geneMiriams)+1,1}=geneMiriam; -0240 else -0241 geneMiriams{numel(geneMiriams)+1,1}=[]; -0242 end -0243 -0244 %Protein short names (for example ERG10) are saved as SHORT -0245 %NAME: NAME in the notes-section of metabolites for SBML Level -0246 %2 and as PROTEIN_ASSOCIATION for each reaction in SBML Level 2 -0247 %COBRA Toolbox format. 
For now only the SHORT NAME is loaded -0248 %and no mapping takes place -0249 if isfield(modelSBML.species(i),'notes') -0250 geneShortNames{numel(geneShortNames)+1,1}=parseNote(modelSBML.species(i).notes,'SHORT NAME'); -0251 else -0252 geneShortNames{numel(geneShortNames)+1,1}=''; -0253 end -0254 -0255 %Get SBO term -0256 if isfield(modelSBML.species(i),'sboTerm') && ~(modelSBML.species(i).sboTerm==-1) -0257 geneSBOs(end+1,1) = modelSBML.species(i).sboTerm; -0258 end -0259 elseif length(modelSBML.species(i).id)>=2 && strcmpi(modelSBML.species(i).id(1:3),'Cx_') -0260 %If it's a complex keep the ID and name -0261 complexIDs=[complexIDs;modelSBML.species(i).id]; -0262 complexNames=[complexNames;modelSBML.species(i).name]; -0263 else -0264 %If it is not gene or complex, then it must be a metabolite -0265 metaboliteNames{numel(metaboliteNames)+1,1}=modelSBML.species(i).name; -0266 metaboliteIDs{numel(metaboliteIDs)+1,1}=regexprep(modelSBML.species(i).id,'^M_',''); -0267 metaboliteCompartments{numel(metaboliteCompartments)+1,1}=regexprep(modelSBML.species(i).compartment,'^C_',''); -0268 metaboliteUnconstrained(numel(metaboliteUnconstrained)+1,1)=modelSBML.species(i).boundaryCondition; -0269 -0270 %For each metabolite retrieve the formula and the InChI code if -0271 %available First add the InChI code and the formula from the -0272 %InChI. 
This allows for overwriting the formula by setting the -0273 %actual formula field -0274 if ~isempty(modelSBML.species(i).annotation) -0275 %Get the formula if available -0276 startString='>InChI='; -0277 endString='</in:inchi>'; -0278 formStart=strfind(modelSBML.species(i).annotation,startString); -0279 if isempty(formStart) -0280 startString='InChI='; -0281 endString='"/>'; -0282 end -0283 formStart=strfind(modelSBML.species(i).annotation,startString); -0284 if ~isempty(formStart) -0285 formEnd=strfind(modelSBML.species(i).annotation,endString); -0286 formEndIndex=find(formEnd>formStart, 1 ); -0287 formula=modelSBML.species(i).annotation(formStart+numel(startString):formEnd(formEndIndex)-1); -0288 metaboliteInChI{numel(metaboliteInChI)+1,1}=formula; -0289 -0290 %The composition is most often present between the -0291 %first and second "/" in the model. In some simple -0292 %molecules, such as salts, there is no second "/". The -0293 %formula is then assumed to be to the end of the string -0294 compositionIndexes=strfind(formula,'/'); -0295 if numel(compositionIndexes)>1 -0296 metaboliteFormula{numel(metaboliteFormula)+1,1}=... -0297 formula(compositionIndexes(1)+1:compositionIndexes(2)-1); -0298 else -0299 if numel(compositionIndexes)==1 -0300 %Probably a simple molecule which can have only -0301 %one conformation -0302 metaboliteFormula{numel(metaboliteFormula)+1,1}=... 
-0303 formula(compositionIndexes(1)+1:numel(formula)); -0304 else -0305 metaboliteFormula{numel(metaboliteFormula)+1,1}=''; -0306 end -0307 end -0308 elseif isfield(modelSBML.species(i),'fbc_chemicalFormula') -0309 metaboliteInChI{numel(metaboliteInChI)+1,1}=''; -0310 if ~isempty(modelSBML.species(i).fbc_chemicalFormula) -0311 %Cannot extract InChi from formula, so remains -0312 %empty -0313 metaboliteFormula{numel(metaboliteFormula)+1,1}=modelSBML.species(i).fbc_chemicalFormula; -0314 else -0315 metaboliteFormula{numel(metaboliteFormula)+1,1}=''; -0316 end -0317 else -0318 metaboliteInChI{numel(metaboliteInChI)+1,1}=''; -0319 metaboliteFormula{numel(metaboliteFormula)+1,1}=''; -0320 end -0321 -0322 %Get Miriam info -0323 metMiriam=parseMiriam(modelSBML.species(i).annotation); -0324 metaboliteMiriams{numel(metaboliteMiriams)+1,1}=metMiriam; -0325 else -0326 metaboliteInChI{numel(metaboliteInChI)+1,1}=''; -0327 if isfield(modelSBML.species(i),'notes') -0328 metaboliteFormula{numel(metaboliteFormula)+1,1}=parseNote(modelSBML.species(i).notes,'FORMULA'); -0329 else -0330 metaboliteFormula{numel(metaboliteFormula)+1,1}=''; -0331 end -0332 metaboliteMiriams{numel(metaboliteMiriams)+1,1}=[]; -0333 end -0334 if ~isempty(modelSBML.species(i).notes) -0335 if ~isfield(modelSBML.species(i),'annotation') -0336 metaboliteFormula{numel(metaboliteFormula)+1,1}=parseNote(modelSBML.species(i).notes,'FORMULA'); -0337 end -0338 elseif ~isfield(modelSBML.species(i),'annotation') -0339 metaboliteFormula{numel(metaboliteFormula)+1,1}=''; -0340 end -0341 %Get SBO term -0342 if isfield(modelSBML.species(i),'sboTerm') && ~(modelSBML.species(i).sboTerm==-1) -0343 metSBOs(end+1,1) = modelSBML.species(i).sboTerm; -0344 end -0345 end -0346 -0347 elseif isSBML2COBRA -0348 %The metabolite names are assumed to be M_NAME_COMPOSITION or -0349 %_NAME_COMPOSITION or NAME_COMPOSITION or NAME. 
Regular expressions -0350 %are used that only NAME_COMPOSITION or NAME would be possible -0351 -0352 modelSBML.species(i).name=regexprep(modelSBML.species(i).name,'^M_',''); -0353 modelSBML.species(i).name=regexprep(modelSBML.species(i).name,'^_',''); -0354 underscoreIndex=strfind(modelSBML.species(i).name,'_'); -0355 -0356 metaboliteNames{numel(metaboliteNames)+1,1}=modelSBML.species(i).name; -0357 -0358 metaboliteIDs{numel(metaboliteIDs)+1,1}=regexprep(modelSBML.species(i).id,'^M_',''); -0359 metaboliteCompartments{numel(metaboliteCompartments)+1,1}=regexprep(modelSBML.species(i).compartment,'^C_',''); +0050 % proteinNames protein associated to each gene +0051 % metNames metabolite description +0052 % metComps compartments for metabolites +0053 % inchis InChI-codes for metabolites +0054 % metFormulas metabolite chemical formula +0055 % metMiriams structure with MIRIAM information about the metabolites +0056 % metCharges metabolite charge +0057 % unconstrained true if the metabolite is an exchange metabolite +0058 % +0059 % A number of consistency checks are performed in order to ensure that the +0060 % model is valid. Take these warnings seriously and modify the model +0061 % structure to solve them. 
+0062 % +0063 % Usage: model = importModel(fileName, removeExcMets, isSBML2COBRA, supressWarnings) +0064 +0065 if nargin<1 || isempty(fileName) +0066 [fileName, pathName] = uigetfile({'*.xml;*.sbml'}, 'Please select the model file'); +0067 if fileName == 0 +0068 error('You should select a model file') +0069 else +0070 fileName = fullfile(pathName,fileName); +0071 end +0072 end +0073 fileName=char(fileName); +0074 if nargin<2 || isempty(removeExcMets) +0075 removeExcMets=true; +0076 end +0077 +0078 if nargin<3 || isempty(isSBML2COBRA) +0079 isSBML2COBRA=false; +0080 end +0081 +0082 if nargin<4 +0083 supressWarnings=false; +0084 end +0085 +0086 if ~isfile(fileName) +0087 error('SBML file %s cannot be found',string(fileName)); +0088 end +0089 +0090 %This is to match the order of the fields to those you get from importing +0091 %from Excel +0092 model=[]; +0093 model.id=[]; +0094 model.name=[]; +0095 model.annotation=[]; +0096 model.rxns={}; +0097 model.mets={}; +0098 model.S=[]; +0099 model.lb=[]; +0100 model.ub=[]; +0101 model.rev=[]; +0102 model.c=[]; +0103 model.b=[]; +0104 model.comps={}; +0105 model.compNames={}; +0106 model.compOutside={}; +0107 model.compMiriams={}; +0108 model.rxnNames={}; +0109 model.rxnComps=[]; +0110 model.grRules={}; +0111 model.rxnGeneMat=[]; +0112 model.subSystems={}; +0113 model.eccodes={}; +0114 model.rxnMiriams={}; +0115 model.rxnNotes={}; +0116 model.rxnReferences={}; +0117 model.rxnConfidenceScores=[]; +0118 model.genes={}; +0119 model.geneComps=[]; +0120 model.geneMiriams={}; +0121 model.geneShortNames={}; +0122 model.proteinNames={}; +0123 model.metNames={}; +0124 model.metComps=[]; +0125 model.inchis={}; +0126 model.metFormulas={}; +0127 model.metMiriams={}; +0128 model.metCharges=[]; +0129 model.unconstrained=[]; +0130 +0131 %Load the model using libSBML +0132 [ravenDir,prevDir]=findRAVENroot(); +0133 fileName=checkFileExistence(fileName,1); +0134 modelSBML = TranslateSBML_RAVEN(fileName,0,0,[1 1]); +0135 +0136 if 
isempty(modelSBML) +0137 EM='There is a problem with the SBML file. Try using the SBML Validator at http://sbml.org/Facilities/Validator'; +0138 dispEM(EM); +0139 end +0140 +0141 %Remove the preceding strings for reactions, compartments and +0142 %reactants/products in 'reaction' field. The strings for metabolites, genes +0143 %and complexes are not removed, as we will need them later to identify them +0144 %from 'species' field +0145 for i=1:numel(modelSBML.reaction) +0146 modelSBML.reaction(i).name=regexprep(modelSBML.reaction(i).name,'^R_',''); +0147 modelSBML.reaction(i).id=regexprep(modelSBML.reaction(i).id,'^R_',''); +0148 if isfield(modelSBML.reaction(i),'compartment') +0149 modelSBML.reaction(i).compartment=regexprep(modelSBML.reaction(i).compartment,'^C_',''); +0150 end +0151 for j=1:numel(modelSBML.reaction(i).reactant) +0152 modelSBML.reaction(i).reactant(j).species=regexprep(modelSBML.reaction(i).reactant(j).species,'^M_',''); +0153 end +0154 for j=1:numel(modelSBML.reaction(i).product) +0155 modelSBML.reaction(i).product(j).species=regexprep(modelSBML.reaction(i).product(j).species,'^M_',''); +0156 end +0157 end +0158 +0159 %Retrieve compartment names and IDs +0160 compartmentNames=cell(numel(modelSBML.compartment),1); +0161 compartmentIDs=cell(numel(modelSBML.compartment),1); +0162 compartmentOutside=cell(numel(modelSBML.compartment),1); +0163 compartmentMiriams=cell(numel(modelSBML.compartment),1); +0164 +0165 if isfield(modelSBML.compartment,'sboTerm') && numel(unique([modelSBML.compartment.sboTerm])) == 1 +0166 %If all the SBO terms are identical, don't add them to compMiriams +0167 modelSBML.compartment = rmfield(modelSBML.compartment,'sboTerm'); +0168 end +0169 +0170 for i=1:numel(modelSBML.compartment) +0171 compartmentNames{i}=modelSBML.compartment(i).name; +0172 compartmentIDs{i}=regexprep(modelSBML.compartment(i).id,'^C_',''); +0173 if isfield(modelSBML.compartment(i),'outside') +0174 if ~isempty(modelSBML.compartment(i).outside) +0175 
compartmentOutside{i}=regexprep(modelSBML.compartment(i).outside,'^C_',''); +0176 else +0177 compartmentOutside{i}=''; +0178 end +0179 else +0180 compartmentOutside{i}=[]; +0181 end +0182 +0183 if isfield(modelSBML.compartment(i),'annotation') +0184 compartmentMiriams{i}=parseMiriam(modelSBML.compartment(i).annotation); +0185 else +0186 compartmentMiriams{i}=[]; +0187 end +0188 +0189 if isfield(modelSBML.compartment(i),'sboTerm') && ~(modelSBML.compartment(i).sboTerm==-1) +0190 compartmentMiriams{i} = addSBOtoMiriam(compartmentMiriams{i},modelSBML.compartment(i).sboTerm); +0191 end +0192 end +0193 +0194 %If there are no compartment names then use compartment id as name +0195 if all(cellfun(@isempty,compartmentNames)) +0196 compartmentNames=compartmentIDs; +0197 end +0198 +0199 %Retrieve info on metabolites, genes, complexes +0200 metaboliteNames={}; +0201 metaboliteIDs={}; +0202 metaboliteCompartments={}; +0203 metaboliteUnconstrained=[]; +0204 metaboliteFormula={}; +0205 metaboliteInChI={}; +0206 metaboliteMiriams={}; +0207 metaboliteCharges=[]; +0208 +0209 geneNames={}; +0210 geneIDs={}; +0211 geneMiriams={}; +0212 geneShortNames={}; +0213 proteinNames={}; +0214 geneCompartments={}; +0215 complexIDs={}; +0216 complexNames={}; +0217 +0218 %If the file is not a COBRA Toolbox model. According to the format +0219 %specified in the yeast consensus model both metabolites and genes are a +0220 %type of 'species'. The metabolites have names starting with 'M_' and genes +0221 %with 'E_' +0222 geneSBOs = []; +0223 metSBOs = []; +0224 %Regex of compartment names, later to be used to remove from metabolite +0225 %names if present as suffix. 
+0226 regexCompNames = ['\s?\[((' strjoin({modelSBML.compartment.name},')|(') '))\]$']; +0227 for i=1:numel(modelSBML.species) +0228 if ~isSBML2COBRA +0229 if length(modelSBML.species(i).id)>=2 && strcmpi(modelSBML.species(i).id(1:2),'E_') +0230 geneNames{numel(geneNames)+1,1}=modelSBML.species(i).name; +0231 +0232 %The "E_" is included in the ID. This is because it's only used +0233 %internally in this file and it makes the matching a little +0234 %smoother +0235 geneIDs{numel(geneIDs)+1,1}=modelSBML.species(i).id; +0236 geneCompartments{numel(geneCompartments)+1,1}=regexprep(modelSBML.species(i).compartment,'^C_',''); +0237 +0238 %Get Miriam structure +0239 if isfield(modelSBML.species(i),'annotation') +0240 %Get Miriam info +0241 geneMiriam=parseMiriam(modelSBML.species(i).annotation); +0242 geneMiriams{numel(geneMiriams)+1,1}=geneMiriam; +0243 else +0244 geneMiriams{numel(geneMiriams)+1,1}=[]; +0245 end +0246 +0247 %Protein short names (for example ERG10) are saved as SHORT +0248 %NAME: NAME in the notes-section of metabolites for SBML Level +0249 %2 and as PROTEIN_ASSOCIATION for each reaction in SBML Level 2 +0250 %COBRA Toolbox format. 
For now only the SHORT NAME is loaded +0251 %and no mapping takes place +0252 if isfield(modelSBML.species(i),'notes') +0253 geneShortNames{numel(geneShortNames)+1,1}=parseNote(modelSBML.species(i).notes,'SHORT NAME'); +0254 else +0255 geneShortNames{numel(geneShortNames)+1,1}=''; +0256 end +0257 +0258 %Get SBO term +0259 if isfield(modelSBML.species(i),'sboTerm') && ~(modelSBML.species(i).sboTerm==-1) +0260 geneSBOs(end+1,1) = modelSBML.species(i).sboTerm; +0261 end +0262 elseif length(modelSBML.species(i).id)>=2 && strcmpi(modelSBML.species(i).id(1:3),'Cx_') +0263 %If it's a complex keep the ID and name +0264 complexIDs=[complexIDs;modelSBML.species(i).id]; +0265 complexNames=[complexNames;modelSBML.species(i).name]; +0266 else +0267 %If it is not gene or complex, then it must be a metabolite +0268 metaboliteNames{numel(metaboliteNames)+1,1}=modelSBML.species(i).name; +0269 metaboliteIDs{numel(metaboliteIDs)+1,1}=regexprep(modelSBML.species(i).id,'^M_',''); +0270 metaboliteCompartments{numel(metaboliteCompartments)+1,1}=regexprep(modelSBML.species(i).compartment,'^C_',''); +0271 metaboliteUnconstrained(numel(metaboliteUnconstrained)+1,1)=modelSBML.species(i).boundaryCondition; +0272 +0273 %For each metabolite retrieve the formula and the InChI code if +0274 %available First add the InChI code and the formula from the +0275 %InChI. 
This allows for overwriting the formula by setting the +0276 %actual formula field +0277 if ~isempty(modelSBML.species(i).annotation) +0278 %Get the formula if available +0279 startString='>InChI='; +0280 endString='</in:inchi>'; +0281 formStart=strfind(modelSBML.species(i).annotation,startString); +0282 if isempty(formStart) +0283 startString='InChI='; +0284 endString='"/>'; +0285 end +0286 formStart=strfind(modelSBML.species(i).annotation,startString); +0287 if ~isempty(formStart) +0288 formEnd=strfind(modelSBML.species(i).annotation,endString); +0289 formEndIndex=find(formEnd>formStart, 1 ); +0290 formula=modelSBML.species(i).annotation(formStart+numel(startString):formEnd(formEndIndex)-1); +0291 metaboliteInChI{numel(metaboliteInChI)+1,1}=formula; +0292 +0293 %The composition is most often present between the +0294 %first and second "/" in the model. In some simple +0295 %molecules, such as salts, there is no second "/". The +0296 %formula is then assumed to be to the end of the string +0297 compositionIndexes=strfind(formula,'/'); +0298 if numel(compositionIndexes)>1 +0299 metaboliteFormula{numel(metaboliteFormula)+1,1}=... +0300 formula(compositionIndexes(1)+1:compositionIndexes(2)-1); +0301 else +0302 if numel(compositionIndexes)==1 +0303 %Probably a simple molecule which can have only +0304 %one conformation +0305 metaboliteFormula{numel(metaboliteFormula)+1,1}=... 
+0306 formula(compositionIndexes(1)+1:numel(formula)); +0307 else +0308 metaboliteFormula{numel(metaboliteFormula)+1,1}=''; +0309 end +0310 end +0311 elseif isfield(modelSBML.species(i),'fbc_chemicalFormula') +0312 metaboliteInChI{numel(metaboliteInChI)+1,1}=''; +0313 if ~isempty(modelSBML.species(i).fbc_chemicalFormula) +0314 %Cannot extract InChi from formula, so remains +0315 %empty +0316 metaboliteFormula{numel(metaboliteFormula)+1,1}=modelSBML.species(i).fbc_chemicalFormula; +0317 else +0318 metaboliteFormula{numel(metaboliteFormula)+1,1}=''; +0319 end +0320 else +0321 metaboliteInChI{numel(metaboliteInChI)+1,1}=''; +0322 metaboliteFormula{numel(metaboliteFormula)+1,1}=''; +0323 end +0324 +0325 %Get Miriam info +0326 metMiriam=parseMiriam(modelSBML.species(i).annotation); +0327 metaboliteMiriams{numel(metaboliteMiriams)+1,1}=metMiriam; +0328 else +0329 metaboliteInChI{numel(metaboliteInChI)+1,1}=''; +0330 if isfield(modelSBML.species(i),'notes') +0331 metaboliteFormula{numel(metaboliteFormula)+1,1}=parseNote(modelSBML.species(i).notes,'FORMULA'); +0332 else +0333 metaboliteFormula{numel(metaboliteFormula)+1,1}=''; +0334 end +0335 metaboliteMiriams{numel(metaboliteMiriams)+1,1}=[]; +0336 end +0337 if ~isempty(modelSBML.species(i).notes) +0338 if ~isfield(modelSBML.species(i),'annotation') +0339 metaboliteFormula{numel(metaboliteFormula)+1,1}=parseNote(modelSBML.species(i).notes,'FORMULA'); +0340 end +0341 elseif ~isfield(modelSBML.species(i),'annotation') +0342 metaboliteFormula{numel(metaboliteFormula)+1,1}=''; +0343 end +0344 %Get SBO term +0345 if isfield(modelSBML.species(i),'sboTerm') && ~(modelSBML.species(i).sboTerm==-1) +0346 metSBOs(end+1,1) = modelSBML.species(i).sboTerm; +0347 end +0348 end +0349 +0350 elseif isSBML2COBRA +0351 %The metabolite names are assumed to be M_NAME_COMPOSITION or +0352 %_NAME_COMPOSITION or NAME_COMPOSITION or NAME. 
Regular expressions +0353 %are used that only NAME_COMPOSITION or NAME would be possible +0354 +0355 modelSBML.species(i).name=regexprep(modelSBML.species(i).name,'^M_',''); +0356 modelSBML.species(i).name=regexprep(modelSBML.species(i).name,'^_',''); +0357 underscoreIndex=strfind(modelSBML.species(i).name,'_'); +0358 +0359 metaboliteNames{numel(metaboliteNames)+1,1}=modelSBML.species(i).name; 0360 -0361 %I think that COBRA doesn't set the boundary condition, but rather -0362 %uses name_b. Check for either -0363 metaboliteUnconstrained(numel(metaboliteUnconstrained)+1,1)=modelSBML.species(i).boundaryCondition; -0364 if strcmp(metaboliteIDs{end}(max(end-1,1):end),'_b') -0365 metaboliteUnconstrained(end)=1; -0366 end -0367 -0368 %Get the formula -0369 if max(underscoreIndex)<length(modelSBML.species(i).name) -0370 metaboliteFormula{numel(metaboliteFormula)+1,1}=modelSBML.species(i).name(max(underscoreIndex)+1:length(modelSBML.species(i).name)); -0371 else -0372 metaboliteFormula{numel(metaboliteFormula)+1,1}=''; -0373 end -0374 -0375 %The old COBRA version sometimes has composition information in the -0376 %notes instead -0377 if isfield(modelSBML.species(i),'notes') && ~isempty(parseNote(modelSBML.species(i).notes,'FORMULA')) -0378 metaboliteFormula{numel(metaboliteFormula)+1,1}=parseNote(modelSBML.species(i).notes,'FORMULA'); -0379 end -0380 -0381 %Get Miriam info -0382 if ~isempty(modelSBML.species(i).annotation) -0383 metMiriam=parseMiriam(modelSBML.species(i).annotation); -0384 else -0385 metMiriam=[]; -0386 end -0387 metaboliteMiriams{numel(metaboliteMiriams)+1,1}=metMiriam; -0388 -0389 %Get SBO term -0390 if isfield(modelSBML.species(i),'sboTerm') && ~(modelSBML.species(i).sboTerm==-1) -0391 metSBOs(end+1,1) = modelSBML.species(i).sboTerm; -0392 end -0393 end -0394 %The following lines are executed regardless isSBML2COBRA setting -0395 if isempty(modelSBML.species(i).id) || ~strcmpi(modelSBML.species(i).id(1:2),'E_') -0396 if isempty(modelSBML.species(i).id) 
|| ~strcmpi(modelSBML.species(i).id(1:3),'Cx_') -0397 %Remove trailing [compartment] from metabolite name if present -0398 metaboliteNames{end,1}=regexprep(metaboliteNames{end,1},regexCompNames,''); -0399 metaboliteNames{end,1}=regexprep(metaboliteNames{end,1},'^M_',''); -0400 if isfield(modelSBML.species(i),'fbc_charge') -0401 if ~isempty(modelSBML.species(i).fbc_charge) && modelSBML.species(i).isSetfbc_charge -0402 metaboliteCharges(numel(metaboliteCharges)+1,1)=double(modelSBML.species(i).fbc_charge); -0403 else -0404 if isfield(modelSBML.species(i),'notes') -0405 if strfind(modelSBML.species(i).notes,'CHARGE') -0406 metaboliteCharges(numel(metaboliteCharges)+1,1)=str2double(parseNote(modelSBML.species(i).notes,'CHARGE')); -0407 else -0408 metaboliteCharges(numel(metaboliteCharges)+1,1)=NaN; -0409 end -0410 else -0411 metaboliteCharges(numel(metaboliteCharges)+1,1)=NaN; -0412 end -0413 end -0414 elseif isfield(modelSBML.species(i),'notes') -0415 if strfind(modelSBML.species(i).notes,'CHARGE') -0416 metaboliteCharges(numel(metaboliteCharges)+1,1)=str2double(parseNote(modelSBML.species(i).notes,'CHARGE')); -0417 else -0418 metaboliteCharges(numel(metaboliteCharges)+1,1)=NaN; -0419 end -0420 else -0421 metaboliteCharges(numel(metaboliteCharges)+1,1)=NaN; -0422 end -0423 %Additional information from FBC format Chemical formula -0424 if isfield(modelSBML.species(i),'fbc_chemicalFormula') -0425 if ~isempty(modelSBML.species(i).fbc_chemicalFormula) -0426 metaboliteFormula{numel(metaboliteFormula),1}=modelSBML.species(i).fbc_chemicalFormula; -0427 end -0428 end -0429 end -0430 end -0431 end -0432 -0433 %Add SBO terms to gene and metabolite miriam fields -0434 if numel(unique(geneSBOs)) > 1 % don't add if they're all identical -0435 for i = 1:numel(geneNames) -0436 geneMiriams{i} = addSBOtoMiriam(geneMiriams{i},geneSBOs(i)); -0437 end -0438 end -0439 if numel(unique(metSBOs)) > 1 -0440 for i = 1:numel(metaboliteNames) -0441 metaboliteMiriams{i} = 
addSBOtoMiriam(metaboliteMiriams{i},metSBOs(i)); -0442 end -0443 end -0444 -0445 %Retrieve info on reactions -0446 reactionNames=cell(numel(modelSBML.reaction),1); -0447 reactionIDs=cell(numel(modelSBML.reaction),1); -0448 subsystems=cell(numel(modelSBML.reaction),1); -0449 eccodes=cell(numel(modelSBML.reaction),1); -0450 eccodes(:,:)=cellstr(''); -0451 rxnconfidencescores=NaN(numel(modelSBML.reaction),1); -0452 rxnreferences=cell(numel(modelSBML.reaction),1); -0453 rxnreferences(:,:)=cellstr(''); -0454 rxnnotes=cell(numel(modelSBML.reaction),1); -0455 rxnnotes(:,:)=cellstr(''); -0456 grRules=cell(numel(modelSBML.reaction),1); -0457 grRules(:,:)=cellstr(''); -0458 grRulesFromModifier=grRules; -0459 rxnComps=zeros(numel(modelSBML.reaction),1); -0460 rxnMiriams=cell(numel(modelSBML.reaction),1); -0461 reactionReversibility=zeros(numel(modelSBML.reaction),1); -0462 reactionUB=zeros(numel(modelSBML.reaction),1); -0463 reactionLB=zeros(numel(modelSBML.reaction),1); -0464 reactionObjective=zeros(numel(modelSBML.reaction),1); -0465 -0466 %Construct the stoichiometric matrix while the reaction info is read -0467 S=zeros(numel(metaboliteIDs),numel(modelSBML.reaction)); +0361 metaboliteIDs{numel(metaboliteIDs)+1,1}=regexprep(modelSBML.species(i).id,'^M_',''); +0362 metaboliteCompartments{numel(metaboliteCompartments)+1,1}=regexprep(modelSBML.species(i).compartment,'^C_',''); +0363 +0364 %I think that COBRA doesn't set the boundary condition, but rather +0365 %uses name_b. 
Check for either +0366 metaboliteUnconstrained(numel(metaboliteUnconstrained)+1,1)=modelSBML.species(i).boundaryCondition; +0367 if strcmp(metaboliteIDs{end}(max(end-1,1):end),'_b') +0368 metaboliteUnconstrained(end)=1; +0369 end +0370 +0371 %Get the formula +0372 if max(underscoreIndex)<length(modelSBML.species(i).name) +0373 metaboliteFormula{numel(metaboliteFormula)+1,1}=modelSBML.species(i).name(max(underscoreIndex)+1:length(modelSBML.species(i).name)); +0374 else +0375 metaboliteFormula{numel(metaboliteFormula)+1,1}=''; +0376 end +0377 +0378 %The old COBRA version sometimes has composition information in the +0379 %notes instead +0380 if isfield(modelSBML.species(i),'notes') && ~isempty(parseNote(modelSBML.species(i).notes,'FORMULA')) +0381 metaboliteFormula{numel(metaboliteFormula)+1,1}=parseNote(modelSBML.species(i).notes,'FORMULA'); +0382 end +0383 +0384 %Get Miriam info +0385 if ~isempty(modelSBML.species(i).annotation) +0386 metMiriam=parseMiriam(modelSBML.species(i).annotation); +0387 else +0388 metMiriam=[]; +0389 end +0390 metaboliteMiriams{numel(metaboliteMiriams)+1,1}=metMiriam; +0391 +0392 %Get SBO term +0393 if isfield(modelSBML.species(i),'sboTerm') && ~(modelSBML.species(i).sboTerm==-1) +0394 metSBOs(end+1,1) = modelSBML.species(i).sboTerm; +0395 end +0396 end +0397 %The following lines are executed regardless isSBML2COBRA setting +0398 if isempty(modelSBML.species(i).id) || ~strcmpi(modelSBML.species(i).id(1:2),'E_') +0399 if isempty(modelSBML.species(i).id) || ~strcmpi(modelSBML.species(i).id(1:3),'Cx_') +0400 %Remove trailing [compartment] from metabolite name if present +0401 metaboliteNames{end,1}=regexprep(metaboliteNames{end,1},regexCompNames,''); +0402 metaboliteNames{end,1}=regexprep(metaboliteNames{end,1},'^M_',''); +0403 if isfield(modelSBML.species(i),'fbc_charge') +0404 if ~isempty(modelSBML.species(i).fbc_charge) && modelSBML.species(i).isSetfbc_charge +0405 
metaboliteCharges(numel(metaboliteCharges)+1,1)=double(modelSBML.species(i).fbc_charge); +0406 else +0407 if isfield(modelSBML.species(i),'notes') +0408 if strfind(modelSBML.species(i).notes,'CHARGE') +0409 metaboliteCharges(numel(metaboliteCharges)+1,1)=str2double(parseNote(modelSBML.species(i).notes,'CHARGE')); +0410 else +0411 metaboliteCharges(numel(metaboliteCharges)+1,1)=NaN; +0412 end +0413 else +0414 metaboliteCharges(numel(metaboliteCharges)+1,1)=NaN; +0415 end +0416 end +0417 elseif isfield(modelSBML.species(i),'notes') +0418 if strfind(modelSBML.species(i).notes,'CHARGE') +0419 metaboliteCharges(numel(metaboliteCharges)+1,1)=str2double(parseNote(modelSBML.species(i).notes,'CHARGE')); +0420 else +0421 metaboliteCharges(numel(metaboliteCharges)+1,1)=NaN; +0422 end +0423 else +0424 metaboliteCharges(numel(metaboliteCharges)+1,1)=NaN; +0425 end +0426 %Additional information from FBC format Chemical formula +0427 if isfield(modelSBML.species(i),'fbc_chemicalFormula') +0428 if ~isempty(modelSBML.species(i).fbc_chemicalFormula) +0429 metaboliteFormula{numel(metaboliteFormula),1}=modelSBML.species(i).fbc_chemicalFormula; +0430 end +0431 end +0432 end +0433 end +0434 end +0435 +0436 %Add SBO terms to gene and metabolite miriam fields +0437 if numel(unique(geneSBOs)) > 1 % don't add if they're all identical +0438 for i = 1:numel(geneNames) +0439 geneMiriams{i} = addSBOtoMiriam(geneMiriams{i},geneSBOs(i)); +0440 end +0441 end +0442 if numel(unique(metSBOs)) > 1 +0443 for i = 1:numel(metaboliteNames) +0444 metaboliteMiriams{i} = addSBOtoMiriam(metaboliteMiriams{i},metSBOs(i)); +0445 end +0446 end +0447 +0448 %Retrieve info on reactions +0449 reactionNames=cell(numel(modelSBML.reaction),1); +0450 reactionIDs=cell(numel(modelSBML.reaction),1); +0451 subsystems=cell(numel(modelSBML.reaction),1); +0452 eccodes=cell(numel(modelSBML.reaction),1); +0453 eccodes(:,:)=cellstr(''); +0454 rxnconfidencescores=NaN(numel(modelSBML.reaction),1); +0455 
rxnreferences=cell(numel(modelSBML.reaction),1); +0456 rxnreferences(:,:)=cellstr(''); +0457 rxnnotes=cell(numel(modelSBML.reaction),1); +0458 rxnnotes(:,:)=cellstr(''); +0459 grRules=cell(numel(modelSBML.reaction),1); +0460 grRules(:,:)=cellstr(''); +0461 grRulesFromModifier=grRules; +0462 rxnComps=zeros(numel(modelSBML.reaction),1); +0463 rxnMiriams=cell(numel(modelSBML.reaction),1); +0464 reactionReversibility=zeros(numel(modelSBML.reaction),1); +0465 reactionUB=zeros(numel(modelSBML.reaction),1); +0466 reactionLB=zeros(numel(modelSBML.reaction),1); +0467 reactionObjective=zeros(numel(modelSBML.reaction),1); 0468 -0469 counter=0; -0470 %If FBC, then bounds have parameter ids defined for the whole model -0471 if isfield(modelSBML,'parameter') -0472 parameter.name=cell(numel(modelSBML.parameter),1); -0473 parameter.name={modelSBML.parameter(:).id}'; -0474 parameter.value={modelSBML.parameter(:).value}'; -0475 end -0476 -0477 if isfield(modelSBML.reaction,'sboTerm') && numel(unique([modelSBML.reaction.sboTerm])) == 1 -0478 %If all the SBO terms are identical, don't add them to rxnMiriams -0479 modelSBML.reaction = rmfield(modelSBML.reaction,'sboTerm'); -0480 end -0481 -0482 for i=1:numel(modelSBML.reaction) -0483 -0484 %Check that the reaction doesn't produce a complex and nothing else. If -0485 %so, then jump to the next reaction. This is because I get the genes -0486 %for complexes from the names and not from the reactions that create -0487 %them. 
This only applies to the non-COBRA format -0488 if numel(modelSBML.reaction(i).product)==1 -0489 if length(modelSBML.reaction(i).product(1).species)>=3 -0490 if strcmp(modelSBML.reaction(i).product(1).species(1:3),'Cx_')==true -0491 continue; -0492 end -0493 end -0494 end -0495 -0496 %It didn't look like a gene complex-forming reaction -0497 counter=counter+1; +0469 %Construct the stoichiometric matrix while the reaction info is read +0470 S=zeros(numel(metaboliteIDs),numel(modelSBML.reaction)); +0471 +0472 counter=0; +0473 %If FBC, then bounds have parameter ids defined for the whole model +0474 if isfield(modelSBML,'parameter') +0475 parameter.name=cell(numel(modelSBML.parameter),1); +0476 parameter.name={modelSBML.parameter(:).id}'; +0477 parameter.value={modelSBML.parameter(:).value}'; +0478 end +0479 +0480 if isfield(modelSBML.reaction,'sboTerm') && numel(unique([modelSBML.reaction.sboTerm])) == 1 +0481 %If all the SBO terms are identical, don't add them to rxnMiriams +0482 modelSBML.reaction = rmfield(modelSBML.reaction,'sboTerm'); +0483 end +0484 +0485 for i=1:numel(modelSBML.reaction) +0486 +0487 %Check that the reaction doesn't produce a complex and nothing else. If +0488 %so, then jump to the next reaction. This is because I get the genes +0489 %for complexes from the names and not from the reactions that create +0490 %them. 
This only applies to the non-COBRA format +0491 if numel(modelSBML.reaction(i).product)==1 +0492 if length(modelSBML.reaction(i).product(1).species)>=3 +0493 if strcmp(modelSBML.reaction(i).product(1).species(1:3),'Cx_')==true +0494 continue; +0495 end +0496 end +0497 end 0498 -0499 reactionNames{counter}=modelSBML.reaction(i).name; -0500 -0501 reactionIDs{counter}=modelSBML.reaction(i).id; -0502 reactionReversibility(counter)=modelSBML.reaction(i).reversible; +0499 %It didn't look like a gene complex-forming reaction +0500 counter=counter+1; +0501 +0502 reactionNames{counter}=modelSBML.reaction(i).name; 0503 -0504 %If model is FBC, first get parameter of bound and then replace it with -0505 %the correct value. Probably faster with replace(), but this was only -0506 %introduced in Matlab R2016b -0507 if isfield(modelSBML.reaction(i),'fbc_lowerFluxBound') -0508 lb=modelSBML.reaction(i).fbc_lowerFluxBound; -0509 ub=modelSBML.reaction(i).fbc_upperFluxBound; -0510 for n=1:numel(parameter.value) -0511 lb=regexprep(lb,parameter.name(n),num2str(parameter.value{n})); -0512 ub=regexprep(ub,parameter.name(n),num2str(parameter.value{n})); -0513 end -0514 if isempty(lb) -0515 lb='-Inf'; +0504 reactionIDs{counter}=modelSBML.reaction(i).id; +0505 reactionReversibility(counter)=modelSBML.reaction(i).reversible; +0506 +0507 %If model is FBC, first get parameter of bound and then replace it with +0508 %the correct value. 
Probably faster with replace(), but this was only +0509 %introduced in Matlab R2016b +0510 if isfield(modelSBML.reaction(i),'fbc_lowerFluxBound') +0511 lb=modelSBML.reaction(i).fbc_lowerFluxBound; +0512 ub=modelSBML.reaction(i).fbc_upperFluxBound; +0513 for n=1:numel(parameter.value) +0514 lb=regexprep(lb,parameter.name(n),num2str(parameter.value{n})); +0515 ub=regexprep(ub,parameter.name(n),num2str(parameter.value{n})); 0516 end -0517 if isempty(ub) -0518 ub='Inf'; +0517 if isempty(lb) +0518 lb='-Inf'; 0519 end -0520 reactionLB(counter)=str2num(lb); -0521 reactionUB(counter)=str2num(ub); -0522 %The order of these parameters should not be hard coded -0523 elseif isfield(modelSBML.reaction(i).kineticLaw,'parameter') -0524 reactionLB(counter)=modelSBML.reaction(i).kineticLaw.parameter(1).value; -0525 reactionUB(counter)=modelSBML.reaction(i).kineticLaw.parameter(2).value; -0526 reactionObjective(counter)=modelSBML.reaction(i).kineticLaw.parameter(3).value; -0527 else -0528 if reactionReversibility(counter)==true -0529 reactionLB(counter)=-inf; -0530 else -0531 reactionLB(counter)=0; -0532 end -0533 reactionUB(counter)=inf; -0534 reactionObjective(counter)=0; -0535 end -0536 -0537 %Find the associated gene if available -0538 %If FBC, get gene association data from corresponding fields -0539 if isfield(modelSBML.reaction(i),'fbc_geneProductAssociation') -0540 if ~isempty(modelSBML.reaction(i).fbc_geneProductAssociation) && ~isempty(modelSBML.reaction(i).fbc_geneProductAssociation.fbc_association) -0541 grRules{counter}=modelSBML.reaction(i).fbc_geneProductAssociation.fbc_association.fbc_association; -0542 end -0543 elseif isfield(modelSBML.reaction(i),'notes') -0544 %This section was previously executed only if isSBML2COBRA is true. 
Now -0545 %it will be executed, if 'GENE_ASSOCIATION' is found in -0546 %modelSBML.reaction(i).notes -0547 if strfind(modelSBML.reaction(i).notes,'GENE_ASSOCIATION') -0548 geneAssociation=parseNote(modelSBML.reaction(i).notes,'GENE_ASSOCIATION'); -0549 elseif strfind(modelSBML.reaction(i).notes,'GENE ASSOCIATION') -0550 geneAssociation=parseNote(modelSBML.reaction(i).notes,'GENE ASSOCIATION'); -0551 else -0552 geneAssociation=''; -0553 end -0554 if ~isempty(geneAssociation) -0555 %This adds the grRules. The gene list and rxnGeneMat are created -0556 %later -0557 grRules{counter}=geneAssociation; -0558 end -0559 end -0560 if isempty(grRules{counter}) && ~isempty(modelSBML.reaction(i).modifier) -0561 rules=''; -0562 for j=1:numel(modelSBML.reaction(i).modifier) -0563 modifier=modelSBML.reaction(i).modifier(j).species; -0564 if ~isempty(modifier) -0565 if strcmpi(modifier(1:2),'E_') -0566 index=find(strcmp(modifier,geneIDs)); -0567 %This should be unique and in the geneIDs list, -0568 %otherwise something is wrong -0569 if numel(index)~=1 -0570 EM=['Could not get the gene association data from reaction ' reactionIDs{i}]; -0571 dispEM(EM); -0572 end -0573 if ~isempty(rules) -0574 rules=[rules ' or (' geneNames{index} ')']; -0575 else -0576 rules=['(' geneNames{index} ')']; -0577 end -0578 elseif strcmp(modifier(1:2),'s_') -0579 index=find(strcmp(modifier,metaboliteIDs)); -0580 %This should be unique and in the geneIDs list, -0581 %otherwise something is wrong -0582 if numel(index)~=1 -0583 EM=['Could not get the gene association data from reaction ' reactionIDs{i}]; -0584 dispEM(EM); -0585 end -0586 if ~isempty(rules) -0587 rules=[rules ' or (' metaboliteIDs{index} ')']; -0588 else -0589 rules=['(' metaboliteIDs{index} ')']; -0590 end -0591 else -0592 %It seems to be a complex. 
Add the corresponding -0593 %genes from the name of the complex (not the -0594 %reaction that creates it) -0595 index=find(strcmp(modifier,complexIDs)); -0596 if numel(index)==1 -0597 if ~isempty(rules) -0598 rules=[rules ' or (' strrep(complexNames{index},':',' and ') ')']; -0599 else -0600 rules=['(' strrep(complexNames{index},':',' and ') ')']; -0601 end -0602 else -0603 %Could not find a complex -0604 EM=['Could not get the gene association data from reaction ' reactionIDs{i}]; -0605 dispEM(EM); -0606 end -0607 end -0608 end -0609 end -0610 grRules{counter}=rules; -0611 grRulesFromModifier{counter}=rules;%Backup copy for grRules, useful to parse Yeast 7.6 -0612 end -0613 -0614 %Add reaction compartment -0615 if isfield(modelSBML.reaction(i),'compartment') -0616 if ~isempty(modelSBML.reaction(i).compartment) -0617 rxnComp=modelSBML.reaction(i).compartment; -0618 else -0619 rxnComp=''; -0620 end -0621 elseif isfield(modelSBML.reaction(i),'notes') -0622 rxnComp=parseNote(modelSBML.reaction(i).notes,'COMPARTMENT'); -0623 end -0624 if ~isempty(rxnComp) -0625 %Find it in the compartment list -0626 [~, J]=ismember(rxnComp,compartmentIDs); -0627 rxnComps(counter)=J; -0628 end -0629 -0630 %Get other Miriam fields. This may include for example database indexes -0631 %to organism-specific databases. 
EC-codes are supported by the COBRA -0632 %Toolbox format and are therefore loaded separately -0633 if isSBML2COBRA==false -0634 miriamStruct=parseMiriam(modelSBML.reaction(i).annotation); -0635 rxnMiriams{counter}=miriamStruct; -0636 if isfield(modelSBML.reaction(i),'notes') -0637 subsystems{counter,1}=cellstr(parseNote(modelSBML.reaction(i).notes,'SUBSYSTEM')); -0638 subsystems{counter,1}(cellfun('isempty',subsystems{counter,1})) = []; -0639 if strfind(modelSBML.reaction(i).notes,'Confidence Level') -0640 confScore = parseNote(modelSBML.reaction(i).notes,'Confidence Level'); -0641 if isempty(confScore) -0642 confScore = 0; -0643 end -0644 rxnconfidencescores(counter)=str2double(confScore); -0645 end -0646 rxnreferences{counter,1}=parseNote(modelSBML.reaction(i).notes,'AUTHORS'); -0647 rxnnotes{counter,1}=parseNote(modelSBML.reaction(i).notes,'NOTES'); -0648 end -0649 end -0650 -0651 %Get SBO terms -0652 if isfield(modelSBML.reaction(i),'sboTerm') && ~(modelSBML.reaction(i).sboTerm==-1) -0653 rxnMiriams{counter} = addSBOtoMiriam(rxnMiriams{counter}, modelSBML.reaction(i).sboTerm); -0654 end -0655 -0656 %Get ec-codes -0657 eccode=''; -0658 if ~isempty(modelSBML.reaction(i).annotation) -0659 if strfind(modelSBML.reaction(i).annotation,'urn:miriam:ec-code') -0660 eccode=parseAnnotation(modelSBML.reaction(i).annotation,'urn:miriam:',':','ec-code'); -0661 elseif strfind(modelSBML.reaction(i).annotation,'http://identifiers.org/ec-code') -0662 eccode=parseAnnotation(modelSBML.reaction(i).annotation,'http://identifiers.org/','/','ec-code'); -0663 elseif strfind(modelSBML.reaction(i).annotation,'https://identifiers.org/ec-code') -0664 eccode=parseAnnotation(modelSBML.reaction(i).annotation,'https://identifiers.org/','/','ec-code'); -0665 end -0666 elseif isfield(modelSBML.reaction(i),'notes') -0667 if strfind(modelSBML.reaction(i).notes,'EC Number') -0668 eccode=[eccode parseNote(modelSBML.reaction(i).notes,'EC Number')]; -0669 elseif 
strfind(modelSBML.reaction(i).notes,'PROTEIN_CLASS') -0670 eccode=[eccode parseNote(modelSBML.reaction(i).notes,'PROTEIN_CLASS')]; -0671 end -0672 end -0673 eccodes{counter}=eccode; -0674 -0675 %Add all reactants -0676 for j=1:numel(modelSBML.reaction(i).reactant) -0677 %Get the index of the metabolite in metaboliteIDs. External -0678 %metabolites will be removed at a later stage -0679 metIndex=find(strcmp(modelSBML.reaction(i).reactant(j).species,metaboliteIDs),1); -0680 if isempty(metIndex) -0681 EM=['Could not find metabolite ' modelSBML.reaction(i).reactant(j).species ' in reaction ' reactionIDs{counter}]; -0682 dispEM(EM); -0683 end -0684 S(metIndex,counter)=S(metIndex,counter)+modelSBML.reaction(i).reactant(j).stoichiometry*-1; -0685 end -0686 -0687 %Add all products -0688 for j=1:numel(modelSBML.reaction(i).product) -0689 %Get the index of the metabolite in metaboliteIDs. -0690 metIndex=find(strcmp(modelSBML.reaction(i).product(j).species,metaboliteIDs),1); -0691 if isempty(metIndex) -0692 EM=['Could not find metabolite ' modelSBML.reaction(i).product(j).species ' in reaction ' reactionIDs{counter}]; -0693 dispEM(EM); -0694 end -0695 S(metIndex,counter)=S(metIndex,counter)+modelSBML.reaction(i).product(j).stoichiometry; -0696 end -0697 end -0698 -0699 %if FBC, objective function is separately defined. 
Multiple objective -0700 %functions can be defined, one is set as active -0701 if isfield(modelSBML, 'fbc_activeObjective') -0702 obj=modelSBML.fbc_activeObjective; -0703 for i=1:numel(modelSBML.fbc_objective) -0704 if strcmp(obj,modelSBML.fbc_objective(i).fbc_id) -0705 if ~isempty(modelSBML.fbc_objective(i).fbc_fluxObjective) -0706 rxn=modelSBML.fbc_objective(i).fbc_fluxObjective.fbc_reaction; -0707 rxn=regexprep(rxn,'^R_',''); -0708 idx=find(ismember(reactionIDs,rxn)); -0709 reactionObjective(idx)=modelSBML.fbc_objective(i).fbc_fluxObjective.fbc_coefficient; -0710 end -0711 end -0712 end -0713 end -0714 -0715 %subSystems can be stored as groups instead of in annotations -0716 if isfield(modelSBML,'groups_group') -0717 for i=1:numel(modelSBML.groups_group) -0718 groupreactions={modelSBML.groups_group(i).groups_member(:).groups_idRef}; -0719 groupreactions=regexprep(groupreactions,'^R_',''); -0720 [~, idx] = ismember(groupreactions, reactionIDs); -0721 if any(idx) -0722 for j=1:numel(idx) -0723 if isempty(subsystems{idx(j)}) % First subsystem -0724 subsystems{idx(j)} = {modelSBML.groups_group(i).groups_name}; -0725 else % Consecutive subsystems: concatenate -0726 subsystems{idx(j)} = horzcat(subsystems{idx(j)}, modelSBML.groups_group(i).groups_name); -0727 end -0728 end -0729 end -0730 end -0731 end -0732 -0733 %Shrink the structures if complex-forming reactions had to be skipped -0734 reactionNames=reactionNames(1:counter); -0735 reactionIDs=reactionIDs(1:counter); -0736 subsystems=subsystems(1:counter); -0737 eccodes=eccodes(1:counter); -0738 rxnconfidencescores=rxnconfidencescores(1:counter); -0739 rxnreferences=rxnreferences(1:counter); -0740 rxnnotes=rxnnotes(1:counter); -0741 grRules=grRules(1:counter); -0742 rxnMiriams=rxnMiriams(1:counter); -0743 reactionReversibility=reactionReversibility(1:counter); -0744 reactionUB=reactionUB(1:counter); -0745 reactionLB=reactionLB(1:counter); -0746 reactionObjective=reactionObjective(1:counter); -0747 S=S(:,1:counter); 
-0748 -0749 model.name=modelSBML.name; -0750 model.id=regexprep(modelSBML.id,'^M_',''); % COBRA adds M_ prefix -0751 model.rxns=reactionIDs; -0752 model.mets=metaboliteIDs; -0753 model.S=sparse(S); -0754 model.lb=reactionLB; -0755 model.ub=reactionUB; -0756 model.rev=reactionReversibility; -0757 model.c=reactionObjective; -0758 model.b=zeros(numel(metaboliteIDs),1); -0759 model.comps=compartmentIDs; -0760 model.compNames=compartmentNames; -0761 model.rxnConfidenceScores=rxnconfidencescores; -0762 model.rxnReferences=rxnreferences; -0763 model.rxnNotes=rxnnotes; -0764 -0765 %Load annotation if available. If there are several authors, only the first -0766 %author credentials are imported -0767 if isfield(modelSBML,'annotation') -0768 endString='</'; -0769 I=strfind(modelSBML.annotation,endString); -0770 J=strfind(modelSBML.annotation,'<vCard:Family>'); -0771 if any(J) -0772 model.annotation.familyName=modelSBML.annotation(J(1)+14:I(find(I>J(1),1))-1); -0773 end -0774 J=strfind(modelSBML.annotation,'<vCard:Given>'); -0775 if any(J) -0776 model.annotation.givenName=modelSBML.annotation(J(1)+13:I(find(I>J(1),1))-1); -0777 end -0778 J=strfind(modelSBML.annotation,'<vCard:EMAIL>'); -0779 if any(J) -0780 model.annotation.email=modelSBML.annotation(J(1)+13:I(find(I>J(1),1))-1); -0781 end -0782 J=strfind(modelSBML.annotation,'<vCard:Orgname>'); -0783 if any(J) -0784 model.annotation.organization=modelSBML.annotation(J(1)+15:I(find(I>J(1),1))-1); -0785 end -0786 endString='"/>'; -0787 I=strfind(modelSBML.annotation,endString); -0788 if strfind(modelSBML.annotation,'"urn:miriam:') -0789 J=strfind(modelSBML.annotation,'"urn:miriam:'); -0790 if any(J) -0791 model.annotation.taxonomy=modelSBML.annotation(J+12:I(find(I>J,1))-1); -0792 end -0793 else -0794 J=strfind(modelSBML.annotation,'"http://identifiers.org/'); -0795 if any(J) -0796 model.annotation.taxonomy=modelSBML.annotation(J+24:I(find(I>J,1))-1); -0797 else -0798 
J=strfind(modelSBML.annotation,'"https://identifiers.org/'); -0799 if any(J) -0800 model.annotation.taxonomy=modelSBML.annotation(J+25:I(find(I>J,1))-1); -0801 end -0802 end -0803 end -0804 end -0805 if isfield(modelSBML,'notes') -0806 startString=strfind(modelSBML.notes,'xhtml">'); -0807 endString=strfind(modelSBML.notes,'</body>'); -0808 if any(startString) && any(endString) -0809 model.annotation.note=modelSBML.notes(startString+7:endString-1); -0810 model.annotation.note=regexprep(model.annotation.note,'<p>|</p>',''); -0811 model.annotation.note=strtrim(model.annotation.note); -0812 if regexp(model.annotation.note,'This file was generated using the exportModel function in RAVEN Toolbox \d\.\d and OutputSBML in libSBML') -0813 model.annotation=rmfield(model.annotation,'note'); % Default note added when running exportModel -0814 end -0815 end -0816 end -0817 -0818 if any(~cellfun(@isempty,compartmentOutside)) -0819 model.compOutside=compartmentOutside; -0820 end -0821 -0822 model.rxnNames=reactionNames; -0823 model.metNames=metaboliteNames; +0520 if isempty(ub) +0521 ub='Inf'; +0522 end +0523 reactionLB(counter)=str2num(lb); +0524 reactionUB(counter)=str2num(ub); +0525 %The order of these parameters should not be hard coded +0526 elseif isfield(modelSBML.reaction(i).kineticLaw,'parameter') +0527 reactionLB(counter)=modelSBML.reaction(i).kineticLaw.parameter(1).value; +0528 reactionUB(counter)=modelSBML.reaction(i).kineticLaw.parameter(2).value; +0529 reactionObjective(counter)=modelSBML.reaction(i).kineticLaw.parameter(3).value; +0530 else +0531 if reactionReversibility(counter)==true +0532 reactionLB(counter)=-inf; +0533 else +0534 reactionLB(counter)=0; +0535 end +0536 reactionUB(counter)=inf; +0537 reactionObjective(counter)=0; +0538 end +0539 +0540 %Find the associated gene if available +0541 %If FBC, get gene association data from corresponding fields +0542 if isfield(modelSBML.reaction(i),'fbc_geneProductAssociation') +0543 if 
~isempty(modelSBML.reaction(i).fbc_geneProductAssociation) && ~isempty(modelSBML.reaction(i).fbc_geneProductAssociation.fbc_association) +0544 grRules{counter}=modelSBML.reaction(i).fbc_geneProductAssociation.fbc_association.fbc_association; +0545 end +0546 elseif isfield(modelSBML.reaction(i),'notes') +0547 %This section was previously executed only if isSBML2COBRA is true. Now +0548 %it will be executed, if 'GENE_ASSOCIATION' is found in +0549 %modelSBML.reaction(i).notes +0550 if strfind(modelSBML.reaction(i).notes,'GENE_ASSOCIATION') +0551 geneAssociation=parseNote(modelSBML.reaction(i).notes,'GENE_ASSOCIATION'); +0552 elseif strfind(modelSBML.reaction(i).notes,'GENE ASSOCIATION') +0553 geneAssociation=parseNote(modelSBML.reaction(i).notes,'GENE ASSOCIATION'); +0554 else +0555 geneAssociation=''; +0556 end +0557 if ~isempty(geneAssociation) +0558 %This adds the grRules. The gene list and rxnGeneMat are created +0559 %later +0560 grRules{counter}=geneAssociation; +0561 end +0562 end +0563 if isempty(grRules{counter}) && ~isempty(modelSBML.reaction(i).modifier) +0564 rules=''; +0565 for j=1:numel(modelSBML.reaction(i).modifier) +0566 modifier=modelSBML.reaction(i).modifier(j).species; +0567 if ~isempty(modifier) +0568 if strcmpi(modifier(1:2),'E_') +0569 index=find(strcmp(modifier,geneIDs)); +0570 %This should be unique and in the geneIDs list, +0571 %otherwise something is wrong +0572 if numel(index)~=1 +0573 EM=['Could not get the gene association data from reaction ' reactionIDs{i}]; +0574 dispEM(EM); +0575 end +0576 if ~isempty(rules) +0577 rules=[rules ' or (' geneNames{index} ')']; +0578 else +0579 rules=['(' geneNames{index} ')']; +0580 end +0581 elseif strcmp(modifier(1:2),'s_') +0582 index=find(strcmp(modifier,metaboliteIDs)); +0583 %This should be unique and in the geneIDs list, +0584 %otherwise something is wrong +0585 if numel(index)~=1 +0586 EM=['Could not get the gene association data from reaction ' reactionIDs{i}]; +0587 dispEM(EM); +0588 end 
+0589 if ~isempty(rules) +0590 rules=[rules ' or (' metaboliteIDs{index} ')']; +0591 else +0592 rules=['(' metaboliteIDs{index} ')']; +0593 end +0594 else +0595 %It seems to be a complex. Add the corresponding +0596 %genes from the name of the complex (not the +0597 %reaction that creates it) +0598 index=find(strcmp(modifier,complexIDs)); +0599 if numel(index)==1 +0600 if ~isempty(rules) +0601 rules=[rules ' or (' strrep(complexNames{index},':',' and ') ')']; +0602 else +0603 rules=['(' strrep(complexNames{index},':',' and ') ')']; +0604 end +0605 else +0606 %Could not find a complex +0607 EM=['Could not get the gene association data from reaction ' reactionIDs{i}]; +0608 dispEM(EM); +0609 end +0610 end +0611 end +0612 end +0613 grRules{counter}=rules; +0614 grRulesFromModifier{counter}=rules;%Backup copy for grRules, useful to parse Yeast 7.6 +0615 end +0616 +0617 %Add reaction compartment +0618 if isfield(modelSBML.reaction(i),'compartment') +0619 if ~isempty(modelSBML.reaction(i).compartment) +0620 rxnComp=modelSBML.reaction(i).compartment; +0621 else +0622 rxnComp=''; +0623 end +0624 elseif isfield(modelSBML.reaction(i),'notes') +0625 rxnComp=parseNote(modelSBML.reaction(i).notes,'COMPARTMENT'); +0626 end +0627 if ~isempty(rxnComp) +0628 %Find it in the compartment list +0629 [~, J]=ismember(rxnComp,compartmentIDs); +0630 rxnComps(counter)=J; +0631 end +0632 +0633 %Get other Miriam fields. This may include for example database indexes +0634 %to organism-specific databases. 
EC-codes are supported by the COBRA +0635 %Toolbox format and are therefore loaded separately +0636 if isSBML2COBRA==false +0637 miriamStruct=parseMiriam(modelSBML.reaction(i).annotation); +0638 rxnMiriams{counter}=miriamStruct; +0639 if isfield(modelSBML.reaction(i),'notes') +0640 subsystems{counter,1}=cellstr(parseNote(modelSBML.reaction(i).notes,'SUBSYSTEM')); +0641 subsystems{counter,1}(cellfun('isempty',subsystems{counter,1})) = []; +0642 if strfind(modelSBML.reaction(i).notes,'Confidence Level') +0643 confScore = parseNote(modelSBML.reaction(i).notes,'Confidence Level'); +0644 if isempty(confScore) +0645 confScore = 0; +0646 end +0647 rxnconfidencescores(counter)=str2double(confScore); +0648 end +0649 rxnreferences{counter,1}=parseNote(modelSBML.reaction(i).notes,'AUTHORS'); +0650 rxnnotes{counter,1}=parseNote(modelSBML.reaction(i).notes,'NOTES'); +0651 end +0652 end +0653 +0654 %Get SBO terms +0655 if isfield(modelSBML.reaction(i),'sboTerm') && ~(modelSBML.reaction(i).sboTerm==-1) +0656 rxnMiriams{counter} = addSBOtoMiriam(rxnMiriams{counter}, modelSBML.reaction(i).sboTerm); +0657 end +0658 +0659 %Get ec-codes +0660 eccode=''; +0661 if ~isempty(modelSBML.reaction(i).annotation) +0662 if strfind(modelSBML.reaction(i).annotation,'urn:miriam:ec-code') +0663 eccode=parseAnnotation(modelSBML.reaction(i).annotation,'urn:miriam:',':','ec-code'); +0664 elseif strfind(modelSBML.reaction(i).annotation,'http://identifiers.org/ec-code') +0665 eccode=parseAnnotation(modelSBML.reaction(i).annotation,'http://identifiers.org/','/','ec-code'); +0666 elseif strfind(modelSBML.reaction(i).annotation,'https://identifiers.org/ec-code') +0667 eccode=parseAnnotation(modelSBML.reaction(i).annotation,'https://identifiers.org/','/','ec-code'); +0668 end +0669 elseif isfield(modelSBML.reaction(i),'notes') +0670 if strfind(modelSBML.reaction(i).notes,'EC Number') +0671 eccode=[eccode parseNote(modelSBML.reaction(i).notes,'EC Number')]; +0672 elseif 
strfind(modelSBML.reaction(i).notes,'PROTEIN_CLASS') +0673 eccode=[eccode parseNote(modelSBML.reaction(i).notes,'PROTEIN_CLASS')]; +0674 end +0675 end +0676 eccodes{counter}=eccode; +0677 +0678 %Add all reactants +0679 for j=1:numel(modelSBML.reaction(i).reactant) +0680 %Get the index of the metabolite in metaboliteIDs. External +0681 %metabolites will be removed at a later stage +0682 metIndex=find(strcmp(modelSBML.reaction(i).reactant(j).species,metaboliteIDs),1); +0683 if isempty(metIndex) +0684 EM=['Could not find metabolite ' modelSBML.reaction(i).reactant(j).species ' in reaction ' reactionIDs{counter}]; +0685 dispEM(EM); +0686 end +0687 S(metIndex,counter)=S(metIndex,counter)+modelSBML.reaction(i).reactant(j).stoichiometry*-1; +0688 end +0689 +0690 %Add all products +0691 for j=1:numel(modelSBML.reaction(i).product) +0692 %Get the index of the metabolite in metaboliteIDs. +0693 metIndex=find(strcmp(modelSBML.reaction(i).product(j).species,metaboliteIDs),1); +0694 if isempty(metIndex) +0695 EM=['Could not find metabolite ' modelSBML.reaction(i).product(j).species ' in reaction ' reactionIDs{counter}]; +0696 dispEM(EM); +0697 end +0698 S(metIndex,counter)=S(metIndex,counter)+modelSBML.reaction(i).product(j).stoichiometry; +0699 end +0700 end +0701 +0702 %if FBC, objective function is separately defined. 
Multiple objective +0703 %functions can be defined, one is set as active +0704 if isfield(modelSBML, 'fbc_activeObjective') +0705 obj=modelSBML.fbc_activeObjective; +0706 for i=1:numel(modelSBML.fbc_objective) +0707 if strcmp(obj,modelSBML.fbc_objective(i).fbc_id) +0708 if ~isempty(modelSBML.fbc_objective(i).fbc_fluxObjective) +0709 rxn=modelSBML.fbc_objective(i).fbc_fluxObjective.fbc_reaction; +0710 rxn=regexprep(rxn,'^R_',''); +0711 idx=find(ismember(reactionIDs,rxn)); +0712 reactionObjective(idx)=modelSBML.fbc_objective(i).fbc_fluxObjective.fbc_coefficient; +0713 end +0714 end +0715 end +0716 end +0717 +0718 %subSystems can be stored as groups instead of in annotations +0719 if isfield(modelSBML,'groups_group') +0720 for i=1:numel(modelSBML.groups_group) +0721 groupreactions={modelSBML.groups_group(i).groups_member(:).groups_idRef}; +0722 groupreactions=regexprep(groupreactions,'^R_',''); +0723 [~, idx] = ismember(groupreactions, reactionIDs); +0724 if any(idx) +0725 for j=1:numel(idx) +0726 if isempty(subsystems{idx(j)}) % First subsystem +0727 subsystems{idx(j)} = {modelSBML.groups_group(i).groups_name}; +0728 else % Consecutive subsystems: concatenate +0729 subsystems{idx(j)} = horzcat(subsystems{idx(j)}, modelSBML.groups_group(i).groups_name); +0730 end +0731 end +0732 end +0733 end +0734 end +0735 +0736 %Shrink the structures if complex-forming reactions had to be skipped +0737 reactionNames=reactionNames(1:counter); +0738 reactionIDs=reactionIDs(1:counter); +0739 subsystems=subsystems(1:counter); +0740 eccodes=eccodes(1:counter); +0741 rxnconfidencescores=rxnconfidencescores(1:counter); +0742 rxnreferences=rxnreferences(1:counter); +0743 rxnnotes=rxnnotes(1:counter); +0744 grRules=grRules(1:counter); +0745 rxnMiriams=rxnMiriams(1:counter); +0746 reactionReversibility=reactionReversibility(1:counter); +0747 reactionUB=reactionUB(1:counter); +0748 reactionLB=reactionLB(1:counter); +0749 reactionObjective=reactionObjective(1:counter); +0750 S=S(:,1:counter); 
+0751 +0752 model.name=modelSBML.name; +0753 model.id=regexprep(modelSBML.id,'^M_',''); % COBRA adds M_ prefix +0754 model.rxns=reactionIDs; +0755 model.mets=metaboliteIDs; +0756 model.S=sparse(S); +0757 model.lb=reactionLB; +0758 model.ub=reactionUB; +0759 model.rev=reactionReversibility; +0760 model.c=reactionObjective; +0761 model.b=zeros(numel(metaboliteIDs),1); +0762 model.comps=compartmentIDs; +0763 model.compNames=compartmentNames; +0764 model.rxnConfidenceScores=rxnconfidencescores; +0765 model.rxnReferences=rxnreferences; +0766 model.rxnNotes=rxnnotes; +0767 +0768 %Load annotation if available. If there are several authors, only the first +0769 %author credentials are imported +0770 if isfield(modelSBML,'annotation') +0771 endString='</'; +0772 I=strfind(modelSBML.annotation,endString); +0773 J=strfind(modelSBML.annotation,'<vCard:Family>'); +0774 if any(J) +0775 model.annotation.familyName=modelSBML.annotation(J(1)+14:I(find(I>J(1),1))-1); +0776 end +0777 J=strfind(modelSBML.annotation,'<vCard:Given>'); +0778 if any(J) +0779 model.annotation.givenName=modelSBML.annotation(J(1)+13:I(find(I>J(1),1))-1); +0780 end +0781 J=strfind(modelSBML.annotation,'<vCard:EMAIL>'); +0782 if any(J) +0783 model.annotation.email=modelSBML.annotation(J(1)+13:I(find(I>J(1),1))-1); +0784 end +0785 J=strfind(modelSBML.annotation,'<vCard:Orgname>'); +0786 if any(J) +0787 model.annotation.organization=modelSBML.annotation(J(1)+15:I(find(I>J(1),1))-1); +0788 end +0789 endString='"/>'; +0790 I=strfind(modelSBML.annotation,endString); +0791 if strfind(modelSBML.annotation,'"urn:miriam:') +0792 J=strfind(modelSBML.annotation,'"urn:miriam:'); +0793 if any(J) +0794 model.annotation.taxonomy=modelSBML.annotation(J+12:I(find(I>J,1))-1); +0795 end +0796 else +0797 J=strfind(modelSBML.annotation,'"http://identifiers.org/'); +0798 if any(J) +0799 model.annotation.taxonomy=modelSBML.annotation(J+24:I(find(I>J,1))-1); +0800 else +0801 
J=strfind(modelSBML.annotation,'"https://identifiers.org/'); +0802 if any(J) +0803 model.annotation.taxonomy=modelSBML.annotation(J+25:I(find(I>J,1))-1); +0804 end +0805 end +0806 end +0807 end +0808 if isfield(modelSBML,'notes') +0809 startString=strfind(modelSBML.notes,'xhtml">'); +0810 endString=strfind(modelSBML.notes,'</body>'); +0811 if any(startString) && any(endString) +0812 model.annotation.note=modelSBML.notes(startString+7:endString-1); +0813 model.annotation.note=regexprep(model.annotation.note,'<p>|</p>',''); +0814 model.annotation.note=strtrim(model.annotation.note); +0815 if regexp(model.annotation.note,'This file was generated using the exportModel function in RAVEN Toolbox \d\.\d and OutputSBML in libSBML') +0816 model.annotation=rmfield(model.annotation,'note'); % Default note added when running exportModel +0817 end +0818 end +0819 end +0820 +0821 if any(~cellfun(@isempty,compartmentOutside)) +0822 model.compOutside=compartmentOutside; +0823 end 0824 -0825 %Match the compartments for metabolites -0826 [~, J]=ismember(metaboliteCompartments,model.comps); -0827 model.metComps=J; -0828 -0829 %If any genes have been loaded (only for the new format) -0830 if ~isempty(geneNames) -0831 %In some rare cases geneNames may not necessarily be used in grRules. -0832 %That is true for Yeast 7.6. It's therefore important to change gene -0833 %systematic names to geneIDs in sophisticated way. Gene systematic -0834 %names are not unique, since exactly the same name may be in different -0835 %compartments -0836 if all(cellfun(@isempty,strfind(grRules,geneNames{1}))) -0837 geneShortNames=geneNames; -0838 %geneShortNames contain compartments as well, so these are removed -0839 geneShortNames=regexprep(geneShortNames,' \[.+$',''); -0840 %grRules obtained from modifier fields contain geneNames. These are -0841 %changed into geneIDs. 
grRulesFromModifier is a good way to have -0842 %geneIDs and rxns association when it's important to resolve -0843 %systematic name ambiguities -0844 grRulesFromModifier=regexprep(regexprep(grRulesFromModifier,'\[|\]','_'),regexprep(geneNames,'\[|\]','_'),geneIDs); -0845 grRules=regexprep(regexprep(grRules,'\[|\]','_'),regexprep(geneNames,'\[|\]','_'),geneIDs); -0846 -0847 %Yeast 7.6 contains several metabolites, which were used in gene -0848 %associations. For that reason, the list of species ID is created -0849 %and we then check whether any of them have kegg.genes annotation -0850 %thereby obtaining systematic gene names -0851 geneShortNames=vertcat(geneShortNames,metaboliteNames); -0852 geneIDs=vertcat(geneIDs,metaboliteIDs); -0853 geneSystNames=extractMiriam(vertcat(geneMiriams,metaboliteMiriams),'kegg.genes'); -0854 geneCompartments=vertcat(geneCompartments,metaboliteCompartments); -0855 geneMiriams=vertcat(geneMiriams,metaboliteMiriams); -0856 -0857 %Now we retain information for only these entries, which have -0858 %kegg.genes annotation -0859 geneShortNames=geneShortNames(~cellfun('isempty',geneSystNames)); -0860 geneIDs=geneIDs(~cellfun('isempty',geneSystNames)); -0861 geneSystNames=geneSystNames(~cellfun('isempty',geneSystNames)); -0862 geneCompartments=geneCompartments(~cellfun('isempty',geneSystNames)); -0863 geneMiriams=geneMiriams(~cellfun('isempty',geneSystNames)); -0864 %Now we reorder geneIDs and geneSystNames by geneSystNames string -0865 %length -0866 geneNames=geneIDs;%Backuping geneIDs, since we need unsorted order for later -0867 [~, Indx] = sort(cellfun('size', geneSystNames, 2), 'descend'); -0868 geneIDs = geneIDs(Indx); -0869 geneSystNames = geneSystNames(Indx); -0870 for i=1:numel(geneSystNames) -0871 for j=1:numel(grRules) -0872 if strfind(grRules{j},geneSystNames{i}) -0873 if ~isempty(grRules{j}) -0874 if sum(ismember(geneSystNames,geneSystNames{i}))==1 -0875 grRules{j}=regexprep(grRules{j},geneSystNames{i},geneIDs{i}); -0876 elseif 
sum(ismember(geneSystNames,geneSystNames{i}))>1 -0877 counter=0; -0878 ovrlpIDs=geneIDs(ismember(geneSystNames,geneSystNames{i})); -0879 for k=1:numel(ovrlpIDs) -0880 if strfind(grRulesFromModifier{j},ovrlpIDs{k}) -0881 counter=counter+1; -0882 grRules{j}=regexprep(grRules{j},geneSystNames{i},ovrlpIDs{k}); -0883 end -0884 if counter>1 -0885 EM=['Gene association is ambiguous for reaction ' modelSBML.reaction(j).id]; -0886 dispEM(EM); -0887 end -0888 end -0889 end -0890 end -0891 end -0892 end -0893 end -0894 end -0895 model.genes=geneNames; -0896 model.grRules=grRules; -0897 [grRules,rxnGeneMat] = standardizeGrRules(model,true); -0898 model.grRules = grRules; -0899 model.rxnGeneMat = rxnGeneMat; -0900 -0901 %Match the compartments for genes -0902 [~, J]=ismember(geneCompartments,model.comps); -0903 model.geneComps=J; -0904 else -0905 if ~all(cellfun(@isempty,grRules)) -0906 %If fbc_geneProduct exists, follow the specified gene order, such -0907 %that matching geneShortNames in function below will work -0908 if isfield(modelSBML,'fbc_geneProduct') -0909 genes={modelSBML.fbc_geneProduct.fbc_id}; -0910 -0911 %Get gene Miriams if they were not retrieved above (this occurs -0912 %when genes are stored as fbc_geneProduct instead of species) -0913 if isempty(geneMiriams) -0914 geneMiriams = cell(numel(genes),1); -0915 if isfield(modelSBML.fbc_geneProduct,'sboTerm') && numel(unique([modelSBML.fbc_geneProduct.sboTerm])) == 1 -0916 %If all the SBO terms are identical, don't add them to geneMiriams -0917 modelSBML.fbc_geneProduct = rmfield(modelSBML.fbc_geneProduct,'sboTerm'); -0918 end -0919 for i = 1:numel(genes) -0920 geneMiriams{i}=parseMiriam(modelSBML.fbc_geneProduct(i).annotation); -0921 if isfield(modelSBML.fbc_geneProduct(i),'sboTerm') && ~(modelSBML.fbc_geneProduct(i).sboTerm==-1) -0922 geneMiriams{i} = addSBOtoMiriam(geneMiriams{i},modelSBML.fbc_geneProduct(i).sboTerm); -0923 end -0924 end -0925 end -0926 else -0927 genes=getGeneList(grRules); -0928 end -0929 if 
strcmpi(genes{1}(1:2),'G_') -0930 genes=regexprep(genes,'^G_',''); -0931 grRules=regexprep(grRules,'^G_',''); -0932 grRules=regexprep(grRules,'\(G_','('); -0933 grRules=regexprep(grRules,' G_',' '); -0934 end -0935 model.genes=genes; -0936 model.grRules=grRules; -0937 [grRules,rxnGeneMat] = standardizeGrRules(model,true); -0938 model.grRules = grRules; -0939 model.rxnGeneMat = rxnGeneMat; -0940 end -0941 end -0942 -0943 if all(cellfun(@isempty,geneShortNames)) -0944 if isfield(modelSBML,'fbc_geneProduct') -0945 for i=1:numel(genes) -0946 if ~isempty(modelSBML.fbc_geneProduct(i).fbc_label) -0947 geneShortNames{i,1}=modelSBML.fbc_geneProduct(i).fbc_label; -0948 elseif ~isempty(modelSBML.fbc_geneProduct(i).fbc_name) -0949 geneShortNames{i,1}=modelSBML.fbc_geneProduct(i).fbc_name; -0950 else -0951 geneShortNames{i,1}=''; -0952 end -0953 end -0954 end -0955 end -0956 -0957 %If any InChIs have been loaded -0958 if any(~cellfun(@isempty,metaboliteInChI)) -0959 model.inchis=metaboliteInChI; -0960 end -0961 -0962 %If any formulas have been loaded -0963 if any(~cellfun(@isempty,metaboliteFormula)) -0964 model.metFormulas=metaboliteFormula; -0965 end -0966 -0967 %If any charges have been loaded -0968 if ~isempty(metaboliteCharges) -0969 model.metCharges=metaboliteCharges; -0970 end -0971 -0972 %If any gene short names have been loaded -0973 if any(~cellfun(@isempty,geneShortNames)) -0974 model.geneShortNames=geneShortNames; -0975 end -0976 -0977 %If any Miriam strings for compartments have been loaded -0978 if any(~cellfun(@isempty,compartmentMiriams)) -0979 model.compMiriams=compartmentMiriams; -0980 end -0981 -0982 %If any Miriam strings for metabolites have been loaded -0983 if any(~cellfun(@isempty,metaboliteMiriams)) -0984 model.metMiriams=metaboliteMiriams; -0985 end -0986 -0987 %If any subsystems have been loaded -0988 if any(~cellfun(@isempty,subsystems)) -0989 model.subSystems=subsystems; -0990 end -0991 if any(rxnComps) -0992 if all(rxnComps) -0993 
model.rxnComps=rxnComps; -0994 else -0995 if supressWarnings==false -0996 EM='The compartments for the following reactions could not be matched. Ignoring reaction compartment information'; -0997 dispEM(EM,false,model.rxns(rxnComps==0)); -0998 end -0999 end -1000 end -1001 -1002 %If any ec-codes have been loaded -1003 if any(~cellfun(@isempty,eccodes)) -1004 model.eccodes=eccodes; -1005 end -1006 -1007 %If any Miriam strings for reactions have been loaded -1008 if any(~cellfun(@isempty,rxnMiriams)) -1009 model.rxnMiriams=rxnMiriams; -1010 end -1011 -1012 %If any Miriam strings for genes have been loaded -1013 if any(~cellfun(@isempty,geneMiriams)) -1014 model.geneMiriams=geneMiriams; -1015 end -1016 -1017 model.unconstrained=metaboliteUnconstrained; -1018 -1019 %Convert SBML IDs back into their original strings. Here we are using part -1020 %from convertSBMLID, originating from the COBRA Toolbox -1021 model.rxns=regexprep(model.rxns,'__([0-9]+)__','${char(str2num($1))}'); -1022 model.mets=regexprep(model.mets,'__([0-9]+)__','${char(str2num($1))}'); -1023 model.comps=regexprep(model.comps,'__([0-9]+)__','${char(str2num($1))}'); -1024 model.grRules=regexprep(model.grRules,'__([0-9]+)__','${char(str2num($1))}'); -1025 model.genes=regexprep(model.genes,'__([0-9]+)__','${char(str2num($1))}'); -1026 model.id=regexprep(model.id,'__([0-9]+)__','${char(str2num($1))}'); +0825 model.rxnNames=reactionNames; +0826 model.metNames=metaboliteNames; +0827 +0828 %Match the compartments for metabolites +0829 [~, J]=ismember(metaboliteCompartments,model.comps); +0830 model.metComps=J; +0831 +0832 %If any genes have been loaded (only for the new format) +0833 if ~isempty(geneNames) +0834 %In some rare cases geneNames may not necessarily be used in grRules. +0835 %That is true for Yeast 7.6. It's therefore important to change gene +0836 %systematic names to geneIDs in sophisticated way. 
Gene systematic +0837 %names are not unique, since exactly the same name may be in different +0838 %compartments +0839 if all(cellfun(@isempty,strfind(grRules,geneNames{1}))) +0840 geneShortNames=geneNames; +0841 %geneShortNames contain compartments as well, so these are removed +0842 geneShortNames=regexprep(geneShortNames,' \[.+$',''); +0843 %grRules obtained from modifier fields contain geneNames. These are +0844 %changed into geneIDs. grRulesFromModifier is a good way to have +0845 %geneIDs and rxns association when it's important to resolve +0846 %systematic name ambiguities +0847 grRulesFromModifier=regexprep(regexprep(grRulesFromModifier,'\[|\]','_'),regexprep(geneNames,'\[|\]','_'),geneIDs); +0848 grRules=regexprep(regexprep(grRules,'\[|\]','_'),regexprep(geneNames,'\[|\]','_'),geneIDs); +0849 +0850 %Yeast 7.6 contains several metabolites, which were used in gene +0851 %associations. For that reason, the list of species ID is created +0852 %and we then check whether any of them have kegg.genes annotation +0853 %thereby obtaining systematic gene names +0854 geneShortNames=vertcat(geneShortNames,metaboliteNames); +0855 geneIDs=vertcat(geneIDs,metaboliteIDs); +0856 geneSystNames=extractMiriam(vertcat(geneMiriams,metaboliteMiriams),'kegg.genes'); +0857 geneCompartments=vertcat(geneCompartments,metaboliteCompartments); +0858 geneMiriams=vertcat(geneMiriams,metaboliteMiriams); +0859 +0860 %Now we retain information for only these entries, which have +0861 %kegg.genes annotation +0862 geneShortNames=geneShortNames(~cellfun('isempty',geneSystNames)); +0863 geneIDs=geneIDs(~cellfun('isempty',geneSystNames)); +0864 geneSystNames=geneSystNames(~cellfun('isempty',geneSystNames)); +0865 geneCompartments=geneCompartments(~cellfun('isempty',geneSystNames)); +0866 geneMiriams=geneMiriams(~cellfun('isempty',geneSystNames)); +0867 %Now we reorder geneIDs and geneSystNames by geneSystNames string +0868 %length +0869 geneNames=geneIDs;%Backuping geneIDs, since we need unsorted 
order for later +0870 [~, Indx] = sort(cellfun('size', geneSystNames, 2), 'descend'); +0871 geneIDs = geneIDs(Indx); +0872 geneSystNames = geneSystNames(Indx); +0873 for i=1:numel(geneSystNames) +0874 for j=1:numel(grRules) +0875 if strfind(grRules{j},geneSystNames{i}) +0876 if ~isempty(grRules{j}) +0877 if sum(ismember(geneSystNames,geneSystNames{i}))==1 +0878 grRules{j}=regexprep(grRules{j},geneSystNames{i},geneIDs{i}); +0879 elseif sum(ismember(geneSystNames,geneSystNames{i}))>1 +0880 counter=0; +0881 ovrlpIDs=geneIDs(ismember(geneSystNames,geneSystNames{i})); +0882 for k=1:numel(ovrlpIDs) +0883 if strfind(grRulesFromModifier{j},ovrlpIDs{k}) +0884 counter=counter+1; +0885 grRules{j}=regexprep(grRules{j},geneSystNames{i},ovrlpIDs{k}); +0886 end +0887 if counter>1 +0888 EM=['Gene association is ambiguous for reaction ' modelSBML.reaction(j).id]; +0889 dispEM(EM); +0890 end +0891 end +0892 end +0893 end +0894 end +0895 end +0896 end +0897 end +0898 model.genes=geneNames; +0899 model.grRules=grRules; +0900 [grRules,rxnGeneMat] = standardizeGrRules(model,true); +0901 model.grRules = grRules; +0902 model.rxnGeneMat = rxnGeneMat; +0903 +0904 %Match the compartments for genes +0905 [~, J]=ismember(geneCompartments,model.comps); +0906 model.geneComps=J; +0907 else +0908 if ~all(cellfun(@isempty,grRules)) +0909 %If fbc_geneProduct exists, follow the specified gene order, such +0910 %that matching geneShortNames in function below will work +0911 if isfield(modelSBML,'fbc_geneProduct') +0912 genes={modelSBML.fbc_geneProduct.fbc_id}; +0913 +0914 %Get gene Miriams if they were not retrieved above (this occurs +0915 %when genes are stored as fbc_geneProduct instead of species) +0916 if isempty(geneMiriams) +0917 geneMiriams = cell(numel(genes),1); +0918 if isfield(modelSBML.fbc_geneProduct,'sboTerm') && numel(unique([modelSBML.fbc_geneProduct.sboTerm])) == 1 +0919 %If all the SBO terms are identical, don't add them to geneMiriams +0920 modelSBML.fbc_geneProduct = 
rmfield(modelSBML.fbc_geneProduct,'sboTerm'); +0921 end +0922 for i = 1:numel(genes) +0923 geneMiriams{i}=parseMiriam(modelSBML.fbc_geneProduct(i).annotation); +0924 if isfield(modelSBML.fbc_geneProduct(i),'sboTerm') && ~(modelSBML.fbc_geneProduct(i).sboTerm==-1) +0925 geneMiriams{i} = addSBOtoMiriam(geneMiriams{i},modelSBML.fbc_geneProduct(i).sboTerm); +0926 end +0927 end +0928 end +0929 proteinNames={modelSBML.fbc_geneProduct.fbc_name}; +0930 else +0931 genes=getGeneList(grRules); +0932 end +0933 if strcmpi(genes{1}(1:2),'G_') +0934 genes=regexprep(genes,'^G_',''); +0935 grRules=regexprep(grRules,'^G_',''); +0936 grRules=regexprep(grRules,'\(G_','('); +0937 grRules=regexprep(grRules,' G_',' '); +0938 end +0939 model.genes=genes; +0940 model.grRules=grRules; +0941 [grRules,rxnGeneMat] = standardizeGrRules(model,true); +0942 model.grRules = grRules; +0943 model.rxnGeneMat = rxnGeneMat; +0944 end +0945 end +0946 +0947 if all(cellfun(@isempty,geneShortNames)) +0948 if isfield(modelSBML,'fbc_geneProduct') +0949 for i=1:numel(genes) +0950 if ~isempty(modelSBML.fbc_geneProduct(i).fbc_label) +0951 geneShortNames{i,1}=modelSBML.fbc_geneProduct(i).fbc_label; +0952 elseif ~isempty(modelSBML.fbc_geneProduct(i).fbc_name) +0953 geneShortNames{i,1}=modelSBML.fbc_geneProduct(i).fbc_name; +0954 else +0955 geneShortNames{i,1}=''; +0956 end +0957 end +0958 end +0959 end +0960 +0961 %If any InChIs have been loaded +0962 if any(~cellfun(@isempty,metaboliteInChI)) +0963 model.inchis=metaboliteInChI; +0964 end +0965 +0966 %If any formulas have been loaded +0967 if any(~cellfun(@isempty,metaboliteFormula)) +0968 model.metFormulas=metaboliteFormula; +0969 end +0970 +0971 %If any charges have been loaded +0972 if ~isempty(metaboliteCharges) +0973 model.metCharges=metaboliteCharges; +0974 end +0975 +0976 %If any gene short names have been loaded +0977 if any(~cellfun(@isempty,geneShortNames)) +0978 model.geneShortNames=geneShortNames; +0979 end +0980 +0981 %If any Miriam strings for 
compartments have been loaded +0982 if any(~cellfun(@isempty,compartmentMiriams)) +0983 model.compMiriams=compartmentMiriams; +0984 end +0985 +0986 %If any Miriam strings for metabolites have been loaded +0987 if any(~cellfun(@isempty,metaboliteMiriams)) +0988 model.metMiriams=metaboliteMiriams; +0989 end +0990 +0991 %If any subsystems have been loaded +0992 if any(~cellfun(@isempty,subsystems)) +0993 model.subSystems=subsystems; +0994 end +0995 if any(rxnComps) +0996 if all(rxnComps) +0997 model.rxnComps=rxnComps; +0998 else +0999 if supressWarnings==false +1000 EM='The compartments for the following reactions could not be matched. Ignoring reaction compartment information'; +1001 dispEM(EM,false,model.rxns(rxnComps==0)); +1002 end +1003 end +1004 end +1005 +1006 %If any ec-codes have been loaded +1007 if any(~cellfun(@isempty,eccodes)) +1008 model.eccodes=eccodes; +1009 end +1010 +1011 %If any Miriam strings for reactions have been loaded +1012 if any(~cellfun(@isempty,rxnMiriams)) +1013 model.rxnMiriams=rxnMiriams; +1014 end +1015 +1016 %If any Miriam strings for genes have been loaded +1017 if any(~cellfun(@isempty,geneMiriams)) +1018 model.geneMiriams=geneMiriams; +1019 end +1020 +1021 %If any protein strings have been loaded +1022 if any(~cellfun(@isempty,proteinNames)) +1023 model.proteinNames=proteinNames; +1024 end +1025 +1026 model.unconstrained=metaboliteUnconstrained; 1027 -1028 %Remove unused fields -1029 if isempty(model.annotation) -1030 model=rmfield(model,'annotation'); -1031 end -1032 if isempty(model.compOutside) -1033 model=rmfield(model,'compOutside'); -1034 end -1035 if isempty(model.compMiriams) -1036 model=rmfield(model,'compMiriams'); -1037 end -1038 if isempty(model.rxnComps) -1039 model=rmfield(model,'rxnComps'); +1028 %Convert SBML IDs back into their original strings. 
Here we are using part +1029 %from convertSBMLID, originating from the COBRA Toolbox +1030 model.rxns=regexprep(model.rxns,'__([0-9]+)__','${char(str2num($1))}'); +1031 model.mets=regexprep(model.mets,'__([0-9]+)__','${char(str2num($1))}'); +1032 model.comps=regexprep(model.comps,'__([0-9]+)__','${char(str2num($1))}'); +1033 model.grRules=regexprep(model.grRules,'__([0-9]+)__','${char(str2num($1))}'); +1034 model.genes=regexprep(model.genes,'__([0-9]+)__','${char(str2num($1))}'); +1035 model.id=regexprep(model.id,'__([0-9]+)__','${char(str2num($1))}'); +1036 +1037 %Remove unused fields +1038 if isempty(model.annotation) +1039 model=rmfield(model,'annotation'); 1040 end -1041 if isempty(model.grRules) -1042 model=rmfield(model,'grRules'); +1041 if isempty(model.compOutside) +1042 model=rmfield(model,'compOutside'); 1043 end -1044 if isempty(model.rxnGeneMat) -1045 model=rmfield(model,'rxnGeneMat'); +1044 if isempty(model.compMiriams) +1045 model=rmfield(model,'compMiriams'); 1046 end -1047 if isempty(model.subSystems) -1048 model=rmfield(model,'subSystems'); -1049 else -1050 model.subSystems(cellfun(@isempty,subsystems))={{''}}; -1051 end -1052 if isempty(model.eccodes) -1053 model=rmfield(model,'eccodes'); -1054 end -1055 if isempty(model.rxnMiriams) -1056 model=rmfield(model,'rxnMiriams'); -1057 end -1058 if cellfun(@isempty,model.rxnNotes) -1059 model=rmfield(model,'rxnNotes'); +1047 if isempty(model.rxnComps) +1048 model=rmfield(model,'rxnComps'); +1049 end +1050 if isempty(model.grRules) +1051 model=rmfield(model,'grRules'); +1052 end +1053 if isempty(model.rxnGeneMat) +1054 model=rmfield(model,'rxnGeneMat'); +1055 end +1056 if isempty(model.subSystems) +1057 model=rmfield(model,'subSystems'); +1058 else +1059 model.subSystems(cellfun(@isempty,subsystems))={{''}}; 1060 end -1061 if cellfun(@isempty,model.rxnReferences) -1062 model=rmfield(model,'rxnReferences'); +1061 if isempty(model.eccodes) +1062 model=rmfield(model,'eccodes'); 1063 end -1064 if 
isempty(model.rxnConfidenceScores) || all(isnan(model.rxnConfidenceScores)) -1065 model=rmfield(model,'rxnConfidenceScores'); +1064 if isempty(model.rxnMiriams) +1065 model=rmfield(model,'rxnMiriams'); 1066 end -1067 if isempty(model.genes) -1068 model=rmfield(model,'genes'); -1069 elseif isrow(model.genes) -1070 model.genes=transpose(model.genes); -1071 end -1072 if isempty(model.geneComps) -1073 model=rmfield(model,'geneComps'); -1074 end -1075 if isempty(model.geneMiriams) -1076 model=rmfield(model,'geneMiriams'); -1077 end -1078 if isempty(model.geneShortNames) -1079 model=rmfield(model,'geneShortNames'); +1067 if cellfun(@isempty,model.rxnNotes) +1068 model=rmfield(model,'rxnNotes'); +1069 end +1070 if cellfun(@isempty,model.rxnReferences) +1071 model=rmfield(model,'rxnReferences'); +1072 end +1073 if isempty(model.rxnConfidenceScores) || all(isnan(model.rxnConfidenceScores)) +1074 model=rmfield(model,'rxnConfidenceScores'); +1075 end +1076 if isempty(model.genes) +1077 model=rmfield(model,'genes'); +1078 elseif isrow(model.genes) +1079 model.genes=transpose(model.genes); 1080 end -1081 if isempty(model.inchis) -1082 model=rmfield(model,'inchis'); +1081 if isempty(model.geneComps) +1082 model=rmfield(model,'geneComps'); 1083 end -1084 if isempty(model.metFormulas) -1085 model=rmfield(model,'metFormulas'); +1084 if isempty(model.geneMiriams) +1085 model=rmfield(model,'geneMiriams'); 1086 end -1087 if isempty(model.metMiriams) -1088 model=rmfield(model,'metMiriams'); +1087 if isempty(model.geneShortNames) +1088 model=rmfield(model,'geneShortNames'); 1089 end -1090 if ~any(model.metCharges) -1091 model=rmfield(model,'metCharges'); +1090 if isempty(model.proteinNames) +1091 model=rmfield(model,'proteinNames'); 1092 end -1093 -1094 %This just removes the grRules if no genes have been loaded -1095 if ~isfield(model,'genes') && isfield(model,'grRules') -1096 model=rmfield(model,'grRules'); -1097 end -1098 -1099 %Print warnings about bad structure -1100 if 
supressWarnings==false -1101 checkModelStruct(model,false); -1102 end -1103 -1104 if removeExcMets==true -1105 model=simplifyModel(model); -1106 end -1107 end -1108 -1109 function matchGenes=getGeneList(grRules) -1110 %Constructs the list of unique genes from grRules -1111 -1112 %Assumes that everything that isn't a paranthesis, " AND " or " or " is a -1113 %gene name -1114 genes=strrep(grRules,'(',''); -1115 genes=strrep(genes,')',''); -1116 genes=strrep(genes,' or ',' '); -1117 genes=strrep(genes,' and ',' '); -1118 genes=strrep(genes,' OR ',' '); -1119 genes=strrep(genes,' AND ',' '); -1120 genes=regexp(genes,' ','split'); -1121 -1122 allNames={}; -1123 for i=1:numel(genes) -1124 allNames=[allNames genes{i}]; -1125 end -1126 matchGenes=unique(allNames)'; -1127 -1128 %Remove the empty element if present -1129 if isempty(matchGenes{1}) -1130 matchGenes(1)=[]; -1131 end -1132 end +1093 if isempty(model.inchis) +1094 model=rmfield(model,'inchis'); +1095 end +1096 if isempty(model.metFormulas) +1097 model=rmfield(model,'metFormulas'); +1098 end +1099 if isempty(model.metMiriams) +1100 model=rmfield(model,'metMiriams'); +1101 end +1102 if ~any(model.metCharges) +1103 model=rmfield(model,'metCharges'); +1104 end +1105 +1106 %This just removes the grRules if no genes have been loaded +1107 if ~isfield(model,'genes') && isfield(model,'grRules') +1108 model=rmfield(model,'grRules'); +1109 end +1110 +1111 %Print warnings about bad structure +1112 if supressWarnings==false +1113 checkModelStruct(model,false); +1114 end +1115 +1116 if removeExcMets==true +1117 model=simplifyModel(model); +1118 end +1119 end +1120 +1121 function matchGenes=getGeneList(grRules) +1122 %Constructs the list of unique genes from grRules +1123 +1124 %Assumes that everything that isn't a paranthesis, " AND " or " or " is a +1125 %gene name +1126 genes=strrep(grRules,'(',''); +1127 genes=strrep(genes,')',''); +1128 genes=strrep(genes,' or ',' '); +1129 genes=strrep(genes,' and ',' '); +1130 
genes=strrep(genes,' OR ',' '); +1131 genes=strrep(genes,' AND ',' '); +1132 genes=regexp(genes,' ','split'); 1133 -1134 function fieldContent=parseNote(searchString,fieldName) -1135 %The function obtains the particular information from 'notes' field, using -1136 %fieldName as the dummy string -1137 -1138 fieldContent=''; +1134 allNames={}; +1135 for i=1:numel(genes) +1136 allNames=[allNames genes{i}]; +1137 end +1138 matchGenes=unique(allNames)'; 1139 -1140 if strfind(searchString,fieldName) -1141 [~,targetString] = regexp(searchString,['<p>' fieldName '.*?</p>'],'tokens','match'); -1142 targetString=regexprep(targetString,'<p>|</p>',''); -1143 targetString=regexprep(targetString,[fieldName, ':'],''); -1144 for i=1:numel(targetString) -1145 fieldContent=[fieldContent ';' strtrim(targetString{1,i})]; -1146 end -1147 fieldContent=regexprep(fieldContent,'^;|;$',''); -1148 else -1149 fieldContent=''; -1150 end -1151 end -1152 -1153 function fieldContent=parseAnnotation(searchString,startString,midString,fieldName) -1154 -1155 fieldContent=''; -1156 -1157 %Removing whitespace characters from the ending strings, which may occur in -1158 %several cases -1159 searchString=regexprep(searchString,'" />','"/>'); -1160 [~,targetString] = regexp(searchString,['<rdf:li rdf:resource="' startString fieldName midString '.*?"/>'],'tokens','match'); -1161 targetString=regexprep(targetString,'<rdf:li rdf:resource="|"/>',''); -1162 targetString=regexprep(targetString,startString,''); -1163 targetString=regexprep(targetString,[fieldName midString],''); +1140 %Remove the empty element if present +1141 if isempty(matchGenes{1}) +1142 matchGenes(1)=[]; +1143 end +1144 end +1145 +1146 function fieldContent=parseNote(searchString,fieldName) +1147 %The function obtains the particular information from 'notes' field, using +1148 %fieldName as the dummy string +1149 +1150 fieldContent=''; +1151 +1152 if strfind(searchString,fieldName) +1153 [~,targetString] = regexp(searchString,['<p>' 
fieldName '.*?</p>'],'tokens','match'); +1154 targetString=regexprep(targetString,'<p>|</p>',''); +1155 targetString=regexprep(targetString,[fieldName, ':'],''); +1156 for i=1:numel(targetString) +1157 fieldContent=[fieldContent ';' strtrim(targetString{1,i})]; +1158 end +1159 fieldContent=regexprep(fieldContent,'^;|;$',''); +1160 else +1161 fieldContent=''; +1162 end +1163 end 1164 -1165 for i=1:numel(targetString) -1166 fieldContent=[fieldContent ';' strtrim(targetString{1,i})]; -1167 end +1165 function fieldContent=parseAnnotation(searchString,startString,midString,fieldName) +1166 +1167 fieldContent=''; 1168 -1169 fieldContent=regexprep(fieldContent,'^;|;$',''); -1170 end -1171 -1172 function miriamStruct=parseMiriam(searchString) -1173 %Generates miriam structure from annotation field -1174 -1175 %Finding whether miriams are written in the old or the new way -1176 if strfind(searchString,'urn:miriam:') -1177 startString='urn:miriam:'; -1178 midString=':'; -1179 elseif strfind(searchString,'http://identifiers.org/') -1180 startString='http://identifiers.org/'; -1181 midString='/'; -1182 elseif strfind(searchString,'https://identifiers.org/') -1183 startString='https://identifiers.org/'; -1184 midString='/'; -1185 else -1186 miriamStruct=[]; -1187 return; -1188 end -1189 -1190 miriamStruct=[]; -1191 -1192 searchString=regexprep(searchString,'" />','"/>'); -1193 [~,targetString] = regexp(searchString,'<rdf:li rdf:resource=".*?"/>','tokens','match'); -1194 targetString=regexprep(targetString,'<rdf:li rdf:resource="|"/>',''); -1195 targetString=regexprep(targetString,startString,''); -1196 targetString=regexprep(targetString,midString,'/','once'); -1197 -1198 counter=0; -1199 for i=1:numel(targetString) -1200 if isempty(regexp(targetString{1,i},'inchi|ec-code', 'once')) -1201 counter=counter+1; -1202 miriamStruct.name{counter,1} = regexprep(targetString{1,i},'/.+','','once'); -1203 miriamStruct.value{counter,1} = 
regexprep(targetString{1,i},[miriamStruct.name{counter,1} '/'],'','once'); -1204 miriamStruct.name{counter,1} = regexprep(miriamStruct.name{counter,1},'^obo\.',''); -1205 end -1206 end -1207 end -1208 -1209 function miriam = addSBOtoMiriam(miriam,sboTerm) -1210 %Appends SBO term to miriam structure -1211 -1212 sboTerm = {['SBO:' sprintf('%07u',sboTerm)]}; % convert to proper format -1213 if isempty(miriam) -1214 miriam.name = {'sbo'}; -1215 miriam.value = sboTerm; -1216 elseif any(strcmp('sbo',miriam.name)) -1217 currSbo = strcmp('sbo',miriam.name); -1218 miriam.value(currSbo) = sboTerm; -1219 else -1220 miriam.name(end+1) = {'sbo'}; -1221 miriam.value(end+1) = sboTerm; -1222 end -1223 end +1169 %Removing whitespace characters from the ending strings, which may occur in +1170 %several cases +1171 searchString=regexprep(searchString,'" />','"/>'); +1172 [~,targetString] = regexp(searchString,['<rdf:li rdf:resource="' startString fieldName midString '.*?"/>'],'tokens','match'); +1173 targetString=regexprep(targetString,'<rdf:li rdf:resource="|"/>',''); +1174 targetString=regexprep(targetString,startString,''); +1175 targetString=regexprep(targetString,[fieldName midString],''); +1176 +1177 for i=1:numel(targetString) +1178 fieldContent=[fieldContent ';' strtrim(targetString{1,i})]; +1179 end +1180 +1181 fieldContent=regexprep(fieldContent,'^;|;$',''); +1182 end +1183 +1184 function miriamStruct=parseMiriam(searchString) +1185 %Generates miriam structure from annotation field +1186 +1187 %Finding whether miriams are written in the old or the new way +1188 if strfind(searchString,'urn:miriam:') +1189 startString='urn:miriam:'; +1190 midString=':'; +1191 elseif strfind(searchString,'http://identifiers.org/') +1192 startString='http://identifiers.org/'; +1193 midString='/'; +1194 elseif strfind(searchString,'https://identifiers.org/') +1195 startString='https://identifiers.org/'; +1196 midString='/'; +1197 else +1198 miriamStruct=[]; +1199 return; +1200 end +1201 +1202 
miriamStruct=[]; +1203 +1204 searchString=regexprep(searchString,'" />','"/>'); +1205 [~,targetString] = regexp(searchString,'<rdf:li rdf:resource=".*?"/>','tokens','match'); +1206 targetString=regexprep(targetString,'<rdf:li rdf:resource="|"/>',''); +1207 targetString=regexprep(targetString,startString,''); +1208 targetString=regexprep(targetString,midString,'/','once'); +1209 +1210 counter=0; +1211 for i=1:numel(targetString) +1212 if isempty(regexp(targetString{1,i},'inchi|ec-code', 'once')) +1213 counter=counter+1; +1214 miriamStruct.name{counter,1} = regexprep(targetString{1,i},'/.+','','once'); +1215 miriamStruct.value{counter,1} = regexprep(targetString{1,i},[miriamStruct.name{counter,1} '/'],'','once'); +1216 miriamStruct.name{counter,1} = regexprep(miriamStruct.name{counter,1},'^obo\.',''); +1217 end +1218 end +1219 end +1220 +1221 function miriam = addSBOtoMiriam(miriam,sboTerm) +1222 %Appends SBO term to miriam structure +1223 +1224 sboTerm = {['SBO:' sprintf('%07u',sboTerm)]}; % convert to proper format +1225 if isempty(miriam) +1226 miriam.name = {'sbo'}; +1227 miriam.value = sboTerm; +1228 elseif any(strcmp('sbo',miriam.name)) +1229 currSbo = strcmp('sbo',miriam.name); +1230 miriam.value(currSbo) = sboTerm; +1231 else +1232 miriam.name(end+1) = {'sbo'}; +1233 miriam.value(end+1) = sboTerm; +1234 end +1235 end
Generated by m2html © 2005
\ No newline at end of file diff --git a/doc/io/readYAMLmodel.html b/doc/io/readYAMLmodel.html index bb38e6dd..5cf8e953 100644 --- a/doc/io/readYAMLmodel.html +++ b/doc/io/readYAMLmodel.html @@ -157,582 +157,585 @@

SOURCE CODE ^'geneComps',cell(0,0);... %Changed to double in the end. 0101 'geneMiriams',cell(0,0);... 0102 'geneShortNames',cell(0,0);... -0103 'unconstrained',cell(0,0);... %Changed to double in the end. -0104 'metFrom',cell(0,0);... -0105 'rxnFrom',cell(0,0)}; -0106 for i=1:size(modelFields,1) -0107 model.(modelFields{i,1})=modelFields{i,2}; -0108 end -0109 -0110 % If GECKO model -0111 if any(contains(line_key,'geckoLight')) -0112 isGECKO=true; -0113 ecFields = {'geckoLight', false;... -0114 'rxns', {};... -0115 'kcat', {};... -0116 'source', cell(0,0);... -0117 'notes', cell(0,0);... -0118 'eccodes', cell(0,0);... -0119 'genes', cell(0,0);... -0120 'enzymes', cell(0,0);... -0121 'mw', cell(0,0);... -0122 'sequence', cell(0,0);... -0123 'concs', cell(0,0);... -0124 'rxnEnzMat', []}; -0125 for i=1:size(ecFields,1) -0126 model.ec.(ecFields{i,1})=ecFields{i,2}; -0127 end -0128 ecGecko=cell(25000,2); ecGeckoNo=1; -0129 enzStoich=cell(100000,3); enzStoichNo=1; -0130 else -0131 isGECKO=false; -0132 end -0133 -0134 section = 0; -0135 metMiriams=cell(100000,3); metMirNo=1; -0136 rxnMiriams=cell(100000,3); rxnMirNo=1; -0137 geneMiriams=cell(100000,3); genMirNo=1; -0138 subSystems=cell(100000,2); subSysNo=1; -0139 eccodes=cell(100000,2); ecCodeNo=1; -0140 equations=cell(100000,3); equatiNo=1; -0141 -0142 for i=1:numel(line_key) -0143 tline_raw = line_raw{i}; -0144 tline_key = line_key{i}; -0145 tline_value = line_value{i}; -0146 % import different sections -0147 switch tline_raw -0148 case '- metaData:' -0149 section = 1; -0150 if verbose -0151 fprintf('\t%d\n', section); -0152 end -0153 continue % Go to next line -0154 case '- metabolites:' -0155 section = 2; -0156 if verbose -0157 fprintf('\t%d\n', section); -0158 end -0159 pos=0; -0160 continue -0161 case '- reactions:' -0162 section = 3; -0163 if verbose -0164 fprintf('\t%d\n', section); -0165 end -0166 pos=0; -0167 continue -0168 case '- genes:' -0169 section = 4; -0170 if verbose -0171 fprintf('\t%d\n', section); 
-0172 end -0173 pos=0; -0174 continue -0175 case '- compartments: !!omap' -0176 section = 5; -0177 if verbose -0178 fprintf('\t%d\n', section); -0179 end -0180 pos=0; -0181 continue -0182 case '- ec-rxns:' -0183 section = 6; -0184 if verbose -0185 fprintf('\t%d\n', section); -0186 end -0187 pos=0; -0188 continue -0189 case '- ec-enzymes:' -0190 section = 7; -0191 if verbose -0192 fprintf('\t%d\n', section); -0193 end -0194 pos=0; -0195 continue -0196 end -0197 -0198 % skip over empty keys -0199 if isempty(tline_raw) || (isempty(tline_key) && contains(tline_raw,'!!omap')) -0200 continue; -0201 end -0202 -0203 % import metaData -0204 if section == 1 -0205 switch tline_key -0206 case {'short_name','id'} %short_name used by human-GEM -0207 model.id = tline_value; -0208 case 'name' -0209 model.name = tline_value; -0210 case 'full_name' %used by human-GEM -0211 model.description = tline_value; -0212 case 'version' -0213 model.version = tline_value; -0214 case 'date' -0215 model.date = tline_value; -0216 case 'taxonomy' -0217 model.annotation.taxonomy = tline_value; -0218 case {'description','note'} %description used by human-GEM -0219 model.annotation.note = tline_value; -0220 case 'github' -0221 model.annotation.sourceUrl = tline_value; -0222 case 'givenName' -0223 model.annotation.givenName = tline_value; -0224 case 'familyName' -0225 model.annotation.familyName = tline_value; -0226 case 'authors' -0227 model.annotation.authorList = tline_value; -0228 case 'email' -0229 model.annotation.email = tline_value; -0230 case 'organization' -0231 model.annotation.organization = tline_value; -0232 case 'geckoLight' -0233 if strcmp(tline_value,'true') -0234 model.ec.geckoLight = true; -0235 end -0236 end; continue -0237 end -0238 -0239 % import metabolites: -0240 if section == 2 -0241 switch tline_key -0242 case 'id' -0243 pos = pos + 1; -0244 model = readFieldValue(model, 'mets', tline_value,pos); -0245 readList=''; miriamKey=''; -0246 case 'name' -0247 model = 
readFieldValue(model, 'metNames', tline_value, pos); -0248 readList=''; miriamKey=''; -0249 case 'compartment' -0250 model = readFieldValue(model, 'metComps', tline_value, pos); -0251 readList=''; miriamKey=''; -0252 case 'formula' -0253 model = readFieldValue(model, 'metFormulas', tline_value, pos); -0254 readList=''; miriamKey=''; -0255 case 'charge' -0256 model = readFieldValue(model, 'metCharges', tline_value, pos); -0257 readList=''; miriamKey=''; -0258 case 'notes' -0259 model = readFieldValue(model, 'metNotes', tline_value, pos); -0260 readList=''; miriamKey=''; -0261 case 'inchis' -0262 model = readFieldValue(model, 'inchis', tline_value, pos); -0263 readList=''; miriamKey=''; -0264 case 'smiles' -0265 model = readFieldValue(model, 'metSmiles', tline_value, pos); -0266 readList=''; miriamKey=''; -0267 case 'deltaG' -0268 model = readFieldValue(model, 'metDeltaG', tline_value, pos); -0269 readList=''; miriamKey=''; -0270 case 'metFrom' -0271 model = readFieldValue(model, 'metFrom', tline_value, pos); -0272 readList=''; miriamKey=''; -0273 case 'annotation' -0274 readList = 'annotation'; -0275 otherwise -0276 switch readList -0277 case 'annotation' -0278 [metMiriams, miriamKey] = gatherAnnotation(pos,metMiriams,tline_key,tline_value,miriamKey,metMirNo); -0279 metMirNo = metMirNo + 1; -0280 otherwise -0281 error(['Unknown entry in yaml file: ' tline_raw]) -0282 end -0283 end; continue -0284 end -0285 -0286 % import reactions: -0287 if section == 3 -0288 switch tline_key -0289 case 'id' -0290 pos = pos + 1; -0291 model = readFieldValue(model, 'rxns', tline_value,pos); -0292 readList=''; miriamKey=''; -0293 case 'name' -0294 model = readFieldValue(model, 'rxnNames', tline_value, pos); -0295 readList=''; miriamKey=''; -0296 case 'lower_bound' -0297 model.lb(pos,1) = {tline_value}; -0298 readList=''; miriamKey=''; -0299 case 'upper_bound' -0300 model.ub(pos,1) = {tline_value}; -0301 readList=''; miriamKey=''; -0302 case 'rev' -0303 model.rev(pos,1) = 
{tline_value}; -0304 readList=''; miriamKey=''; -0305 case 'gene_reaction_rule' -0306 model = readFieldValue(model, 'grRules', tline_value, pos); -0307 readList=''; miriamKey=''; -0308 case 'rxnNotes' -0309 model = readFieldValue(model, 'rxnNotes', tline_value, pos); -0310 readList=''; miriamKey=''; -0311 case 'rxnFrom' -0312 model = readFieldValue(model, 'rxnFrom', tline_value, pos); -0313 readList=''; miriamKey=''; -0314 case 'deltaG' -0315 model = readFieldValue(model, 'rxnDeltaG', tline_value, pos); -0316 readList=''; miriamKey=''; -0317 case 'objective_coefficient' -0318 model.c(pos,1) = 1; -0319 readList=''; miriamKey=''; -0320 case 'references' -0321 model = readFieldValue(model, 'rxnReferences', tline_value, pos); -0322 readList=''; miriamKey=''; -0323 case 'confidence_score' -0324 model = readFieldValue(model, 'rxnConfidenceScores', tline_value, pos); -0325 readList=''; miriamKey=''; -0326 case 'eccodes' -0327 if isempty(tline_value) -0328 readList = 'eccodes'; -0329 else -0330 eccodes(ecCodeNo,1:2)={pos,tline_value}; -0331 ecCodeNo=ecCodeNo+1; -0332 end -0333 case 'subsystem' -0334 if isempty(tline_value) -0335 readList = 'subsystem'; -0336 else -0337 subSystems(subSysNo,1:2)={pos,tline_value}; -0338 subSysNo=subSysNo+1; -0339 end -0340 case 'metabolites' -0341 readList = 'equation'; -0342 case 'annotation' -0343 readList = 'annotation'; -0344 -0345 otherwise -0346 switch readList -0347 case 'eccodes' -0348 eccodes(ecCodeNo,1:2)={pos,regexprep(tline_value,'^ +- "?(.*)"?$','$1')}; -0349 ecCodeNo=ecCodeNo+1; -0350 case 'subsystem' -0351 subSystems(subSysNo,1:2)={pos,regexprep(tline_value,'^ +- "?(.*)"?$','$1')}; -0352 subSysNo=subSysNo+1; -0353 case 'annotation' -0354 [rxnMiriams, miriamKey,rxnMirNo] = gatherAnnotation(pos,rxnMiriams,tline_key,tline_value,miriamKey,rxnMirNo); -0355 rxnMirNo=rxnMirNo+1; -0356 case 'equation' -0357 coeff = sscanf(tline_value,'%f'); -0358 equations(equatiNo,1:3)={pos,tline_key,coeff}; -0359 equatiNo=equatiNo+1; -0360 otherwise 
-0361 error(['Unknown entry in yaml file: ' tline_raw]) -0362 end -0363 end; continue -0364 end -0365 -0366 % import genes: -0367 if section == 4 -0368 switch tline_key -0369 case 'id' -0370 pos = pos + 1; -0371 model = readFieldValue(model, 'genes', tline_value, pos); -0372 readList = ''; -0373 miriamKey = ''; -0374 case 'name' -0375 model = readFieldValue(model, 'geneShortNames', tline_value, pos); -0376 case 'annotation' -0377 readList = 'annotation'; -0378 otherwise -0379 switch readList -0380 case 'annotation' -0381 [geneMiriams, miriamKey] = gatherAnnotation(pos,geneMiriams,tline_key,tline_value,miriamKey,genMirNo); -0382 genMirNo = genMirNo + 1; -0383 otherwise -0384 error(['Unknown entry in yaml file: ' tline_raw]) -0385 end -0386 end; continue -0387 end -0388 -0389 % import compartments: -0390 if section == 5 -0391 model.comps(end+1,1) = {tline_key}; -0392 model.compNames(end+1,1) = {tline_value}; -0393 end -0394 -0395 % import ec reaction info -0396 if section == 6 -0397 switch tline_key -0398 case 'id' -0399 pos = pos + 1; -0400 model.ec = readFieldValue(model.ec, 'rxns', tline_value, pos); -0401 readList=''; -0402 case 'kcat' -0403 model.ec = readFieldValue(model.ec, 'kcat', tline_value, pos); +0103 'proteinNames',cell(0,0);... +0104 'unconstrained',cell(0,0);... %Changed to double in the end. +0105 'metFrom',cell(0,0);... +0106 'rxnFrom',cell(0,0)}; +0107 for i=1:size(modelFields,1) +0108 model.(modelFields{i,1})=modelFields{i,2}; +0109 end +0110 +0111 % If GECKO model +0112 if any(contains(line_key,'geckoLight')) +0113 isGECKO=true; +0114 ecFields = {'geckoLight', false;... +0115 'rxns', {};... +0116 'kcat', {};... +0117 'source', cell(0,0);... +0118 'notes', cell(0,0);... +0119 'eccodes', cell(0,0);... +0120 'genes', cell(0,0);... +0121 'enzymes', cell(0,0);... +0122 'mw', cell(0,0);... +0123 'sequence', cell(0,0);... +0124 'concs', cell(0,0);... 
+0125 'rxnEnzMat', []}; +0126 for i=1:size(ecFields,1) +0127 model.ec.(ecFields{i,1})=ecFields{i,2}; +0128 end +0129 ecGecko=cell(25000,2); ecGeckoNo=1; +0130 enzStoich=cell(100000,3); enzStoichNo=1; +0131 else +0132 isGECKO=false; +0133 end +0134 +0135 section = 0; +0136 metMiriams=cell(100000,3); metMirNo=1; +0137 rxnMiriams=cell(100000,3); rxnMirNo=1; +0138 geneMiriams=cell(100000,3); genMirNo=1; +0139 subSystems=cell(100000,2); subSysNo=1; +0140 eccodes=cell(100000,2); ecCodeNo=1; +0141 equations=cell(100000,3); equatiNo=1; +0142 +0143 for i=1:numel(line_key) +0144 tline_raw = line_raw{i}; +0145 tline_key = line_key{i}; +0146 tline_value = line_value{i}; +0147 % import different sections +0148 switch tline_raw +0149 case '- metaData:' +0150 section = 1; +0151 if verbose +0152 fprintf('\t%d\n', section); +0153 end +0154 continue % Go to next line +0155 case '- metabolites:' +0156 section = 2; +0157 if verbose +0158 fprintf('\t%d\n', section); +0159 end +0160 pos=0; +0161 continue +0162 case '- reactions:' +0163 section = 3; +0164 if verbose +0165 fprintf('\t%d\n', section); +0166 end +0167 pos=0; +0168 continue +0169 case '- genes:' +0170 section = 4; +0171 if verbose +0172 fprintf('\t%d\n', section); +0173 end +0174 pos=0; +0175 continue +0176 case '- compartments: !!omap' +0177 section = 5; +0178 if verbose +0179 fprintf('\t%d\n', section); +0180 end +0181 pos=0; +0182 continue +0183 case '- ec-rxns:' +0184 section = 6; +0185 if verbose +0186 fprintf('\t%d\n', section); +0187 end +0188 pos=0; +0189 continue +0190 case '- ec-enzymes:' +0191 section = 7; +0192 if verbose +0193 fprintf('\t%d\n', section); +0194 end +0195 pos=0; +0196 continue +0197 end +0198 +0199 % skip over empty keys +0200 if isempty(tline_raw) || (isempty(tline_key) && contains(tline_raw,'!!omap')) +0201 continue; +0202 end +0203 +0204 % import metaData +0205 if section == 1 +0206 switch tline_key +0207 case {'short_name','id'} %short_name used by human-GEM +0208 model.id = tline_value; +0209 
case 'name' +0210 model.name = tline_value; +0211 case 'full_name' %used by human-GEM +0212 model.description = tline_value; +0213 case 'version' +0214 model.version = tline_value; +0215 case 'date' +0216 model.date = tline_value; +0217 case 'taxonomy' +0218 model.annotation.taxonomy = tline_value; +0219 case {'description','note'} %description used by human-GEM +0220 model.annotation.note = tline_value; +0221 case 'github' +0222 model.annotation.sourceUrl = tline_value; +0223 case 'givenName' +0224 model.annotation.givenName = tline_value; +0225 case 'familyName' +0226 model.annotation.familyName = tline_value; +0227 case 'authors' +0228 model.annotation.authorList = tline_value; +0229 case 'email' +0230 model.annotation.email = tline_value; +0231 case 'organization' +0232 model.annotation.organization = tline_value; +0233 case 'geckoLight' +0234 if strcmp(tline_value,'true') +0235 model.ec.geckoLight = true; +0236 end +0237 end; continue +0238 end +0239 +0240 % import metabolites: +0241 if section == 2 +0242 switch tline_key +0243 case 'id' +0244 pos = pos + 1; +0245 model = readFieldValue(model, 'mets', tline_value,pos); +0246 readList=''; miriamKey=''; +0247 case 'name' +0248 model = readFieldValue(model, 'metNames', tline_value, pos); +0249 readList=''; miriamKey=''; +0250 case 'compartment' +0251 model = readFieldValue(model, 'metComps', tline_value, pos); +0252 readList=''; miriamKey=''; +0253 case 'formula' +0254 model = readFieldValue(model, 'metFormulas', tline_value, pos); +0255 readList=''; miriamKey=''; +0256 case 'charge' +0257 model = readFieldValue(model, 'metCharges', tline_value, pos); +0258 readList=''; miriamKey=''; +0259 case 'notes' +0260 model = readFieldValue(model, 'metNotes', tline_value, pos); +0261 readList=''; miriamKey=''; +0262 case 'inchis' +0263 model = readFieldValue(model, 'inchis', tline_value, pos); +0264 readList=''; miriamKey=''; +0265 case 'smiles' +0266 model = readFieldValue(model, 'metSmiles', tline_value, pos); +0267 
readList=''; miriamKey=''; +0268 case 'deltaG' +0269 model = readFieldValue(model, 'metDeltaG', tline_value, pos); +0270 readList=''; miriamKey=''; +0271 case 'metFrom' +0272 model = readFieldValue(model, 'metFrom', tline_value, pos); +0273 readList=''; miriamKey=''; +0274 case 'annotation' +0275 readList = 'annotation'; +0276 otherwise +0277 switch readList +0278 case 'annotation' +0279 [metMiriams, miriamKey] = gatherAnnotation(pos,metMiriams,tline_key,tline_value,miriamKey,metMirNo); +0280 metMirNo = metMirNo + 1; +0281 otherwise +0282 error(['Unknown entry in yaml file: ' tline_raw]) +0283 end +0284 end; continue +0285 end +0286 +0287 % import reactions: +0288 if section == 3 +0289 switch tline_key +0290 case 'id' +0291 pos = pos + 1; +0292 model = readFieldValue(model, 'rxns', tline_value,pos); +0293 readList=''; miriamKey=''; +0294 case 'name' +0295 model = readFieldValue(model, 'rxnNames', tline_value, pos); +0296 readList=''; miriamKey=''; +0297 case 'lower_bound' +0298 model.lb(pos,1) = {tline_value}; +0299 readList=''; miriamKey=''; +0300 case 'upper_bound' +0301 model.ub(pos,1) = {tline_value}; +0302 readList=''; miriamKey=''; +0303 case 'rev' +0304 model.rev(pos,1) = {tline_value}; +0305 readList=''; miriamKey=''; +0306 case 'gene_reaction_rule' +0307 model = readFieldValue(model, 'grRules', tline_value, pos); +0308 readList=''; miriamKey=''; +0309 case 'rxnNotes' +0310 model = readFieldValue(model, 'rxnNotes', tline_value, pos); +0311 readList=''; miriamKey=''; +0312 case 'rxnFrom' +0313 model = readFieldValue(model, 'rxnFrom', tline_value, pos); +0314 readList=''; miriamKey=''; +0315 case 'deltaG' +0316 model = readFieldValue(model, 'rxnDeltaG', tline_value, pos); +0317 readList=''; miriamKey=''; +0318 case 'objective_coefficient' +0319 model.c(pos,1) = 1; +0320 readList=''; miriamKey=''; +0321 case 'references' +0322 model = readFieldValue(model, 'rxnReferences', tline_value, pos); +0323 readList=''; miriamKey=''; +0324 case 'confidence_score' +0325 
model = readFieldValue(model, 'rxnConfidenceScores', tline_value, pos); +0326 readList=''; miriamKey=''; +0327 case 'eccodes' +0328 if isempty(tline_value) +0329 readList = 'eccodes'; +0330 else +0331 eccodes(ecCodeNo,1:2)={pos,tline_value}; +0332 ecCodeNo=ecCodeNo+1; +0333 end +0334 case 'subsystem' +0335 if isempty(tline_value) +0336 readList = 'subsystem'; +0337 else +0338 subSystems(subSysNo,1:2)={pos,tline_value}; +0339 subSysNo=subSysNo+1; +0340 end +0341 case 'metabolites' +0342 readList = 'equation'; +0343 case 'annotation' +0344 readList = 'annotation'; +0345 +0346 otherwise +0347 switch readList +0348 case 'eccodes' +0349 eccodes(ecCodeNo,1:2)={pos,regexprep(tline_value,'^ +- "?(.*)"?$','$1')}; +0350 ecCodeNo=ecCodeNo+1; +0351 case 'subsystem' +0352 subSystems(subSysNo,1:2)={pos,regexprep(tline_value,'^ +- "?(.*)"?$','$1')}; +0353 subSysNo=subSysNo+1; +0354 case 'annotation' +0355 [rxnMiriams, miriamKey,rxnMirNo] = gatherAnnotation(pos,rxnMiriams,tline_key,tline_value,miriamKey,rxnMirNo); +0356 rxnMirNo=rxnMirNo+1; +0357 case 'equation' +0358 coeff = sscanf(tline_value,'%f'); +0359 equations(equatiNo,1:3)={pos,tline_key,coeff}; +0360 equatiNo=equatiNo+1; +0361 otherwise +0362 error(['Unknown entry in yaml file: ' tline_raw]) +0363 end +0364 end; continue +0365 end +0366 +0367 % import genes: +0368 if section == 4 +0369 switch tline_key +0370 case 'id' +0371 pos = pos + 1; +0372 model = readFieldValue(model, 'genes', tline_value, pos); +0373 readList = ''; +0374 miriamKey = ''; +0375 case 'name' +0376 model = readFieldValue(model, 'geneShortNames', tline_value, pos); +0377 case 'protein' +0378 model = readFieldValue(model, 'proteinNames', tline_value, pos); +0379 case 'annotation' +0380 readList = 'annotation'; +0381 otherwise +0382 switch readList +0383 case 'annotation' +0384 [geneMiriams, miriamKey] = gatherAnnotation(pos,geneMiriams,tline_key,tline_value,miriamKey,genMirNo); +0385 genMirNo = genMirNo + 1; +0386 otherwise +0387 error(['Unknown entry in 
yaml file: ' tline_raw]) +0388 end +0389 end; continue +0390 end +0391 +0392 % import compartments: +0393 if section == 5 +0394 model.comps(end+1,1) = {tline_key}; +0395 model.compNames(end+1,1) = {tline_value}; +0396 end +0397 +0398 % import ec reaction info +0399 if section == 6 +0400 switch tline_key +0401 case 'id' +0402 pos = pos + 1; +0403 model.ec = readFieldValue(model.ec, 'rxns', tline_value, pos); 0404 readList=''; -0405 case 'source' -0406 model.ec = readFieldValue(model.ec, 'source', tline_value, pos); +0405 case 'kcat' +0406 model.ec = readFieldValue(model.ec, 'kcat', tline_value, pos); 0407 readList=''; -0408 case 'notes' -0409 model.ec = readFieldValue(model.ec, 'notes', tline_value, pos); +0408 case 'source' +0409 model.ec = readFieldValue(model.ec, 'source', tline_value, pos); 0410 readList=''; -0411 case 'eccodes' -0412 if isempty(tline_value) -0413 readList = 'eccodes'; -0414 else -0415 ecGecko(ecGeckoNo,1:2)={pos,tline_value}; -0416 ecGeckoNo=ecGeckoNo+1; -0417 end -0418 case 'enzymes' -0419 readList = 'enzStoich'; -0420 otherwise -0421 switch readList -0422 case 'eccodes' -0423 ecGecko(ecGeckoNo,1:2)={pos,regexprep(tline_value,'^ +- "?(.*)"?$','$1')}; -0424 ecGeckoNo=ecGeckoNo+1; -0425 case 'enzStoich' -0426 coeff = sscanf(tline_value,'%f'); -0427 enzStoich(enzStoichNo,1:3)={pos,tline_key,coeff}; -0428 enzStoichNo=enzStoichNo+1; -0429 otherwise -0430 error(['Unknown entry in yaml file: ' tline_raw]) -0431 end -0432 end; continue -0433 end -0434 -0435 % import ec enzyme info -0436 if section == 7 -0437 switch tline_key -0438 case 'genes' -0439 pos = pos + 1; -0440 model.ec = readFieldValue(model.ec, 'genes', tline_value, pos); -0441 case 'enzymes' -0442 model.ec = readFieldValue(model.ec, 'enzymes', tline_value, pos); -0443 case 'mw' -0444 model.ec = readFieldValue(model.ec, 'mw', tline_value, pos); -0445 case 'sequence' -0446 model.ec = readFieldValue(model.ec, 'sequence', tline_value, pos); -0447 case 'concs' -0448 model.ec = 
readFieldValue(model.ec, 'concs', tline_value, pos); -0449 otherwise -0450 error(['Unknown entry in yaml file: ' tline_raw]) -0451 end; continue -0452 end -0453 end -0454 -0455 %Parse annotations -0456 if ~isempty(metMiriams) -0457 locs = cell2mat(metMiriams(:,1)); -0458 for i=unique(locs)' -0459 model.metMiriams{i,1}.name=metMiriams(locs==i,2); -0460 model.metMiriams{i,1}.value=metMiriams(locs==i,3); -0461 end -0462 end -0463 if ~isempty(rxnMiriams) -0464 locs = cell2mat(rxnMiriams(:,1)); -0465 for i=unique(locs)' -0466 model.rxnMiriams{i,1}.name=rxnMiriams(locs==i,2); -0467 model.rxnMiriams{i,1}.value=rxnMiriams(locs==i,3); -0468 end -0469 end -0470 if ~isempty(geneMiriams) -0471 locs = cell2mat(geneMiriams(:,1)); -0472 for i=unique(locs)' -0473 model.geneMiriams{i,1}.name=geneMiriams(locs==i,2); -0474 model.geneMiriams{i,1}.value=geneMiriams(locs==i,3); -0475 end -0476 end -0477 -0478 %Parse subSystems -0479 if ~isempty(subSystems) -0480 locs = cell2mat(subSystems(:,1)); -0481 for i=unique(locs)' -0482 model.subSystems{i,1}=subSystems(locs==i,2); -0483 end -0484 end -0485 -0486 %Parse ec-codes -0487 if ~isempty(eccodes) -0488 locs = cell2mat(eccodes(:,1)); -0489 for i=unique(locs)' -0490 eccodesCat=strjoin(eccodes(locs==i,2),';'); -0491 model.eccodes{i,1}=eccodesCat; -0492 end -0493 emptyEc=cellfun('isempty',model.eccodes); -0494 model.eccodes(emptyEc)={''}; -0495 end -0496 -0497 % follow-up data processing -0498 if verbose -0499 fprintf('\nimporting completed\nfollow-up processing...'); -0500 end -0501 [~, model.metComps] = ismember(model.metComps, model.comps); -0502 [~, model.geneComps] = ismember(model.geneComps, model.comps); -0503 [~, model.rxnComps] = ismember(model.rxnComps, model.comps); -0504 -0505 % Fill S-matrix -0506 rxnIdx = cellfun('isempty', equations(:,1)); -0507 equations(rxnIdx,:) = ''; -0508 rxnIdx = cell2mat(equations(:,1)); -0509 [~,metIdx] = ismember(equations(:,2),model.mets); -0510 coeffs = cell2mat(equations(:,3)); -0511 
model.S=sparse(max(metIdx),max(rxnIdx)); -0512 linearIndices = sub2ind([max(metIdx), max(rxnIdx)],metIdx,rxnIdx); -0513 model.S(linearIndices) = coeffs; -0514 -0515 % Convert strings to numeric -0516 model.metCharges = str2double(model.metCharges); -0517 model.lb = str2double(model.lb); -0518 model.ub = str2double(model.ub); -0519 model.rxnConfidenceScores = str2double(model.rxnConfidenceScores); -0520 model.b = zeros(length(model.mets),1); -0521 model.metDeltaG = str2double(model.metDeltaG); -0522 model.rxnDeltaG = str2double(model.rxnDeltaG); -0523 -0524 % Fill some other fields -0525 model.annotation.defaultLB = min(model.lb); -0526 model.annotation.defaultUB = max(model.ub); -0527 if numel(model.lb)<numel(model.rxns) %No LB reported = min -0528 model.lb(end+1:numel(model.rxns)-numel(model.lb),1) = double(model.annotation.defaultLB); -0529 end -0530 if numel(model.ub)<numel(model.rxns) %No UB reported = max -0531 model.ub(end+1:numel(model.rxns)-numel(model.ub),1) = double(model.annotation.defaultUB); +0411 case 'notes' +0412 model.ec = readFieldValue(model.ec, 'notes', tline_value, pos); +0413 readList=''; +0414 case 'eccodes' +0415 if isempty(tline_value) +0416 readList = 'eccodes'; +0417 else +0418 ecGecko(ecGeckoNo,1:2)={pos,tline_value}; +0419 ecGeckoNo=ecGeckoNo+1; +0420 end +0421 case 'enzymes' +0422 readList = 'enzStoich'; +0423 otherwise +0424 switch readList +0425 case 'eccodes' +0426 ecGecko(ecGeckoNo,1:2)={pos,regexprep(tline_value,'^ +- "?(.*)"?$','$1')}; +0427 ecGeckoNo=ecGeckoNo+1; +0428 case 'enzStoich' +0429 coeff = sscanf(tline_value,'%f'); +0430 enzStoich(enzStoichNo,1:3)={pos,tline_key,coeff}; +0431 enzStoichNo=enzStoichNo+1; +0432 otherwise +0433 error(['Unknown entry in yaml file: ' tline_raw]) +0434 end +0435 end; continue +0436 end +0437 +0438 % import ec enzyme info +0439 if section == 7 +0440 switch tline_key +0441 case 'genes' +0442 pos = pos + 1; +0443 model.ec = readFieldValue(model.ec, 'genes', tline_value, pos); +0444 case 
'enzymes' +0445 model.ec = readFieldValue(model.ec, 'enzymes', tline_value, pos); +0446 case 'mw' +0447 model.ec = readFieldValue(model.ec, 'mw', tline_value, pos); +0448 case 'sequence' +0449 model.ec = readFieldValue(model.ec, 'sequence', tline_value, pos); +0450 case 'concs' +0451 model.ec = readFieldValue(model.ec, 'concs', tline_value, pos); +0452 otherwise +0453 error(['Unknown entry in yaml file: ' tline_raw]) +0454 end; continue +0455 end +0456 end +0457 +0458 %Parse annotations +0459 if ~isempty(metMiriams) +0460 locs = cell2mat(metMiriams(:,1)); +0461 for i=unique(locs)' +0462 model.metMiriams{i,1}.name=metMiriams(locs==i,2); +0463 model.metMiriams{i,1}.value=metMiriams(locs==i,3); +0464 end +0465 end +0466 if ~isempty(rxnMiriams) +0467 locs = cell2mat(rxnMiriams(:,1)); +0468 for i=unique(locs)' +0469 model.rxnMiriams{i,1}.name=rxnMiriams(locs==i,2); +0470 model.rxnMiriams{i,1}.value=rxnMiriams(locs==i,3); +0471 end +0472 end +0473 if ~isempty(geneMiriams) +0474 locs = cell2mat(geneMiriams(:,1)); +0475 for i=unique(locs)' +0476 model.geneMiriams{i,1}.name=geneMiriams(locs==i,2); +0477 model.geneMiriams{i,1}.value=geneMiriams(locs==i,3); +0478 end +0479 end +0480 +0481 %Parse subSystems +0482 if ~isempty(subSystems) +0483 locs = cell2mat(subSystems(:,1)); +0484 for i=unique(locs)' +0485 model.subSystems{i,1}=subSystems(locs==i,2); +0486 end +0487 end +0488 +0489 %Parse ec-codes +0490 if ~isempty(eccodes) +0491 locs = cell2mat(eccodes(:,1)); +0492 for i=unique(locs)' +0493 eccodesCat=strjoin(eccodes(locs==i,2),';'); +0494 model.eccodes{i,1}=eccodesCat; +0495 end +0496 emptyEc=cellfun('isempty',model.eccodes); +0497 model.eccodes(emptyEc)={''}; +0498 end +0499 +0500 % follow-up data processing +0501 if verbose +0502 fprintf('\nimporting completed\nfollow-up processing...'); +0503 end +0504 [~, model.metComps] = ismember(model.metComps, model.comps); +0505 [~, model.geneComps] = ismember(model.geneComps, model.comps); +0506 [~, model.rxnComps] = 
ismember(model.rxnComps, model.comps); +0507 +0508 % Fill S-matrix +0509 rxnIdx = cellfun('isempty', equations(:,1)); +0510 equations(rxnIdx,:) = ''; +0511 rxnIdx = cell2mat(equations(:,1)); +0512 [~,metIdx] = ismember(equations(:,2),model.mets); +0513 coeffs = cell2mat(equations(:,3)); +0514 model.S=sparse(max(metIdx),max(rxnIdx)); +0515 linearIndices = sub2ind([max(metIdx), max(rxnIdx)],metIdx,rxnIdx); +0516 model.S(linearIndices) = coeffs; +0517 +0518 % Convert strings to numeric +0519 model.metCharges = str2double(model.metCharges); +0520 model.lb = str2double(model.lb); +0521 model.ub = str2double(model.ub); +0522 model.rxnConfidenceScores = str2double(model.rxnConfidenceScores); +0523 model.b = zeros(length(model.mets),1); +0524 model.metDeltaG = str2double(model.metDeltaG); +0525 model.rxnDeltaG = str2double(model.rxnDeltaG); +0526 +0527 % Fill some other fields +0528 model.annotation.defaultLB = min(model.lb); +0529 model.annotation.defaultUB = max(model.ub); +0530 if numel(model.lb)<numel(model.rxns) %No LB reported = min +0531 model.lb(end+1:numel(model.rxns)-numel(model.lb),1) = double(model.annotation.defaultLB); 0532 end -0533 if ~all(cellfun('isempty',model.rev)) -0534 model.rev = str2double(model.rev); -0535 else -0536 model.rev = []; -0537 end -0538 if numel(model.rev)<numel(model.rxns) %No rev reported, assume from LB and UB -0539 model.rev(end+1:numel(model.rxns)-numel(model.rev),1) = double(model.lb<0 & model.ub>0); +0533 if numel(model.ub)<numel(model.rxns) %No UB reported = max +0534 model.ub(end+1:numel(model.rxns)-numel(model.ub),1) = double(model.annotation.defaultUB); +0535 end +0536 if ~all(cellfun('isempty',model.rev)) +0537 model.rev = str2double(model.rev); +0538 else +0539 model.rev = []; 0540 end -0541 -0542 % Remove empty fields, otherwise fill to correct length -0543 % Reactions -0544 for i={'rxnNames','grRules','eccodes','rxnNotes','rxnReferences',... 
-0545 'rxnFrom','subSystems','rxnMiriams'} % Empty strings -0546 model = emptyOrFill(model,i{1},{''},'rxns'); -0547 end -0548 for i={'c'} % Zeros -0549 model = emptyOrFill(model,i{1},0,'rxns',true); +0541 if numel(model.rev)<numel(model.rxns) %No rev reported, assume from LB and UB +0542 model.rev(end+1:numel(model.rxns)-numel(model.rev),1) = double(model.lb<0 & model.ub>0); +0543 end +0544 +0545 % Remove empty fields, otherwise fill to correct length +0546 % Reactions +0547 for i={'rxnNames','grRules','eccodes','rxnNotes','rxnReferences',... +0548 'rxnFrom','subSystems','rxnMiriams'} % Empty strings +0549 model = emptyOrFill(model,i{1},{''},'rxns'); 0550 end -0551 for i={'rxnConfidenceScores','rxnDeltaG'} % NaNs -0552 model = emptyOrFill(model,i{1},NaN,'rxns'); +0551 for i={'c'} % Zeros +0552 model = emptyOrFill(model,i{1},0,'rxns',true); 0553 end -0554 for i={'rxnComps'} % Ones, assume first compartment -0555 model = emptyOrFill(model,i{1},1,'rxns'); +0554 for i={'rxnConfidenceScores','rxnDeltaG'} % NaNs +0555 model = emptyOrFill(model,i{1},NaN,'rxns'); 0556 end -0557 % Metabolites -0558 for i={'metNames','inchis','metFormulas','metMiriams','metFrom','metSmiles','metNotes'} % Empty strings -0559 model = emptyOrFill(model,i{1},{''},'mets'); -0560 end -0561 for i={'metCharges','unconstrained'} % Zeros -0562 model = emptyOrFill(model,i{1},0,'mets'); +0557 for i={'rxnComps'} % Ones, assume first compartment +0558 model = emptyOrFill(model,i{1},1,'rxns'); +0559 end +0560 % Metabolites +0561 for i={'metNames','inchis','metFormulas','metMiriams','metFrom','metSmiles','metNotes'} % Empty strings +0562 model = emptyOrFill(model,i{1},{''},'mets'); 0563 end -0564 for i={'metDeltaG'} % % NaNs -0565 model = emptyOrFill(model,i{1},NaN,'mets'); -0566 end -0567 for i={'metComps'} % Ones, assume first compartment -0568 model = emptyOrFill(model,i{1},1,'mets'); -0569 end -0570 % Genes -0571 for i={'geneMiriams','geneShortNames'} % Empty strings -0572 model = 
emptyOrFill(model,i{1},{''},'genes'); -0573 end -0574 for i={'geneComps'} % Ones, assume first compartment -0575 model = emptyOrFill(model,i{1},1,'genes'); +0564 for i={'metCharges','unconstrained'} % Zeros +0565 model = emptyOrFill(model,i{1},0,'mets'); +0566 end +0567 for i={'metDeltaG'} % % NaNs +0568 model = emptyOrFill(model,i{1},NaN,'mets'); +0569 end +0570 for i={'metComps'} % Ones, assume first compartment +0571 model = emptyOrFill(model,i{1},1,'mets'); +0572 end +0573 % Genes +0574 for i={'geneMiriams','geneShortNames','proteinNames'} % Empty strings +0575 model = emptyOrFill(model,i{1},{''},'genes'); 0576 end -0577 % Comps -0578 for i={'compNames'} % Empty strings -0579 model = emptyOrFill(model,i{1},{''},'comps'); -0580 end -0581 for i={'compOutside'} % First comp -0582 model = emptyOrFill(model,i{1},model.comps{1},'comps'); +0577 for i={'geneComps'} % Ones, assume first compartment +0578 model = emptyOrFill(model,i{1},1,'genes'); +0579 end +0580 % Comps +0581 for i={'compNames'} % Empty strings +0582 model = emptyOrFill(model,i{1},{''},'comps'); 0583 end -0584 % Single fields are kept, even if empty -0585 % for i={'description','name','version','date','annotation'} -0586 % if isempty(model.(i{1})) -0587 % model = rmfield(model,i{1}); -0588 % end -0589 % end -0590 -0591 % Make rxnGeneMat fields and map to the existing model.genes field -0592 [genes, rxnGeneMat] = getGenesFromGrRules(model.grRules); -0593 model.rxnGeneMat = sparse(numel(model.rxns),numel(model.genes)); -0594 [~,geneOrder] = ismember(genes,model.genes); -0595 if any(geneOrder == 0) -0596 error(['The grRules includes the following gene(s), that are not in '... 
-0597 'the list of model genes: ', genes{~geneOrder}]) -0598 end -0599 model.rxnGeneMat(:,geneOrder) = rxnGeneMat; -0600 -0601 % Finalize GECKO model -0602 if isGECKO -0603 % Fill in empty fields and empty entries -0604 for i={'kcat','source','notes','eccodes'} % Even keep empty -0605 model.ec = emptyOrFill(model.ec,i{1},{''},'rxns',true); -0606 end -0607 for i={'enzymes','mw','sequence'} -0608 model.ec = emptyOrFill(model.ec,i{1},{''},'genes',true); +0584 for i={'compOutside'} % First comp +0585 model = emptyOrFill(model,i{1},model.comps{1},'comps'); +0586 end +0587 % Single fields are kept, even if empty +0588 % for i={'description','name','version','date','annotation'} +0589 % if isempty(model.(i{1})) +0590 % model = rmfield(model,i{1}); +0591 % end +0592 % end +0593 +0594 % Make rxnGeneMat fields and map to the existing model.genes field +0595 [genes, rxnGeneMat] = getGenesFromGrRules(model.grRules); +0596 model.rxnGeneMat = sparse(numel(model.rxns),numel(model.genes)); +0597 [~,geneOrder] = ismember(genes,model.genes); +0598 if any(geneOrder == 0) +0599 error(['The grRules includes the following gene(s), that are not in '... 
+0600 'the list of model genes: ', genes{~geneOrder}]) +0601 end +0602 model.rxnGeneMat(:,geneOrder) = rxnGeneMat; +0603 +0604 % Finalize GECKO model +0605 if isGECKO +0606 % Fill in empty fields and empty entries +0607 for i={'kcat','source','notes','eccodes'} % Even keep empty +0608 model.ec = emptyOrFill(model.ec,i{1},{''},'rxns',true); 0609 end -0610 model.ec = emptyOrFill(model.ec,'concs',{'NaN'},'genes',true); -0611 model.ec = emptyOrFill(model.ec,'kcat',{'0'},'genes',true); -0612 % Change string to double -0613 for i={'kcat','mw','concs'} -0614 if isfield(model.ec,i{1}) -0615 model.ec.(i{1}) = str2double(model.ec.(i{1})); -0616 end -0617 end -0618 % Fill rxnEnzMat -0619 rxnIdx = cellfun('isempty', enzStoich(:,1)); -0620 enzStoich(rxnIdx,:) = ''; -0621 rxnIdx = cell2mat(enzStoich(:,1)); -0622 [~,enzIdx] = ismember(enzStoich(:,2),model.ec.enzymes); -0623 coeffs = cell2mat(enzStoich(:,3)); -0624 model.ec.rxnEnzMat = zeros(max(rxnIdx), max(enzIdx)); -0625 linearIndices = sub2ind([max(rxnIdx), max(enzIdx)], rxnIdx, enzIdx); -0626 model.ec.rxnEnzMat(linearIndices) = coeffs; -0627 %Parse ec-codes -0628 if ~isempty(ecGecko) -0629 locs = cell2mat(ecGecko(:,1)); -0630 for i=unique(locs)' -0631 ecGeckoCat=strjoin(ecGecko(locs==i,2),';'); -0632 model.ec.eccodes{i,1}=ecGeckoCat; -0633 end -0634 emptyEc=cellfun('isempty',model.ec.eccodes); -0635 model.ec.eccodes(emptyEc)={''}; -0636 end -0637 end -0638 -0639 if verbose -0640 fprintf(' Done!\n'); -0641 end -0642 end -0643 -0644 function model = emptyOrFill(model,field,emptyEntry,type,keepEmpty) -0645 if nargin<5 -0646 keepEmpty=false; -0647 end -0648 if isnumeric(emptyEntry) -0649 emptyCells=isempty(model.(field)); -0650 else -0651 emptyCells=cellfun('isempty',model.(field)); -0652 end -0653 if all(emptyCells) && ~keepEmpty -0654 model = rmfield(model, field); -0655 elseif numel(model.(field))<numel(model.(type)) -0656 model.(field)(end+1:numel(model.(type)),1)=emptyEntry; -0657 end -0658 end -0659 -0660 function model = 
readFieldValue(model, fieldName, value, pos) -0661 if numel(model.(fieldName))<pos-1 -0662 model.(fieldName)(end+1:pos,1) = {''}; -0663 end -0664 model.(fieldName)(pos,1) = {value}; -0665 end -0666 -0667 function [miriams, miriamKey,entryNumber] = gatherAnnotation(pos,miriams,key,value,miriamKey,entryNumber) -0668 if isempty(key) -0669 key=miriamKey; -0670 else -0671 miriamKey=key; -0672 end -0673 if ~isempty(value) -0674 miriams(entryNumber,1:3) = {pos, key, strip(value)}; -0675 else -0676 entryNumber = entryNumber - 1; -0677 end -0678 end +0610 for i={'enzymes','mw','sequence'} +0611 model.ec = emptyOrFill(model.ec,i{1},{''},'genes',true); +0612 end +0613 model.ec = emptyOrFill(model.ec,'concs',{'NaN'},'genes',true); +0614 model.ec = emptyOrFill(model.ec,'kcat',{'0'},'genes',true); +0615 % Change string to double +0616 for i={'kcat','mw','concs'} +0617 if isfield(model.ec,i{1}) +0618 model.ec.(i{1}) = str2double(model.ec.(i{1})); +0619 end +0620 end +0621 % Fill rxnEnzMat +0622 rxnIdx = cellfun('isempty', enzStoich(:,1)); +0623 enzStoich(rxnIdx,:) = ''; +0624 rxnIdx = cell2mat(enzStoich(:,1)); +0625 [~,enzIdx] = ismember(enzStoich(:,2),model.ec.enzymes); +0626 coeffs = cell2mat(enzStoich(:,3)); +0627 model.ec.rxnEnzMat = zeros(max(rxnIdx), max(enzIdx)); +0628 linearIndices = sub2ind([max(rxnIdx), max(enzIdx)], rxnIdx, enzIdx); +0629 model.ec.rxnEnzMat(linearIndices) = coeffs; +0630 %Parse ec-codes +0631 if ~isempty(ecGecko) +0632 locs = cell2mat(ecGecko(:,1)); +0633 for i=unique(locs)' +0634 ecGeckoCat=strjoin(ecGecko(locs==i,2),';'); +0635 model.ec.eccodes{i,1}=ecGeckoCat; +0636 end +0637 emptyEc=cellfun('isempty',model.ec.eccodes); +0638 model.ec.eccodes(emptyEc)={''}; +0639 end +0640 end +0641 +0642 if verbose +0643 fprintf(' Done!\n'); +0644 end +0645 end +0646 +0647 function model = emptyOrFill(model,field,emptyEntry,type,keepEmpty) +0648 if nargin<5 +0649 keepEmpty=false; +0650 end +0651 if isnumeric(emptyEntry) +0652 emptyCells=isempty(model.(field)); 
+0653 else +0654 emptyCells=cellfun('isempty',model.(field)); +0655 end +0656 if all(emptyCells) && ~keepEmpty +0657 model = rmfield(model, field); +0658 elseif numel(model.(field))<numel(model.(type)) +0659 model.(field)(end+1:numel(model.(type)),1)=emptyEntry; +0660 end +0661 end +0662 +0663 function model = readFieldValue(model, fieldName, value, pos) +0664 if numel(model.(fieldName))<pos-1 +0665 model.(fieldName)(end+1:pos,1) = {''}; +0666 end +0667 model.(fieldName)(pos,1) = {value}; +0668 end +0669 +0670 function [miriams, miriamKey,entryNumber] = gatherAnnotation(pos,miriams,key,value,miriamKey,entryNumber) +0671 if isempty(key) +0672 key=miriamKey; +0673 else +0674 miriamKey=key; +0675 end +0676 if ~isempty(value) +0677 miriams(entryNumber,1:3) = {pos, key, strip(value)}; +0678 else +0679 entryNumber = entryNumber - 1; +0680 end +0681 end
Generated by m2html © 2005
\ No newline at end of file diff --git a/doc/io/writeYAMLmodel.html b/doc/io/writeYAMLmodel.html index 66b837d2..c580cff7 100644 --- a/doc/io/writeYAMLmodel.html +++ b/doc/io/writeYAMLmodel.html @@ -172,268 +172,269 @@

SOURCE CODE ^' - !!omap\n'); 0114 writeField(model, fid, 'genes', 'txt', i, ' - id', preserveQuotes) 0115 writeField(model, fid, 'geneShortNames', 'txt', i, ' - name', preserveQuotes) -0116 writeField(model, fid, 'geneMiriams', 'txt', i, ' - annotation', preserveQuotes) -0117 end -0118 -0119 %Compartments: -0120 fprintf(fid,'- compartments: !!omap\n'); -0121 for i = 1:length(model.comps) -0122 writeField(model, fid, 'compNames', 'txt', i, ['- ' model.comps{i}], preserveQuotes) -0123 writeField(model, fid, 'compMiriams', 'txt', i, '- annotation', preserveQuotes) -0124 end -0125 +0116 writeField(model, fid, 'proteinNames', 'txt', i, ' - protein', preserveQuotes) +0117 writeField(model, fid, 'geneMiriams', 'txt', i, ' - annotation', preserveQuotes) +0118 end +0119 +0120 %Compartments: +0121 fprintf(fid,'- compartments: !!omap\n'); +0122 for i = 1:length(model.comps) +0123 writeField(model, fid, 'compNames', 'txt', i, ['- ' model.comps{i}], preserveQuotes) +0124 writeField(model, fid, 'compMiriams', 'txt', i, '- annotation', preserveQuotes) +0125 end 0126 -0127 %EC-model: -0128 if isfield(model,'ec') -0129 fprintf(fid,'- ec-rxns:\n'); -0130 for i = 1:length(model.ec.rxns) -0131 fprintf(fid,' - !!omap\n'); -0132 writeField(model.ec, fid, 'rxns', 'txt', i, '- id', preserveQuotes) -0133 writeField(model.ec, fid, 'kcat', 'num', i, '- kcat', preserveQuotes) -0134 writeField(model.ec, fid, 'source', 'txt', i, '- source', preserveQuotes) -0135 writeField(model.ec, fid, 'notes', 'txt', i, '- notes', preserveQuotes) -0136 writeField(model.ec, fid, 'eccodes', 'txt', i, '- eccodes', preserveQuotes) -0137 writeField(model.ec, fid, 'rxnEnzMat', 'txt', i, '- enzymes', preserveQuotes) -0138 end -0139 -0140 fprintf(fid,'- ec-enzymes:\n'); -0141 for i = 1:length(model.ec.genes) -0142 fprintf(fid,' - !!omap\n'); -0143 writeField(model.ec, fid, 'genes', 'txt', i, '- genes', preserveQuotes) -0144 writeField(model.ec, fid, 'enzymes', 'txt', i, '- enzymes', preserveQuotes) -0145 
writeField(model.ec, fid, 'mw', 'num', i, '- mw', preserveQuotes) -0146 writeField(model.ec, fid, 'sequence', 'txt', i, '- sequence', preserveQuotes) -0147 writeField(model.ec, fid, 'concs', 'num', i, '- concs', preserveQuotes) -0148 end -0149 end -0150 -0151 %Close file: -0152 fclose(fid); -0153 -0154 end -0155 -0156 function writeField(model,fid,fieldName,type,pos,name,preserveQuotes) -0157 %Writes a new line in the yaml file if the field exists and the field is -0158 %not empty at the correspoinding position. It's recursive for some fields -0159 %(metMiriams, rxnMiriams, and S) -0160 -0161 if isfield(model,fieldName) -0162 if strcmp(fieldName,'metComps') -0163 %metComps: write full name -0164 fieldName = 'comps'; -0165 pos = model.metComps(pos); -0166 end -0167 -0168 field = model.(fieldName); -0169 -0170 if strcmp(fieldName,'metMiriams') -0171 if ~isempty(model.metMiriams{pos}) -0172 fprintf(fid,' %s: !!omap\n',name); -0173 for i=1:size(model.newMetMiriams,2) -0174 %'i' represents the different miriam names, e.g. 
-0175 %kegg.compound or chebi -0176 if ~isempty(model.newMetMiriams{pos,i}) -0177 %As during the following writeField call the value of -0178 %'i' would be lost, it is temporarily concatenated to -0179 %'name' parameter, which will be edited later -0180 writeField(model, fid, 'newMetMiriams', 'txt', pos, [' - ' model.newMetMiriamNames{i} '_' sprintf('%d',i)], preserveQuotes) -0181 end -0182 end -0183 end -0184 -0185 elseif strcmp(fieldName,'rxnMiriams') -0186 if ~isempty(model.rxnMiriams{pos}) -0187 fprintf(fid,' %s: !!omap\n',name); -0188 for i=1:size(model.newRxnMiriams,2) -0189 if ~isempty(model.newRxnMiriams{pos,i}) -0190 writeField(model, fid, 'newRxnMiriams', 'txt', pos, [' - ' model.newRxnMiriamNames{i} '_' sprintf('%d',i)], preserveQuotes) -0191 end -0192 end -0193 end -0194 -0195 elseif strcmp(fieldName,'geneMiriams') -0196 if ~isempty(model.geneMiriams{pos}) -0197 fprintf(fid,' %s: !!omap\n',name); -0198 for i=1:size(model.newGeneMiriams,2) -0199 if ~isempty(model.newGeneMiriams{pos,i}) -0200 writeField(model, fid, 'newGeneMiriams', 'txt', pos, [' - ' model.newGeneMiriamNames{i} '_' sprintf('%d',i)], preserveQuotes) -0201 end -0202 end -0203 end -0204 -0205 elseif strcmp(fieldName,'compMiriams') -0206 if ~isempty(model.compMiriams{pos}) -0207 fprintf(fid,' %s: !!omap\n',name); -0208 for i=1:size(model.newCompMiriams,2) -0209 if ~isempty(model.newCompMiriams{pos,i}) -0210 writeField(model, fid, 'newCompMiriams', 'txt', pos, [' - ' model.newCompMiriamNames{i} '_' sprintf('%d',i)], preserveQuotes) -0211 end -0212 end -0213 end -0214 -0215 elseif strcmp(fieldName,'S') -0216 %S: create header & write each metabolite in a new line -0217 fprintf(fid,' %s: !!omap\n',name); -0218 if sum(field(:,pos) ~= 0) > 0 -0219 model.mets = model.mets(field(:,pos) ~= 0); -0220 model.coeffs = field(field(:,pos) ~= 0,pos); -0221 %Sort metabolites: -0222 [model.mets,order] = sort(model.mets); -0223 model.coeffs = model.coeffs(order); -0224 for i = 1:length(model.mets) -0225 
writeField(model, fid, 'coeffs', 'num', i, [' - ' model.mets{i}], preserveQuotes) -0226 end -0227 end -0228 -0229 elseif strcmp(fieldName,'rxnEnzMat') -0230 %S: create header & write each enzyme in a new line -0231 fprintf(fid,' %s: !!omap\n',name); -0232 if sum(field(pos,:) ~= 0) > 0 -0233 model.enzymes = model.enzymes(field(pos,:) ~= 0); -0234 model.coeffs = field(pos,field(pos,:) ~= 0); -0235 %Sort metabolites: -0236 [model.enzymes,order] = sort(model.enzymes); -0237 model.coeffs = model.coeffs(order); -0238 for i = 1:length(model.enzymes) -0239 writeField(model, fid, 'coeffs', 'num', i, [' - ' model.enzymes{i}], preserveQuotes) -0240 end -0241 end -0242 -0243 elseif sum(strcmp({'subSystems','newMetMiriams','newRxnMiriams','newGeneMiriams','newCompMiriams','eccodes'},fieldName)) > 0 -0244 %eccodes/rxnNotes: if 1 write in 1 line, if more create header and list -0245 if strcmp(fieldName,'subSystems') -0246 list = field{pos}; %subSystems already comes in a cell array -0247 if isempty(list) -0248 return -0249 end -0250 elseif strcmp(fieldName,'newMetMiriams') -0251 index = str2double(regexprep(name,'^.+_','')); -0252 name = regexprep(name,'_\d+$',''); -0253 list = strsplit(model.newMetMiriams{pos,index},'; '); -0254 elseif strcmp(fieldName,'newRxnMiriams') -0255 index = str2double(regexprep(name,'^.+_','')); -0256 name = regexprep(name,'_\d+$',''); -0257 list = strsplit(model.newRxnMiriams{pos,index},'; '); -0258 elseif strcmp(fieldName,'newGeneMiriams') -0259 index = str2double(regexprep(name,'^.+_','')); -0260 name = regexprep(name,'_\d+$',''); -0261 list = strsplit(model.newGeneMiriams{pos,index},'; '); -0262 elseif strcmp(fieldName,'newCompMiriams') -0263 index = str2double(regexprep(name,'^.+_','')); -0264 name = regexprep(name,'_\d+$',''); -0265 list = strsplit(model.newCompMiriams{pos,index},'; '); -0266 elseif ~isempty(field{pos}) -0267 list = strrep(field{pos},' ',''); -0268 list = strsplit(list,';'); -0269 else -0270 return % empty, needs no line in file 
-0271 end -0272 list=strip(list); -0273 -0274 if length(list) == 1 && ~strcmp(list{1},'') && ~strcmp(fieldName,'subSystems') -0275 if preserveQuotes -0276 list = ['"' list{1} '"']; -0277 end -0278 fprintf(fid,' %s: %s\n',name,list); -0279 elseif ischar(list) && strcmp(fieldName,'subSystems') -0280 if preserveQuotes -0281 list = ['"' list '"']; -0282 end -0283 fprintf(fid,' %s: %s\n',name,list); -0284 elseif length(list) > 1 || strcmp(fieldName,'subSystems') -0285 if preserveQuotes -0286 for j=1:numel(list) -0287 list{j} = ['"' list{j} '"']; -0288 end -0289 end -0290 fprintf(fid,' %s:\n',name); -0291 for i = 1:length(list) -0292 fprintf(fid,'%s - %s\n',regexprep(name,'(^\s*).*','$1'),list{i}); -0293 end -0294 end -0295 -0296 elseif sum(pos) > 0 -0297 %All other fields: -0298 if strcmp(type,'txt') -0299 value = field{pos}; -0300 if preserveQuotes && ~isempty(value) -0301 value = ['"',value,'"']; -0302 end -0303 elseif strcmp(type,'num') -0304 if isnan(field(pos)) -0305 value = []; -0306 else -0307 value = sprintf('%.15g',full(field(pos))); -0308 end -0309 end -0310 if ~isempty(value) -0311 fprintf(fid,' %s: %s\n',name,value); -0312 end -0313 end -0314 end +0127 +0128 %EC-model: +0129 if isfield(model,'ec') +0130 fprintf(fid,'- ec-rxns:\n'); +0131 for i = 1:length(model.ec.rxns) +0132 fprintf(fid,' - !!omap\n'); +0133 writeField(model.ec, fid, 'rxns', 'txt', i, '- id', preserveQuotes) +0134 writeField(model.ec, fid, 'kcat', 'num', i, '- kcat', preserveQuotes) +0135 writeField(model.ec, fid, 'source', 'txt', i, '- source', preserveQuotes) +0136 writeField(model.ec, fid, 'notes', 'txt', i, '- notes', preserveQuotes) +0137 writeField(model.ec, fid, 'eccodes', 'txt', i, '- eccodes', preserveQuotes) +0138 writeField(model.ec, fid, 'rxnEnzMat', 'txt', i, '- enzymes', preserveQuotes) +0139 end +0140 +0141 fprintf(fid,'- ec-enzymes:\n'); +0142 for i = 1:length(model.ec.genes) +0143 fprintf(fid,' - !!omap\n'); +0144 writeField(model.ec, fid, 'genes', 'txt', i, '- genes', 
preserveQuotes) +0145 writeField(model.ec, fid, 'enzymes', 'txt', i, '- enzymes', preserveQuotes) +0146 writeField(model.ec, fid, 'mw', 'num', i, '- mw', preserveQuotes) +0147 writeField(model.ec, fid, 'sequence', 'txt', i, '- sequence', preserveQuotes) +0148 writeField(model.ec, fid, 'concs', 'num', i, '- concs', preserveQuotes) +0149 end +0150 end +0151 +0152 %Close file: +0153 fclose(fid); +0154 +0155 end +0156 +0157 function writeField(model,fid,fieldName,type,pos,name,preserveQuotes) +0158 %Writes a new line in the yaml file if the field exists and the field is +0159 %not empty at the correspoinding position. It's recursive for some fields +0160 %(metMiriams, rxnMiriams, and S) +0161 +0162 if isfield(model,fieldName) +0163 if strcmp(fieldName,'metComps') +0164 %metComps: write full name +0165 fieldName = 'comps'; +0166 pos = model.metComps(pos); +0167 end +0168 +0169 field = model.(fieldName); +0170 +0171 if strcmp(fieldName,'metMiriams') +0172 if ~isempty(model.metMiriams{pos}) +0173 fprintf(fid,' %s: !!omap\n',name); +0174 for i=1:size(model.newMetMiriams,2) +0175 %'i' represents the different miriam names, e.g. 
+0176 %kegg.compound or chebi +0177 if ~isempty(model.newMetMiriams{pos,i}) +0178 %As during the following writeField call the value of +0179 %'i' would be lost, it is temporarily concatenated to +0180 %'name' parameter, which will be edited later +0181 writeField(model, fid, 'newMetMiriams', 'txt', pos, [' - ' model.newMetMiriamNames{i} '_' sprintf('%d',i)], preserveQuotes) +0182 end +0183 end +0184 end +0185 +0186 elseif strcmp(fieldName,'rxnMiriams') +0187 if ~isempty(model.rxnMiriams{pos}) +0188 fprintf(fid,' %s: !!omap\n',name); +0189 for i=1:size(model.newRxnMiriams,2) +0190 if ~isempty(model.newRxnMiriams{pos,i}) +0191 writeField(model, fid, 'newRxnMiriams', 'txt', pos, [' - ' model.newRxnMiriamNames{i} '_' sprintf('%d',i)], preserveQuotes) +0192 end +0193 end +0194 end +0195 +0196 elseif strcmp(fieldName,'geneMiriams') +0197 if ~isempty(model.geneMiriams{pos}) +0198 fprintf(fid,' %s: !!omap\n',name); +0199 for i=1:size(model.newGeneMiriams,2) +0200 if ~isempty(model.newGeneMiriams{pos,i}) +0201 writeField(model, fid, 'newGeneMiriams', 'txt', pos, [' - ' model.newGeneMiriamNames{i} '_' sprintf('%d',i)], preserveQuotes) +0202 end +0203 end +0204 end +0205 +0206 elseif strcmp(fieldName,'compMiriams') +0207 if ~isempty(model.compMiriams{pos}) +0208 fprintf(fid,' %s: !!omap\n',name); +0209 for i=1:size(model.newCompMiriams,2) +0210 if ~isempty(model.newCompMiriams{pos,i}) +0211 writeField(model, fid, 'newCompMiriams', 'txt', pos, [' - ' model.newCompMiriamNames{i} '_' sprintf('%d',i)], preserveQuotes) +0212 end +0213 end +0214 end +0215 +0216 elseif strcmp(fieldName,'S') +0217 %S: create header & write each metabolite in a new line +0218 fprintf(fid,' %s: !!omap\n',name); +0219 if sum(field(:,pos) ~= 0) > 0 +0220 model.mets = model.mets(field(:,pos) ~= 0); +0221 model.coeffs = field(field(:,pos) ~= 0,pos); +0222 %Sort metabolites: +0223 [model.mets,order] = sort(model.mets); +0224 model.coeffs = model.coeffs(order); +0225 for i = 1:length(model.mets) +0226 
writeField(model, fid, 'coeffs', 'num', i, [' - ' model.mets{i}], preserveQuotes) +0227 end +0228 end +0229 +0230 elseif strcmp(fieldName,'rxnEnzMat') +0231 %S: create header & write each enzyme in a new line +0232 fprintf(fid,' %s: !!omap\n',name); +0233 if sum(field(pos,:) ~= 0) > 0 +0234 model.enzymes = model.enzymes(field(pos,:) ~= 0); +0235 model.coeffs = field(pos,field(pos,:) ~= 0); +0236 %Sort metabolites: +0237 [model.enzymes,order] = sort(model.enzymes); +0238 model.coeffs = model.coeffs(order); +0239 for i = 1:length(model.enzymes) +0240 writeField(model, fid, 'coeffs', 'num', i, [' - ' model.enzymes{i}], preserveQuotes) +0241 end +0242 end +0243 +0244 elseif sum(strcmp({'subSystems','newMetMiriams','newRxnMiriams','newGeneMiriams','newCompMiriams','eccodes'},fieldName)) > 0 +0245 %eccodes/rxnNotes: if 1 write in 1 line, if more create header and list +0246 if strcmp(fieldName,'subSystems') +0247 list = field{pos}; %subSystems already comes in a cell array +0248 if isempty(list) +0249 return +0250 end +0251 elseif strcmp(fieldName,'newMetMiriams') +0252 index = str2double(regexprep(name,'^.+_','')); +0253 name = regexprep(name,'_\d+$',''); +0254 list = strsplit(model.newMetMiriams{pos,index},'; '); +0255 elseif strcmp(fieldName,'newRxnMiriams') +0256 index = str2double(regexprep(name,'^.+_','')); +0257 name = regexprep(name,'_\d+$',''); +0258 list = strsplit(model.newRxnMiriams{pos,index},'; '); +0259 elseif strcmp(fieldName,'newGeneMiriams') +0260 index = str2double(regexprep(name,'^.+_','')); +0261 name = regexprep(name,'_\d+$',''); +0262 list = strsplit(model.newGeneMiriams{pos,index},'; '); +0263 elseif strcmp(fieldName,'newCompMiriams') +0264 index = str2double(regexprep(name,'^.+_','')); +0265 name = regexprep(name,'_\d+$',''); +0266 list = strsplit(model.newCompMiriams{pos,index},'; '); +0267 elseif ~isempty(field{pos}) +0268 list = strrep(field{pos},' ',''); +0269 list = strsplit(list,';'); +0270 else +0271 return % empty, needs no line in file 
+0272 end +0273 list=strip(list); +0274 +0275 if length(list) == 1 && ~strcmp(list{1},'') && ~strcmp(fieldName,'subSystems') +0276 if preserveQuotes +0277 list = ['"' list{1} '"']; +0278 end +0279 fprintf(fid,' %s: %s\n',name,list); +0280 elseif ischar(list) && strcmp(fieldName,'subSystems') +0281 if preserveQuotes +0282 list = ['"' list '"']; +0283 end +0284 fprintf(fid,' %s: %s\n',name,list); +0285 elseif length(list) > 1 || strcmp(fieldName,'subSystems') +0286 if preserveQuotes +0287 for j=1:numel(list) +0288 list{j} = ['"' list{j} '"']; +0289 end +0290 end +0291 fprintf(fid,' %s:\n',name); +0292 for i = 1:length(list) +0293 fprintf(fid,'%s - %s\n',regexprep(name,'(^\s*).*','$1'),list{i}); +0294 end +0295 end +0296 +0297 elseif sum(pos) > 0 +0298 %All other fields: +0299 if strcmp(type,'txt') +0300 value = field{pos}; +0301 if preserveQuotes && ~isempty(value) +0302 value = ['"',value,'"']; +0303 end +0304 elseif strcmp(type,'num') +0305 if isnan(field(pos)) +0306 value = []; +0307 else +0308 value = sprintf('%.15g',full(field(pos))); +0309 end +0310 end +0311 if ~isempty(value) +0312 fprintf(fid,' %s: %s\n',name,value); +0313 end +0314 end 0315 end -0316 -0317 function writeMetadata(model,fid) -0318 % Writes model metadata to the yaml file. This information will eventually -0319 % be extracted entirely from the model, but for now, many of the entries -0320 % are hard-coded defaults for HumanGEM. 
-0321 -0322 fprintf(fid, '- metaData:\n'); -0323 if isfield(model,'id') -0324 fprintf(fid, ' id: "%s"\n', model.id); -0325 else -0326 fprintf(fid, ' id: "blankID"\n'); -0327 end -0328 if isfield(model,'name') -0329 fprintf(fid, ' name: "%s"\n',model.name); -0330 else -0331 fprintf(fid, ' name: "blankName"\n'); -0332 end -0333 if isfield(model,'version') -0334 fprintf(fid, ' version: "%s"\n',model.version); -0335 end -0336 fprintf(fid, ' date: "%s"\n',datestr(now,29)); % 29=YYYY-MM-DD -0337 if isfield(model,'annotation') -0338 if isfield(model.annotation,'defaultLB') -0339 fprintf(fid, ' defaultLB: "%g"\n', model.annotation.defaultLB); -0340 end -0341 if isfield(model.annotation,'defaultUB') -0342 fprintf(fid, ' defaultUB: "%g"\n', model.annotation.defaultUB); -0343 end -0344 if isfield(model.annotation,'givenName') -0345 fprintf(fid, ' givenName: "%s"\n', model.annotation.givenName); -0346 end -0347 if isfield(model.annotation,'familyName') -0348 fprintf(fid, ' familyName: "%s"\n', model.annotation.familyName); -0349 end -0350 if isfield(model.annotation,'authors') -0351 fprintf(fid, ' authors: "%s"\n', model.annotation.authors); -0352 end -0353 if isfield(model.annotation,'email') -0354 fprintf(fid, ' email: "%s"\n', model.annotation.email); -0355 end -0356 if isfield(model.annotation,'organization') -0357 fprintf(fid, ' organization: "%s"\n',model.annotation.organization); -0358 end -0359 if isfield(model.annotation,'taxonomy') -0360 fprintf(fid, ' taxonomy: "%s"\n', model.annotation.taxonomy); -0361 end -0362 if isfield(model.annotation,'note') -0363 fprintf(fid, ' note: "%s"\n', model.annotation.note); -0364 end -0365 if isfield(model.annotation,'sourceUrl') -0366 fprintf(fid, ' sourceUrl: "%s"\n', model.annotation.sourceUrl); -0367 end -0368 end -0369 if isfield(model,'ec') -0370 if model.ec.geckoLight -0371 geckoLight = 'true'; -0372 else -0373 geckoLight = 'false'; -0374 end -0375 fprintf(fid,' geckoLight: "%s"\n',geckoLight); -0376 end -0377 end +0316 end 
+0317 +0318 function writeMetadata(model,fid) +0319 % Writes model metadata to the yaml file. This information will eventually +0320 % be extracted entirely from the model, but for now, many of the entries +0321 % are hard-coded defaults for HumanGEM. +0322 +0323 fprintf(fid, '- metaData:\n'); +0324 if isfield(model,'id') +0325 fprintf(fid, ' id: "%s"\n', model.id); +0326 else +0327 fprintf(fid, ' id: "blankID"\n'); +0328 end +0329 if isfield(model,'name') +0330 fprintf(fid, ' name: "%s"\n',model.name); +0331 else +0332 fprintf(fid, ' name: "blankName"\n'); +0333 end +0334 if isfield(model,'version') +0335 fprintf(fid, ' version: "%s"\n',model.version); +0336 end +0337 fprintf(fid, ' date: "%s"\n',datestr(now,29)); % 29=YYYY-MM-DD +0338 if isfield(model,'annotation') +0339 if isfield(model.annotation,'defaultLB') +0340 fprintf(fid, ' defaultLB: "%g"\n', model.annotation.defaultLB); +0341 end +0342 if isfield(model.annotation,'defaultUB') +0343 fprintf(fid, ' defaultUB: "%g"\n', model.annotation.defaultUB); +0344 end +0345 if isfield(model.annotation,'givenName') +0346 fprintf(fid, ' givenName: "%s"\n', model.annotation.givenName); +0347 end +0348 if isfield(model.annotation,'familyName') +0349 fprintf(fid, ' familyName: "%s"\n', model.annotation.familyName); +0350 end +0351 if isfield(model.annotation,'authors') +0352 fprintf(fid, ' authors: "%s"\n', model.annotation.authors); +0353 end +0354 if isfield(model.annotation,'email') +0355 fprintf(fid, ' email: "%s"\n', model.annotation.email); +0356 end +0357 if isfield(model.annotation,'organization') +0358 fprintf(fid, ' organization: "%s"\n',model.annotation.organization); +0359 end +0360 if isfield(model.annotation,'taxonomy') +0361 fprintf(fid, ' taxonomy: "%s"\n', model.annotation.taxonomy); +0362 end +0363 if isfield(model.annotation,'note') +0364 fprintf(fid, ' note: "%s"\n', model.annotation.note); +0365 end +0366 if isfield(model.annotation,'sourceUrl') +0367 fprintf(fid, ' sourceUrl: "%s"\n', 
model.annotation.sourceUrl); +0368 end +0369 end +0370 if isfield(model,'ec') +0371 if model.ec.geckoLight +0372 geckoLight = 'true'; +0373 else +0374 geckoLight = 'false'; +0375 end +0376 fprintf(fid,' geckoLight: "%s"\n',geckoLight); +0377 end +0378 end
Generated by m2html © 2005
\ No newline at end of file diff --git a/doc/struct_conversion/ravenCobraWrapper.html b/doc/struct_conversion/ravenCobraWrapper.html index 07137fa5..f8aaad4d 100644 --- a/doc/struct_conversion/ravenCobraWrapper.html +++ b/doc/struct_conversion/ravenCobraWrapper.html @@ -49,8 +49,7 @@

DESCRIPTION ^SOURCE CODE ^% reconstructed based on lower bound reaction values 0022 % 0023 % NOTE: During COBRA -> RAVEN -> COBRA conversion cycle the following -0024 % fields are lost: geneEntrezID, metSmiles, modelVersion, -0025 % proteinNames, proteins -0026 % -0027 % NOTE: The information about mandatory RAVEN fields was taken from -0028 % checkModelStruct function, whereas the corresponding information about -0029 % COBRA fields was fetched from verifyModel function -0030 % -0031 % Usage: newModel=ravenCobraWrapper(model) -0032 -0033 if isfield(model,'rules') -0034 isRaven=false; -0035 else -0036 isRaven=true; -0037 end -0038 -0039 ravenPath=findRAVENroot(); -0040 -0041 % Load COBRA field information -0042 fid = fopen(fullfile(ravenPath,'struct_conversion','COBRA_structure_fields.csv')); % Taken from https://github.com/opencobra/cobratoolbox/blob/develop/src/base/io/definitions/COBRA_structure_fields.csv -0043 fieldFile = textscan(fid,repmat('%s',1,15),'Delimiter','\t','HeaderLines',1); -0044 dbFields = ~cellfun(@isempty,fieldFile{5}); % Only keep fields with database annotations that should be translated to xxxMiriams -0045 dbFields = dbFields & ~contains(fieldFile{1},{'metInChIString','metKEGGID','metPubChemID','rxnECNumbers'}); -0046 COBRAnamespace = fieldFile{5}(dbFields); -0047 COBRAnamespace = regexprep(COBRAnamespace,';.*',''); % Only keep first suggested namespace -0048 COBRAfields = fieldFile{1}(dbFields); -0049 fclose(fid); -0050 -0051 % Load conversion between additional COBRA fields and namespaces: -0052 fid = fopen(fullfile(ravenPath,'struct_conversion','cobraNamespaces.csv')); -0053 fieldFile = textscan(fid,'%s %s','Delimiter',',','HeaderLines',0); -0054 COBRAfields = [COBRAfields; fieldFile{1}]; -0055 COBRAnamespace = [COBRAnamespace; fieldFile{2}]; -0056 rxnCOBRAfields = COBRAfields(startsWith(COBRAfields,'rxn')); -0057 rxnNamespaces = COBRAnamespace(startsWith(COBRAfields,'rxn')); -0058 metCOBRAfields = 
COBRAfields(startsWith(COBRAfields,'met')); -0059 metNamespaces = COBRAnamespace(startsWith(COBRAfields,'met')); -0060 geneCOBRAfields = COBRAfields(startsWith(COBRAfields,'gene')); -0061 geneNamespaces = COBRAnamespace(startsWith(COBRAfields,'gene')); -0062 fclose(fid); -0063 -0064 if isRaven -0065 %Firstly remove boundary metabolites -0066 model=simplifyModel(model); -0067 end -0068 -0069 % Keep fields that have identical names and content -0070 newModel.S=model.S; -0071 newModel.lb=model.lb; -0072 newModel.ub=model.ub; -0073 if isfield(model,'c') -0074 newModel.c=model.c; -0075 else -0076 newModel.c=zeros(numel(model.rxns),1); -0077 end -0078 newModel.rxns=model.rxns; -0079 optFields = {'rxnNames','subSystems','rxnNotes','metDeltaG','rxnDeltaG',... -0080 'metFormulas','comps','compNames','metCharges','genes',... -0081 'rxnConfidenceScores','rxnGeneMat','metNotes','rev'}; -0082 for i=1:length(optFields) -0083 if isfield(model,optFields{i}) -0084 newModel.(optFields{i})=model.(optFields{i}); -0085 end -0086 end -0087 -0088 % Convert unique fields -0089 if isRaven -0090 fprintf('Converting RAVEN structure to COBRA..\n'); -0091 %Convert from RAVEN to COBRA structure -0092 -0093 %Mandatory COBRA fields -0094 newModel.rxns=model.rxns; -0095 if all(~cellfun(@isempty,regexp(model.mets,'\[[^\]]+\]$'))) -0096 newModel.mets=model.mets; -0097 else -0098 %Check if model has compartment info as "met_c" suffix in all metabolites: -0099 BiGGformat = false(size(model.mets)); -0100 for i=1:numel(model.comps) -0101 compPos=model.metComps==i; -0102 BiGGformat(compPos)=~cellfun(@isempty,regexp(model.mets(compPos),['_' model.comps{i} '$'])); -0103 end -0104 if all(BiGGformat) -0105 newModel.mets=model.mets; -0106 for i=1:numel(model.comps) -0107 newModel.mets=regexprep(newModel.mets,['_' model.comps{i} '$'],['[' model.comps{i} ']']); -0108 end -0109 else -0110 newModel.mets=strcat(model.mets,'[',model.comps(model.metComps),']'); -0111 end -0112 end -0113 -0114 %b, csense, osenseStr, 
genes, rules are also mandatory, but defined -0115 %later to match the order of fields -0116 -0117 %Optional COBRA fields -0118 if isfield(model,'id') -0119 newModel.modelID=model.id; -0120 end -0121 if isfield(model,'name') -0122 newModel.modelName=model.name; -0123 end -0124 if isfield(model,'eccodes') -0125 newModel.rxnECNumbers=model.eccodes; -0126 end -0127 if isfield(model,'rxnMiriams') -0128 [miriams,extractedMiriamNames]=extractMiriam(model.rxnMiriams); -0129 for i = 1:length(rxnCOBRAfields) -0130 j=ismember(extractedMiriamNames,rxnNamespaces{i}); -0131 if any(j) -0132 eval(['newModel.' rxnCOBRAfields{i} ' = miriams(:,j);']) -0133 end -0134 end -0135 end -0136 if isfield(model,'rxnReferences') % Concatenate model.rxnReferences to those extracted from model.rxnMiriams -0137 if isfield(newModel,'rxnReferences') -0138 newModel.rxnReferences = strcat(newModel.rxnReferences,{'; '},model.rxnReferences); -0139 newModel.rxnReferences = regexprep(newModel.rxnReferences,'^; $',''); -0140 else -0141 newModel.rxnReferences = model.rxnReferences; -0142 end -0143 end -0144 if isfield(model,'metNames') -0145 newModel.metNames=strcat(model.metNames,' [',model.compNames(model.metComps),']'); -0146 end -0147 if isfield(model,'metMiriams') -0148 [miriams,extractedMiriamNames]=extractMiriam(model.metMiriams); -0149 %Shorten miriam names for KEGG and PubChem. 
These shorter names -0150 %will be used later to concatenate KEGG COMPOUND/GLYCAN and PubChem -0151 %Compound/Substance, into corresponding COBRA model fields -0152 extractedMiriamNames=regexprep(extractedMiriamNames,'^kegg\..+','kegg'); -0153 extractedMiriamNames=regexprep(extractedMiriamNames,'^pubchem\..+','pubchem'); -0154 i=ismember(extractedMiriamNames,'kegg'); -0155 if any(i) % Combine KEGG compounds and glycans -0156 for j=1:length(i) -0157 if i(j) && isfield(newModel,'metKEGGID')~=1 -0158 newModel.metKEGGID=miriams(:,j); -0159 elseif i(j) -0160 newModel.metKEGGID=strcat(newModel.metKEGGID,';',miriams(:,j)); -0161 end -0162 end -0163 newModel.metKEGGID=regexprep(newModel.metKEGGID,'^;|;$',''); -0164 end -0165 i=ismember(extractedMiriamNames,'pubchem'); -0166 if any(i) % Combine Pubchem compounds and substances -0167 for j=1:length(i) -0168 if i(j) && isfield(newModel,'metPubChemID')~=1 -0169 newModel.metPubChemID=miriams(:,j); -0170 elseif i(j) -0171 newModel.metPubChemID=strcat(newModel.metPubChemID,';',miriams(:,j)); -0172 end -0173 end -0174 newModel.metPubChemID=regexprep(newModel.metPubChemID,'^;|;$',''); -0175 end -0176 %All other Miriams can be directly parsed with no modifications: -0177 for i = 1:length(metCOBRAfields) -0178 j=ismember(extractedMiriamNames,metNamespaces{i}); -0179 if any(j) -0180 eval(['newModel.' metCOBRAfields{i} ' = miriams(:,j);']) -0181 end -0182 end -0183 end -0184 if isfield(model,'inchis') -0185 newModel.metInChIString=regexprep(strcat('InChI=', model.inchis),'^InChI=$',''); -0186 end -0187 newModel.b=zeros(numel(model.mets),1); -0188 newModel.csense=repmat('E',size(model.mets)); -0189 if isfield(model,'geneMiriams') -0190 [miriams,extractedMiriamNames]=extractMiriam(model.geneMiriams); -0191 for i = 1:length(geneCOBRAfields) -0192 j=ismember(extractedMiriamNames,geneNamespaces{i}); -0193 if any(j) -0194 eval(['newModel.' 
geneCOBRAfields{i} ' = miriams(:,j);']) -0195 end -0196 end -0197 end -0198 if isfield(model,'geneShortNames') -0199 newModel.geneNames=model.geneShortNames; -0200 end -0201 if isfield(model,'genes') -0202 newModel.rules=grrulesToRules(model); -0203 else -0204 fprintf('WARNING: no genes detected. The model therefore may not be exportable to SBML file with writeCbModel\n'); -0205 end -0206 newModel.osenseStr='max'; -0207 else -0208 fprintf('Converting COBRA structure to RAVEN..\n'); -0209 %Convert from COBRA to RAVEN structure -0210 -0211 %Mandatory RAVEN fields -0212 newModel.mets=model.mets; -0213 if ~isfield(model,'comps') -0214 %Since 'comps' field is not mandatory in COBRA, it may be required -0215 %to obtain the non-redundant list of comps from metabolite ids, if -0216 %'comps' field is not available -0217 newModel.comps = unique(regexprep(model.mets,'.*\[([^\]]+)\]$','$1')); -0218 newModel.compNames = newModel.comps; -0219 end -0220 for i=1:numel(newModel.comps) -0221 newModel.mets=regexprep(newModel.mets,['\[', newModel.comps{i}, '\]$'],''); -0222 newModel.mets=regexprep(newModel.mets,['\[', newModel.compNames{i}, '\]$'],''); -0223 end -0224 -0225 %In some cases (e.g. any model that uses BiGG ids as main ids), there -0226 %may be overlapping mets due to removal of compartment info. To avoid -0227 %this, we change compartments from e.g. 
[c] into _c -0228 if numel(unique(newModel.mets))~=numel(model.mets) -0229 newModel.mets=model.mets; -0230 for i=1:numel(newModel.comps) -0231 newModel.mets=regexprep(newModel.mets,['\[' newModel.comps{i} '\]$'],['_' newModel.comps{i}]); -0232 end -0233 end -0234 %Since COBRA no longer contains rev field it is assumed that rxn is -0235 %reversible if its lower bound is set below zero -0236 if ~isfield(model,'rev') -0237 for i=1:numel(model.rxns) -0238 if model.lb(i)<0 -0239 newModel.rev(i,1)=1; -0240 else -0241 newModel.rev(i,1)=0; -0242 end -0243 end -0244 end -0245 newModel.b=zeros(numel(model.mets),1); -0246 -0247 %metComps is also mandatory, but defined later to match the order of -0248 %fields -0249 -0250 %Fields 'name' and 'id' are also considered as mandatory, but -0251 %these are added to the model during exportModel/exportToExcelFormat -0252 %anyway, so there is no point to add this information here -0253 -0254 %Optional RAVEN fields -0255 if isfield(model,'modelID') -0256 newModel.id=model.modelID; -0257 end -0258 if isfield(model,'modelName') -0259 newModel.name=model.modelName; -0260 end -0261 if isfield(model,'rules') && ~isfield(model,'grRules') -0262 model.grRules = rulesTogrrules(model); -0263 end -0264 if isfield(model,'grRules') -0265 [grRules,rxnGeneMat] = standardizeGrRules(model,true); -0266 newModel.grRules = grRules; -0267 newModel.rxnGeneMat = rxnGeneMat; -0268 end -0269 if isfield(model,'rxnECNumbers') -0270 newModel.eccodes=regexprep(model.rxnECNumbers,'EC|EC:',''); -0271 end -0272 if any(isfield(model,rxnCOBRAfields)) -0273 for i=1:numel(model.rxns) -0274 counter=1; -0275 newModel.rxnMiriams{i,1}=[]; -0276 if isfield(model,'rxnReferences') -0277 if ~isempty(model.rxnReferences{i}) -0278 pmids = model.rxnReferences{i}; -0279 pmids = strsplit(pmids,'; '); -0280 nonPmids = cellfun(@isempty,regexp(pmids,'^\d+$','match','once')); -0281 if any(nonPmids) %Not a pubmed id, keep in rxnReferences instead -0282 newModel.rxnReferences{i,1} = 
strjoin(pmids(nonPmids),', '); -0283 pmids(nonPmids)=[]; -0284 end -0285 for j = 1:length(pmids) -0286 newModel.rxnMiriams{i,1}.name{counter,1} = 'pubmed'; -0287 newModel.rxnMiriams{i,1}.value{counter,1} = pmids{j}; -0288 counter=counter+1; -0289 end -0290 end -0291 end -0292 for j = 2:length(rxnCOBRAfields) %Start from 2, as 1 is rxnReferences -0293 if isfield(model,rxnCOBRAfields{j}) -0294 rxnAnnotation = eval(['model.' rxnCOBRAfields{j} '{i}']); -0295 if ~isempty(rxnAnnotation) -0296 rxnAnnotation = strtrim(strsplit(rxnAnnotation,';')); -0297 for a=1:length(rxnAnnotation) -0298 newModel.rxnMiriams{i,1}.name{counter,1} = rxnNamespaces{j}; -0299 newModel.rxnMiriams{i,1}.value{counter,1} = rxnAnnotation{a}; -0300 counter=counter+1; -0301 end -0302 end -0303 end -0304 end -0305 end -0306 end -0307 if isfield(newModel,'rxnReferences') -0308 emptyEntry = cellfun(@isempty,newModel.rxnReferences); -0309 newModel.rxnReferences(emptyEntry)={''}; -0310 diffNumel = numel(newModel.rxns) - numel(newModel.rxnReferences); -0311 if diffNumel > 0 -0312 newModel.rxnReferences(end+1:end+diffNumel) = {''}; -0313 end -0314 end -0315 if any(isfield(model,geneCOBRAfields)) -0316 for i=1:numel(model.genes) -0317 counter=1; -0318 newModel.geneMiriams{i,1}=[]; -0319 for j = 1:length(geneCOBRAfields) -0320 if isfield(model,geneCOBRAfields{j}) -0321 geneAnnotation = eval(['model.' 
geneCOBRAfields{j} '{i}']); -0322 if ~isempty(geneAnnotation) -0323 geneAnnotation = strtrim(strsplit(geneAnnotation,';')); -0324 for a=1:length(geneAnnotation) -0325 newModel.geneMiriams{i,1}.name{counter,1} = geneNamespaces{j}; -0326 newModel.geneMiriams{i,1}.value{counter,1} = geneAnnotation{a}; -0327 counter=counter+1; -0328 end -0329 end -0330 end -0331 end -0332 end -0333 end -0334 if isfield(model,'geneNames') -0335 newModel.geneShortNames=model.geneNames; -0336 end -0337 newModel.metNames=model.metNames; -0338 for i=1:numel(newModel.comps) -0339 newModel.metNames=regexprep(newModel.metNames,['\[', newModel.comps{i}, '\]$'],''); -0340 newModel.metNames=regexprep(newModel.metNames,['\[', newModel.compNames{i}, '\]$'],''); -0341 end -0342 newModel.metNames=deblank(newModel.metNames); -0343 newModel.metComps=regexprep(model.mets,'^.+\[',''); -0344 newModel.metComps=regexprep(newModel.metComps,'\]$',''); -0345 [~, newModel.metComps]=ismember(newModel.metComps,newModel.comps); -0346 if isfield(model,'metInChIString') -0347 newModel.inchis=regexprep(model.metInChIString,'^InChI=',''); -0348 end -0349 printWarning=false; -0350 if any(isfield(model,[metCOBRAfields;'metKEGGID';'metPubChemID'])) -0351 for i=1:numel(model.mets) -0352 counter=1; -0353 newModel.metMiriams{i,1}=[]; -0354 if isfield(model,'metKEGGID') -0355 if ~isempty(model.metKEGGID{i}) -0356 if strcmp(model.metKEGGID{i}(1),'C') -0357 newModel.metMiriams{i,1}.name{counter,1} = 'kegg.compound'; -0358 newModel.metMiriams{i,1}.value{counter,1} = model.metKEGGID{i}; -0359 counter=counter+1; -0360 elseif strcmp(model.metKEGGID{i}(1),'G') -0361 newModel.metMiriams{i,1}.name{counter,1} = 'kegg.glycan'; -0362 newModel.metMiriams{i,1}.value{counter,1} = model.metKEGGID{i}; -0363 counter=counter+1; -0364 end -0365 end -0366 end -0367 if isfield(model,'metPubChemID') -0368 if ~isempty(model.metPubChemID{i}) -0369 if length(model.metPubChemID{i})>3 && strcmp(model.metPubChemID{i}(1:4),'CID:') -0370 
newModel.metMiriams{i,1}.name{counter,1} = 'pubchem.compound'; -0371 newModel.metMiriams{i,1}.value{counter,1} = model.metPubChemID{i}; -0372 counter=counter+1; -0373 elseif length(model.metPubChemID{i})>3 && strcmp(model.metPubChemID{i}(1:4),'SID:') -0374 newModel.metMiriams{i,1}.name{counter,1} = 'pubchem.substance'; -0375 newModel.metMiriams{i,1}.value{counter,1} = model.metPubChemID{i}; -0376 counter=counter+1; -0377 else -0378 newModel.metMiriams{i,1}.name{counter,1} = 'pubchem.compound'; -0379 newModel.metMiriams{i,1}.value{counter,1} = model.metPubChemID{i}; -0380 counter=counter+1; -0381 printWarning=true; -0382 end -0383 end -0384 end -0385 for j = 1:length(metCOBRAfields) -0386 if isfield(model,metCOBRAfields{j}) -0387 metAnnotation = eval(['model.' metCOBRAfields{j} '{i}']); -0388 if ~isempty(metAnnotation) -0389 metAnnotation = strtrim(strsplit(metAnnotation,';')); -0390 for a=1:length(metAnnotation) -0391 newModel.metMiriams{i,1}.name{counter,1} = metNamespaces{j}; -0392 newModel.metMiriams{i,1}.value{counter,1} = metAnnotation{a}; -0393 counter=counter+1; -0394 end -0395 end -0396 end -0397 end -0398 end -0399 end -0400 if printWarning -0401 fprintf('Could not determine whether PubChemIDs are compounds (CID)\n or substances (SID). 
All annotated PubChemIDs will therefore \n be assigned as compounds (CID).\n'); -0402 end -0403 end -0404 -0405 % Order fields -0406 newModel=standardizeModelFieldOrder(newModel); % Corrects for both RAVEN and COBRA models -0407 end -0408 -0409 function rules=grrulesToRules(model) -0410 %This function just takes grRules, changes all gene names to -0411 %'x(geneNumber)' and also changes 'or' and 'and' relations to corresponding -0412 %symbols -0413 replacingGenes=cell([size(model.genes,1) 1]); -0414 for i=1:numel(replacingGenes) -0415 replacingGenes{i}=strcat('x(',num2str(i),')'); -0416 end -0417 rules = strcat({' '},model.grRules,{' '}); -0418 for i=1:length(model.genes) -0419 rules=regexprep(rules,[' ' model.genes{i} ' '],[' ' replacingGenes{i} ' ']); -0420 rules=regexprep(rules,['(' model.genes{i} ' '],['(' replacingGenes{i} ' ']); -0421 rules=regexprep(rules,[' ' model.genes{i} ')'],[' ' replacingGenes{i} ')']); -0422 end -0423 rules=regexprep(rules,' and ',' & '); -0424 rules=regexprep(rules,' or ',' | '); -0425 rules=strtrim(rules); -0426 end -0427 -0428 function grRules=rulesTogrrules(model) -0429 %This function takes rules, replaces &/| for and/or, replaces the x(i) -0430 %format with the actual gene ID, and takes out extra whitespace and -0431 %redundant parenthesis introduced by COBRA, to create grRules. 
-0432 grRules = strrep(model.rules,'&','and'); -0433 grRules = strrep(grRules,'|','or'); -0434 for i = 1:length(model.genes) -0435 grRules = strrep(grRules,['x(' num2str(i) ')'],model.genes{i}); -0436 end -0437 grRules = strrep(grRules,'( ','('); -0438 grRules = strrep(grRules,' )',')'); -0439 grRules = regexprep(grRules,'^(',''); %rules that start with a "(" -0440 grRules = regexprep(grRules,')$',''); %rules that end with a ")" -0441 end +0024 % fields are lost: geneEntrezID, modelVersion, proteins +0025 % +0026 % NOTE: The information about mandatory RAVEN fields was taken from +0027 % checkModelStruct function, whereas the corresponding information about +0028 % COBRA fields was fetched from verifyModel function +0029 % +0030 % Usage: newModel=ravenCobraWrapper(model) +0031 +0032 if isfield(model,'rules') +0033 isRaven=false; +0034 else +0035 isRaven=true; +0036 end +0037 +0038 ravenPath=findRAVENroot(); +0039 +0040 % Load COBRA field information +0041 fid = fopen(fullfile(ravenPath,'struct_conversion','COBRA_structure_fields.csv')); % Taken from https://github.com/opencobra/cobratoolbox/blob/develop/src/base/io/definitions/COBRA_structure_fields.csv +0042 fieldFile = textscan(fid,repmat('%s',1,15),'Delimiter','\t','HeaderLines',1); +0043 dbFields = ~cellfun(@isempty,fieldFile{5}); % Only keep fields with database annotations that should be translated to xxxMiriams +0044 dbFields = dbFields & ~contains(fieldFile{1},{'metInChIString','metKEGGID','metPubChemID','rxnECNumbers'}); +0045 COBRAnamespace = fieldFile{5}(dbFields); +0046 COBRAnamespace = regexprep(COBRAnamespace,';.*',''); % Only keep first suggested namespace +0047 COBRAfields = fieldFile{1}(dbFields); +0048 fclose(fid); +0049 +0050 % Load conversion between additional COBRA fields and namespaces: +0051 fid = fopen(fullfile(ravenPath,'struct_conversion','cobraNamespaces.csv')); +0052 fieldFile = textscan(fid,'%s %s','Delimiter',',','HeaderLines',0); +0053 COBRAfields = [COBRAfields; fieldFile{1}]; +0054 
COBRAnamespace = [COBRAnamespace; fieldFile{2}]; +0055 rxnCOBRAfields = COBRAfields(startsWith(COBRAfields,'rxn')); +0056 rxnNamespaces = COBRAnamespace(startsWith(COBRAfields,'rxn')); +0057 metCOBRAfields = COBRAfields(startsWith(COBRAfields,'met')); +0058 metNamespaces = COBRAnamespace(startsWith(COBRAfields,'met')); +0059 geneCOBRAfields = COBRAfields(startsWith(COBRAfields,'gene')); +0060 geneNamespaces = COBRAnamespace(startsWith(COBRAfields,'gene')); +0061 fclose(fid); +0062 +0063 if isRaven +0064 %Firstly remove boundary metabolites +0065 model=simplifyModel(model); +0066 end +0067 +0068 % Keep fields that have identical names and content +0069 newModel.S=model.S; +0070 newModel.lb=model.lb; +0071 newModel.ub=model.ub; +0072 if isfield(model,'c') +0073 newModel.c=model.c; +0074 else +0075 newModel.c=zeros(numel(model.rxns),1); +0076 end +0077 newModel.rxns=model.rxns; +0078 optFields = {'rxnNames','rxnNotes','rxnConfidenceScores','rxnDeltaG',... +0079 'rxnGeneMat','rev','subSystems','comps','compNames','metCharges',... 
+0080 'metDeltaG','metFormulas','metNotes','metSmiles','genes','proteinNames'}; +0081 for i=1:length(optFields) +0082 if isfield(model,optFields{i}) +0083 newModel.(optFields{i})=model.(optFields{i}); +0084 end +0085 end +0086 +0087 % Convert unique fields +0088 if isRaven +0089 fprintf('Converting RAVEN structure to COBRA..\n'); +0090 %Convert from RAVEN to COBRA structure +0091 +0092 %Mandatory COBRA fields +0093 newModel.rxns=model.rxns; +0094 if all(~cellfun(@isempty,regexp(model.mets,'\[[^\]]+\]$'))) +0095 newModel.mets=model.mets; +0096 else +0097 %Check if model has compartment info as "met_c" suffix in all metabolites: +0098 BiGGformat = false(size(model.mets)); +0099 for i=1:numel(model.comps) +0100 compPos=model.metComps==i; +0101 BiGGformat(compPos)=~cellfun(@isempty,regexp(model.mets(compPos),['_' model.comps{i} '$'])); +0102 end +0103 if all(BiGGformat) +0104 newModel.mets=model.mets; +0105 for i=1:numel(model.comps) +0106 newModel.mets=regexprep(newModel.mets,['_' model.comps{i} '$'],['[' model.comps{i} ']']); +0107 end +0108 else +0109 newModel.mets=strcat(model.mets,'[',model.comps(model.metComps),']'); +0110 end +0111 end +0112 +0113 %b, csense, osenseStr, genes, rules are also mandatory, but defined +0114 %later to match the order of fields +0115 +0116 %Optional COBRA fields +0117 if isfield(model,'id') +0118 newModel.modelID=model.id; +0119 end +0120 if isfield(model,'name') +0121 newModel.modelName=model.name; +0122 end +0123 if isfield(model,'eccodes') +0124 newModel.rxnECNumbers=model.eccodes; +0125 end +0126 if isfield(model,'rxnMiriams') +0127 [miriams,extractedMiriamNames]=extractMiriam(model.rxnMiriams); +0128 for i = 1:length(rxnCOBRAfields) +0129 j=ismember(extractedMiriamNames,rxnNamespaces{i}); +0130 if any(j) +0131 eval(['newModel.' 
rxnCOBRAfields{i} ' = miriams(:,j);']) +0132 end +0133 end +0134 end +0135 if isfield(model,'rxnReferences') % Concatenate model.rxnReferences to those extracted from model.rxnMiriams +0136 if isfield(newModel,'rxnReferences') +0137 newModel.rxnReferences = strcat(newModel.rxnReferences,{'; '},model.rxnReferences); +0138 newModel.rxnReferences = regexprep(newModel.rxnReferences,'^; $',''); +0139 else +0140 newModel.rxnReferences = model.rxnReferences; +0141 end +0142 end +0143 if isfield(model,'metNames') +0144 newModel.metNames=strcat(model.metNames,' [',model.compNames(model.metComps),']'); +0145 end +0146 if isfield(model,'metMiriams') +0147 [miriams,extractedMiriamNames]=extractMiriam(model.metMiriams); +0148 %Shorten miriam names for KEGG and PubChem. These shorter names +0149 %will be used later to concatenate KEGG COMPOUND/GLYCAN and PubChem +0150 %Compound/Substance, into corresponding COBRA model fields +0151 extractedMiriamNames=regexprep(extractedMiriamNames,'^kegg\..+','kegg'); +0152 extractedMiriamNames=regexprep(extractedMiriamNames,'^pubchem\..+','pubchem'); +0153 i=ismember(extractedMiriamNames,'kegg'); +0154 if any(i) % Combine KEGG compounds and glycans +0155 for j=1:length(i) +0156 if i(j) && isfield(newModel,'metKEGGID')~=1 +0157 newModel.metKEGGID=miriams(:,j); +0158 elseif i(j) +0159 newModel.metKEGGID=strcat(newModel.metKEGGID,';',miriams(:,j)); +0160 end +0161 end +0162 newModel.metKEGGID=regexprep(newModel.metKEGGID,'^;|;$',''); +0163 end +0164 i=ismember(extractedMiriamNames,'pubchem'); +0165 if any(i) % Combine Pubchem compounds and substances +0166 for j=1:length(i) +0167 if i(j) && isfield(newModel,'metPubChemID')~=1 +0168 newModel.metPubChemID=miriams(:,j); +0169 elseif i(j) +0170 newModel.metPubChemID=strcat(newModel.metPubChemID,';',miriams(:,j)); +0171 end +0172 end +0173 newModel.metPubChemID=regexprep(newModel.metPubChemID,'^;|;$',''); +0174 end +0175 %All other Miriams can be directly parsed with no modifications: +0176 for i = 
1:length(metCOBRAfields) +0177 j=ismember(extractedMiriamNames,metNamespaces{i}); +0178 if any(j) +0179 eval(['newModel.' metCOBRAfields{i} ' = miriams(:,j);']) +0180 end +0181 end +0182 end +0183 if isfield(model,'inchis') +0184 newModel.metInChIString=regexprep(strcat('InChI=', model.inchis),'^InChI=$',''); +0185 end +0186 newModel.b=zeros(numel(model.mets),1); +0187 newModel.csense=repmat('E',size(model.mets)); +0188 if isfield(model,'geneMiriams') +0189 [miriams,extractedMiriamNames]=extractMiriam(model.geneMiriams); +0190 for i = 1:length(geneCOBRAfields) +0191 j=ismember(extractedMiriamNames,geneNamespaces{i}); +0192 if any(j) +0193 eval(['newModel.' geneCOBRAfields{i} ' = miriams(:,j);']) +0194 end +0195 end +0196 end +0197 if isfield(model,'geneShortNames') +0198 newModel.geneNames=model.geneShortNames; +0199 end +0200 if isfield(model,'genes') +0201 newModel.rules=grrulesToRules(model); +0202 else +0203 fprintf('WARNING: no genes detected. The model therefore may not be exportable to SBML file with writeCbModel\n'); +0204 end +0205 newModel.osenseStr='max'; +0206 else +0207 fprintf('Converting COBRA structure to RAVEN..\n'); +0208 %Convert from COBRA to RAVEN structure +0209 +0210 %Mandatory RAVEN fields +0211 newModel.mets=model.mets; +0212 if ~isfield(model,'comps') +0213 %Since 'comps' field is not mandatory in COBRA, it may be required +0214 %to obtain the non-redundant list of comps from metabolite ids, if +0215 %'comps' field is not available +0216 newModel.comps = unique(regexprep(model.mets,'.*\[([^\]]+)\]$','$1')); +0217 newModel.compNames = newModel.comps; +0218 end +0219 for i=1:numel(newModel.comps) +0220 newModel.mets=regexprep(newModel.mets,['\[', newModel.comps{i}, '\]$'],''); +0221 newModel.mets=regexprep(newModel.mets,['\[', newModel.compNames{i}, '\]$'],''); +0222 end +0223 +0224 %In some cases (e.g. any model that uses BiGG ids as main ids), there +0225 %may be overlapping mets due to removal of compartment info. 
To avoid +0226 %this, we change compartments from e.g. [c] into _c +0227 if numel(unique(newModel.mets))~=numel(model.mets) +0228 newModel.mets=model.mets; +0229 for i=1:numel(newModel.comps) +0230 newModel.mets=regexprep(newModel.mets,['\[' newModel.comps{i} '\]$'],['_' newModel.comps{i}]); +0231 end +0232 end +0233 %Since COBRA no longer contains rev field it is assumed that rxn is +0234 %reversible if its lower bound is set below zero +0235 if ~isfield(model,'rev') +0236 for i=1:numel(model.rxns) +0237 if model.lb(i)<0 +0238 newModel.rev(i,1)=1; +0239 else +0240 newModel.rev(i,1)=0; +0241 end +0242 end +0243 end +0244 newModel.b=zeros(numel(model.mets),1); +0245 +0246 %metComps is also mandatory, but defined later to match the order of +0247 %fields +0248 +0249 %Fields 'name' and 'id' are also considered as mandatory, but +0250 %these are added to the model during exportModel/exportToExcelFormat +0251 %anyway, so there is no point to add this information here +0252 +0253 %Optional RAVEN fields +0254 if isfield(model,'modelID') +0255 newModel.id=model.modelID; +0256 end +0257 if isfield(model,'modelName') +0258 newModel.name=model.modelName; +0259 end +0260 if isfield(model,'rules') && ~isfield(model,'grRules') +0261 model.grRules = rulesTogrrules(model); +0262 end +0263 if isfield(model,'grRules') +0264 [grRules,rxnGeneMat] = standardizeGrRules(model,true); +0265 newModel.grRules = grRules; +0266 newModel.rxnGeneMat = rxnGeneMat; +0267 end +0268 if isfield(model,'rxnECNumbers') +0269 newModel.eccodes=regexprep(model.rxnECNumbers,'EC|EC:',''); +0270 end +0271 if any(isfield(model,rxnCOBRAfields)) +0272 for i=1:numel(model.rxns) +0273 counter=1; +0274 newModel.rxnMiriams{i,1}=[]; +0275 if isfield(model,'rxnReferences') +0276 if ~isempty(model.rxnReferences{i}) +0277 pmids = model.rxnReferences{i}; +0278 pmids = strsplit(pmids,'; '); +0279 nonPmids = cellfun(@isempty,regexp(pmids,'^\d+$','match','once')); +0280 if any(nonPmids) %Not a pubmed id, keep in 
rxnReferences instead +0281 newModel.rxnReferences{i,1} = strjoin(pmids(nonPmids),', '); +0282 pmids(nonPmids)=[]; +0283 end +0284 for j = 1:length(pmids) +0285 newModel.rxnMiriams{i,1}.name{counter,1} = 'pubmed'; +0286 newModel.rxnMiriams{i,1}.value{counter,1} = pmids{j}; +0287 counter=counter+1; +0288 end +0289 end +0290 end +0291 for j = 2:length(rxnCOBRAfields) %Start from 2, as 1 is rxnReferences +0292 if isfield(model,rxnCOBRAfields{j}) +0293 rxnAnnotation = eval(['model.' rxnCOBRAfields{j} '{i}']); +0294 if ~isempty(rxnAnnotation) +0295 rxnAnnotation = strtrim(strsplit(rxnAnnotation,';')); +0296 for a=1:length(rxnAnnotation) +0297 newModel.rxnMiriams{i,1}.name{counter,1} = rxnNamespaces{j}; +0298 newModel.rxnMiriams{i,1}.value{counter,1} = rxnAnnotation{a}; +0299 counter=counter+1; +0300 end +0301 end +0302 end +0303 end +0304 end +0305 end +0306 if isfield(newModel,'rxnReferences') +0307 emptyEntry = cellfun(@isempty,newModel.rxnReferences); +0308 newModel.rxnReferences(emptyEntry)={''}; +0309 diffNumel = numel(newModel.rxns) - numel(newModel.rxnReferences); +0310 if diffNumel > 0 +0311 newModel.rxnReferences(end+1:end+diffNumel) = {''}; +0312 end +0313 end +0314 if any(isfield(model,geneCOBRAfields)) +0315 for i=1:numel(model.genes) +0316 counter=1; +0317 newModel.geneMiriams{i,1}=[]; +0318 for j = 1:length(geneCOBRAfields) +0319 if isfield(model,geneCOBRAfields{j}) +0320 geneAnnotation = eval(['model.' 
geneCOBRAfields{j} '{i}']); +0321 if ~isempty(geneAnnotation) +0322 geneAnnotation = strtrim(strsplit(geneAnnotation,';')); +0323 for a=1:length(geneAnnotation) +0324 newModel.geneMiriams{i,1}.name{counter,1} = geneNamespaces{j}; +0325 newModel.geneMiriams{i,1}.value{counter,1} = geneAnnotation{a}; +0326 counter=counter+1; +0327 end +0328 end +0329 end +0330 end +0331 end +0332 end +0333 if isfield(model,'geneNames') +0334 newModel.geneShortNames=model.geneNames; +0335 end +0336 newModel.metNames=model.metNames; +0337 for i=1:numel(newModel.comps) +0338 newModel.metNames=regexprep(newModel.metNames,['\[', newModel.comps{i}, '\]$'],''); +0339 newModel.metNames=regexprep(newModel.metNames,['\[', newModel.compNames{i}, '\]$'],''); +0340 end +0341 newModel.metNames=deblank(newModel.metNames); +0342 newModel.metComps=regexprep(model.mets,'^.+\[',''); +0343 newModel.metComps=regexprep(newModel.metComps,'\]$',''); +0344 [~, newModel.metComps]=ismember(newModel.metComps,newModel.comps); +0345 if isfield(model,'metInChIString') +0346 newModel.inchis=regexprep(model.metInChIString,'^InChI=',''); +0347 end +0348 printWarning=false; +0349 if any(isfield(model,[metCOBRAfields;'metKEGGID';'metPubChemID'])) +0350 for i=1:numel(model.mets) +0351 counter=1; +0352 newModel.metMiriams{i,1}=[]; +0353 if isfield(model,'metKEGGID') +0354 if ~isempty(model.metKEGGID{i}) +0355 if strcmp(model.metKEGGID{i}(1),'C') +0356 newModel.metMiriams{i,1}.name{counter,1} = 'kegg.compound'; +0357 newModel.metMiriams{i,1}.value{counter,1} = model.metKEGGID{i}; +0358 counter=counter+1; +0359 elseif strcmp(model.metKEGGID{i}(1),'G') +0360 newModel.metMiriams{i,1}.name{counter,1} = 'kegg.glycan'; +0361 newModel.metMiriams{i,1}.value{counter,1} = model.metKEGGID{i}; +0362 counter=counter+1; +0363 end +0364 end +0365 end +0366 if isfield(model,'metPubChemID') +0367 if ~isempty(model.metPubChemID{i}) +0368 if length(model.metPubChemID{i})>3 && strcmp(model.metPubChemID{i}(1:4),'CID:') +0369 
newModel.metMiriams{i,1}.name{counter,1} = 'pubchem.compound'; +0370 newModel.metMiriams{i,1}.value{counter,1} = model.metPubChemID{i}; +0371 counter=counter+1; +0372 elseif length(model.metPubChemID{i})>3 && strcmp(model.metPubChemID{i}(1:4),'SID:') +0373 newModel.metMiriams{i,1}.name{counter,1} = 'pubchem.substance'; +0374 newModel.metMiriams{i,1}.value{counter,1} = model.metPubChemID{i}; +0375 counter=counter+1; +0376 else +0377 newModel.metMiriams{i,1}.name{counter,1} = 'pubchem.compound'; +0378 newModel.metMiriams{i,1}.value{counter,1} = model.metPubChemID{i}; +0379 counter=counter+1; +0380 printWarning=true; +0381 end +0382 end +0383 end +0384 for j = 1:length(metCOBRAfields) +0385 if isfield(model,metCOBRAfields{j}) +0386 metAnnotation = eval(['model.' metCOBRAfields{j} '{i}']); +0387 if ~isempty(metAnnotation) +0388 metAnnotation = strtrim(strsplit(metAnnotation,';')); +0389 for a=1:length(metAnnotation) +0390 newModel.metMiriams{i,1}.name{counter,1} = metNamespaces{j}; +0391 newModel.metMiriams{i,1}.value{counter,1} = metAnnotation{a}; +0392 counter=counter+1; +0393 end +0394 end +0395 end +0396 end +0397 end +0398 end +0399 if printWarning +0400 fprintf('Could not determine whether PubChemIDs are compounds (CID)\n or substances (SID). 
All annotated PubChemIDs will therefore \n be assigned as compounds (CID).\n'); +0401 end +0402 end +0403 +0404 % Order fields +0405 newModel=standardizeModelFieldOrder(newModel); % Corrects for both RAVEN and COBRA models +0406 end +0407 +0408 function rules=grrulesToRules(model) +0409 %This function just takes grRules, changes all gene names to +0410 %'x(geneNumber)' and also changes 'or' and 'and' relations to corresponding +0411 %symbols +0412 replacingGenes=cell([size(model.genes,1) 1]); +0413 for i=1:numel(replacingGenes) +0414 replacingGenes{i}=strcat('x(',num2str(i),')'); +0415 end +0416 rules = strcat({' '},model.grRules,{' '}); +0417 for i=1:length(model.genes) +0418 rules=regexprep(rules,[' ' model.genes{i} ' '],[' ' replacingGenes{i} ' ']); +0419 rules=regexprep(rules,['(' model.genes{i} ' '],['(' replacingGenes{i} ' ']); +0420 rules=regexprep(rules,[' ' model.genes{i} ')'],[' ' replacingGenes{i} ')']); +0421 end +0422 rules=regexprep(rules,' and ',' & '); +0423 rules=regexprep(rules,' or ',' | '); +0424 rules=strtrim(rules); +0425 end +0426 +0427 function grRules=rulesTogrrules(model) +0428 %This function takes rules, replaces &/| for and/or, replaces the x(i) +0429 %format with the actual gene ID, and takes out extra whitespace and +0430 %redundant parenthesis introduced by COBRA, to create grRules. +0431 grRules = strrep(model.rules,'&','and'); +0432 grRules = strrep(grRules,'|','or'); +0433 for i = 1:length(model.genes) +0434 grRules = strrep(grRules,['x(' num2str(i) ')'],model.genes{i}); +0435 end +0436 grRules = strrep(grRules,'( ','('); +0437 grRules = strrep(grRules,' )',')'); +0438 grRules = regexprep(grRules,'^(',''); %rules that start with a "(" +0439 grRules = regexprep(grRules,')$',''); %rules that end with a ")" +0440 end
Generated by m2html © 2005
\ No newline at end of file diff --git a/doc/testing/unit_tests/fillGapsLargeTests.html b/doc/testing/unit_tests/fillGapsLargeTests.html index f70820d7..9b44640b 100644 --- a/doc/testing/unit_tests/fillGapsLargeTests.html +++ b/doc/testing/unit_tests/fillGapsLargeTests.html @@ -54,7 +54,7 @@

SOURCE CODE ^try 0008 gurobi_read('solverTests.m'); 0009 catch ME -0010 if ~startsWith(ME.message,'Gurobi error 10012') % Expected error code, others may indicate problems with license +0010 if ~startsWith(ME.message,{'Gurobi error 10012','Gurobi error 10003'}) % Expected error codes, others may indicate problems with license 0011 testGurobi = false; 0012 end 0013 end diff --git a/io/exportModel.m b/io/exportModel.m index 3ac8dbc6..3bc61c4f 100755 --- a/io/exportModel.m +++ b/io/exportModel.m @@ -99,6 +99,9 @@ function exportModel(model,fileName,exportGeneComplexes,supressWarnings,sortIds) if ~isfield(model,'geneShortNames') && isfield(model,'genes') model.geneShortNames=cell(numel(model.genes),1); end +if ~isfield(model,'proteinNames') && isfield(model,'genes') + model.proteinNames=cell(numel(model.genes),1); +end if ~isfield(model,'subSystems') model.subSystems=cell(numel(model.rxns),1); end @@ -394,6 +397,11 @@ function exportModel(model,fileName,exportGeneComplexes,supressWarnings,sortIds) modelSBML.fbc_geneProduct(i).fbc_label=model.geneShortNames{i}; end end + if isfield(modelSBML.fbc_geneProduct, 'fbc_name') && isfield(model,'proteinNames') + if ~isempty(model.proteinNames{i}) + modelSBML.fbc_geneProduct(i).fbc_name=model.proteinNames{i}; + end + end end if exportGeneComplexes==true %Also add the complexes as genes. This is done by splitting grRules diff --git a/io/importModel.m b/io/importModel.m index 5b422873..615cf389 100755 --- a/io/importModel.m +++ b/io/importModel.m @@ -47,6 +47,7 @@ % geneComps compartments for genes % geneMiriams structure with MIRIAM information about the genes % geneShortNames gene alternative names (e.g. 
ERG10) +% proteinNames protein associated to each gene % metNames metabolite description % metComps compartments for metabolites % inchis InChI-codes for metabolites @@ -118,6 +119,7 @@ model.geneComps=[]; model.geneMiriams={}; model.geneShortNames={}; +model.proteinNames={}; model.metNames={}; model.metComps=[]; model.inchis={}; @@ -208,6 +210,7 @@ geneIDs={}; geneMiriams={}; geneShortNames={}; +proteinNames={}; geneCompartments={}; complexIDs={}; complexNames={}; @@ -923,6 +926,7 @@ end end end + proteinNames={modelSBML.fbc_geneProduct.fbc_name}; else genes=getGeneList(grRules); end @@ -1014,6 +1018,11 @@ model.geneMiriams=geneMiriams; end +%If any protein strings have been loaded +if any(~cellfun(@isempty,proteinNames)) + model.proteinNames=proteinNames; +end + model.unconstrained=metaboliteUnconstrained; %Convert SBML IDs back into their original strings. Here we are using part @@ -1078,6 +1087,9 @@ if isempty(model.geneShortNames) model=rmfield(model,'geneShortNames'); end +if isempty(model.proteinNames) + model=rmfield(model,'proteinNames'); +end if isempty(model.inchis) model=rmfield(model,'inchis'); end diff --git a/io/readYAMLmodel.m b/io/readYAMLmodel.m index ee9f3463..f92fed25 100755 --- a/io/readYAMLmodel.m +++ b/io/readYAMLmodel.m @@ -100,6 +100,7 @@ 'geneComps',cell(0,0);... %Changed to double in the end. 'geneMiriams',cell(0,0);... 'geneShortNames',cell(0,0);... + 'proteinNames',cell(0,0);... 'unconstrained',cell(0,0);... %Changed to double in the end. 'metFrom',cell(0,0);... 
'rxnFrom',cell(0,0)}; @@ -373,6 +374,8 @@ miriamKey = ''; case 'name' model = readFieldValue(model, 'geneShortNames', tline_value, pos); + case 'protein' + model = readFieldValue(model, 'proteinNames', tline_value, pos); case 'annotation' readList = 'annotation'; otherwise @@ -568,7 +571,7 @@ model = emptyOrFill(model,i{1},1,'mets'); end % Genes -for i={'geneMiriams','geneShortNames'} % Empty strings +for i={'geneMiriams','geneShortNames','proteinNames'} % Empty strings model = emptyOrFill(model,i{1},{''},'genes'); end for i={'geneComps'} % Ones, assume first compartment diff --git a/io/writeYAMLmodel.m b/io/writeYAMLmodel.m index 1448b85a..dc804665 100755 --- a/io/writeYAMLmodel.m +++ b/io/writeYAMLmodel.m @@ -113,6 +113,7 @@ function writeYAMLmodel(model,fileName,preserveQuotes,sortIds) fprintf(fid,' - !!omap\n'); writeField(model, fid, 'genes', 'txt', i, ' - id', preserveQuotes) writeField(model, fid, 'geneShortNames', 'txt', i, ' - name', preserveQuotes) + writeField(model, fid, 'proteinNames', 'txt', i, ' - protein', preserveQuotes) writeField(model, fid, 'geneMiriams', 'txt', i, ' - annotation', preserveQuotes) end diff --git a/struct_conversion/orderRavenFields.csv b/struct_conversion/orderRavenFields.csv index 1ddfc6ef..9a56c95c 100644 --- a/struct_conversion/orderRavenFields.csv +++ b/struct_conversion/orderRavenFields.csv @@ -30,6 +30,7 @@ genes geneComps geneMiriams geneShortNames +proteinNames metNames metComps inchis diff --git a/struct_conversion/ravenCobraWrapper.m b/struct_conversion/ravenCobraWrapper.m index 1f71d387..27a7fe32 100755 --- a/struct_conversion/ravenCobraWrapper.m +++ b/struct_conversion/ravenCobraWrapper.m @@ -21,8 +21,7 @@ % reconstructed based on lower bound reaction values % % NOTE: During COBRA -> RAVEN -> COBRA conversion cycle the following -% fields are lost: geneEntrezID, metSmiles, modelVersion, -% proteinNames, proteins +% fields are lost: geneEntrezID, modelVersion, proteins % % NOTE: The information about mandatory RAVEN
fields was taken from % checkModelStruct function, whereas the corresponding information about @@ -76,9 +75,9 @@ newModel.c=zeros(numel(model.rxns),1); end newModel.rxns=model.rxns; -optFields = {'rxnNames','subSystems','rxnNotes','metDeltaG','rxnDeltaG',... - 'metFormulas','comps','compNames','metCharges','genes',... - 'rxnConfidenceScores','rxnGeneMat','metNotes','rev'}; +optFields = {'rxnNames','rxnNotes','rxnConfidenceScores','rxnDeltaG',... + 'rxnGeneMat','rev','subSystems','comps','compNames','metCharges',... + 'metDeltaG','metFormulas','metNotes','metSmiles','genes','proteinNames'}; for i=1:length(optFields) if isfield(model,optFields{i}) newModel.(optFields{i})=model.(optFields{i}); diff --git a/testing/unit_tests/fillGapsLargeTests.m b/testing/unit_tests/fillGapsLargeTests.m index 34978720..1c0576d4 100755 --- a/testing/unit_tests/fillGapsLargeTests.m +++ b/testing/unit_tests/fillGapsLargeTests.m @@ -7,7 +7,7 @@ try gurobi_read('solverTests.m'); catch ME - if ~startsWith(ME.message,'Gurobi error 10012') % Expected error code, others may indicate problems with license + if ~startsWith(ME.message,{'Gurobi error 10012','Gurobi error 10003'}) % Expected error codes, others may indicate problems with license testGurobi = false; end end