diff --git a/doc/struct_conversion/ravenCobraWrapper.html b/doc/struct_conversion/ravenCobraWrapper.html index 556cdc4a..7f06866e 100644 --- a/doc/struct_conversion/ravenCobraWrapper.html +++ b/doc/struct_conversion/ravenCobraWrapper.html @@ -36,7 +36,9 @@

DESCRIPTION ^SOURCE CODE ^% 0009 % This function is a bidirectional tool to convert between RAVEN and 0010 % COBRA structures. It recognises COBRA structure by checking field -0011 % 'rules' existense, which is only found in COBRA Toolbox structure. -0012 % -0013 % NOTE: During RAVEN -> COBRA -> RAVEN conversion cycle the following -0014 % fields are lost: annotation, compOutside, compMiriams, rxnComps, -0015 % geneComps, unconstrained. Boundary metabolites are lost, because COBRA -0016 % structure does not involve boundary metabolites, so they are removed -0017 % using simplifyModel before RAVEN -> COBRA conversion. The field 'rev' -0018 % is also partially lost, but during COBRA -> RAVEN conversion it's -0019 % reconstructed based on lower bound reaction values -0020 % -0021 % NOTE: During COBRA -> RAVEN -> COBRA conversion cycle the following -0022 % fields are lost: geneEntrezID, metSmiles, modelVersion, -0023 % proteinNames, proteins -0024 % -0025 % NOTE: The information about mandatory RAVEN fields was taken from -0026 % checkModelStruct function, whereas the corresponding information about -0027 % COBRA fields was fetched from verifyModel function -0028 % -0029 % Usage: newModel=ravenCobraWrapper(model) -0030 -0031 if isfield(model,'rules') -0032 isRaven=false; -0033 else -0034 isRaven=true; -0035 end -0036 -0037 ravenPath=findRAVENroot(); +0011 % 'rules' existense, which is only found in COBRA Toolbox structure. If +0012 % the COBRA model also has a grRules field, then this will be used +0013 % instead of parsing the rules field. +0014 % +0015 % NOTE: During RAVEN -> COBRA -> RAVEN conversion cycle the following +0016 % fields are lost: annotation, compOutside, compMiriams, rxnComps, +0017 % geneComps, unconstrained. Boundary metabolites are lost, because COBRA +0018 % structure does not involve boundary metabolites, so they are removed +0019 % using simplifyModel before RAVEN -> COBRA conversion. The field 'rev' +0020 % is also partially lost, but during COBRA -> RAVEN conversion it's +0021 % reconstructed based on lower bound reaction values +0022 % +0023 % NOTE: During COBRA -> RAVEN -> COBRA conversion cycle the following +0024 % fields are lost: geneEntrezID, metSmiles, modelVersion, +0025 % proteinNames, proteins +0026 % +0027 % NOTE: The information about mandatory RAVEN fields was taken from +0028 % checkModelStruct function, whereas the corresponding information about +0029 % COBRA fields was fetched from verifyModel function +0030 % +0031 % Usage: newModel=ravenCobraWrapper(model) +0032 +0033 if isfield(model,'rules') +0034 isRaven=false; +0035 else +0036 isRaven=true; +0037 end 0038 -0039 % Load COBRA field information -0040 fid = fopen(fullfile(ravenPath,'struct_conversion','COBRA_structure_fields.csv')); % Taken from https://github.com/opencobra/cobratoolbox/blob/develop/src/base/io/definitions/COBRA_structure_fields.csv -0041 fieldFile = textscan(fid,repmat('%s',1,15),'Delimiter','\t','HeaderLines',1); -0042 dbFields = ~cellfun(@isempty,fieldFile{5}); % Only keep fields with database annotations that should be translated to xxxMiriams -0043 dbFields = dbFields & ~contains(fieldFile{1},{'metInChIString','metKEGGID','metPubChemID','rxnECNumbers'}); -0044 COBRAnamespace = fieldFile{5}(dbFields); -0045 COBRAnamespace = regexprep(COBRAnamespace,';.*',''); % Only keep first suggested namespace -0046 COBRAfields = fieldFile{1}(dbFields); -0047 fclose(fid); -0048 -0049 % Load conversion between additional COBRA fields and namespaces: -0050 fid = fopen(fullfile(ravenPath,'struct_conversion','cobraNamespaces.csv')); -0051 fieldFile = textscan(fid,'%s %s','Delimiter',',','HeaderLines',0); -0052 COBRAfields = [COBRAfields; fieldFile{1}]; -0053 COBRAnamespace = [COBRAnamespace; fieldFile{2}]; -0054 rxnCOBRAfields = COBRAfields(startsWith(COBRAfields,'rxn')); -0055 rxnNamespaces = COBRAnamespace(startsWith(COBRAfields,'rxn')); -0056 metCOBRAfields = COBRAfields(startsWith(COBRAfields,'met')); -0057 metNamespaces = COBRAnamespace(startsWith(COBRAfields,'met')); -0058 geneCOBRAfields = COBRAfields(startsWith(COBRAfields,'gene')); -0059 geneNamespaces = COBRAnamespace(startsWith(COBRAfields,'gene')); -0060 fclose(fid); -0061 -0062 if isRaven -0063 %Firstly remove boundary metabolites -0064 model=simplifyModel(model); -0065 end -0066 -0067 % Keep fields that have identical names and content -0068 newModel.S=model.S; -0069 newModel.lb=model.lb; -0070 newModel.ub=model.ub; -0071 if isfield(model,'c') -0072 newModel.c=model.c; -0073 else -0074 newModel.c=zeros(numel(model.rxns),1); -0075 end -0076 newModel.rxns=model.rxns; -0077 optFields = {'rxnNames','subSystems','rxnNotes','metDeltaG','rxnDeltaG',... -0078 'metFormulas','comps','compNames','metCharges','genes',... -0079 'rxnConfidenceScores','rxnGeneMat','metNotes','rev'}; -0080 for i=1:length(optFields) -0081 if isfield(model,optFields{i}) -0082 newModel.(optFields{i})=model.(optFields{i}); -0083 end -0084 end -0085 -0086 % Convert unique fields -0087 if isRaven -0088 fprintf('Converting RAVEN structure to COBRA..\n'); -0089 %Convert from RAVEN to COBRA structure -0090 -0091 %Mandatory COBRA fields -0092 newModel.rxns=model.rxns; -0093 if all(~cellfun(@isempty,regexp(model.mets,'\[[^\]]+\]$'))) -0094 newModel.mets=model.mets; -0095 else -0096 %Check if model has compartment info as "met_c" suffix in all metabolites: -0097 BiGGformat = false(size(model.mets)); -0098 for i=1:numel(model.comps) -0099 compPos=model.metComps==i; -0100 BiGGformat(compPos)=~cellfun(@isempty,regexp(model.mets(compPos),['_' model.comps{i} '$'])); -0101 end -0102 if all(BiGGformat) -0103 newModel.mets=model.mets; -0104 for i=1:numel(model.comps) -0105 newModel.mets=regexprep(newModel.mets,['_' model.comps{i} '$'],['[' model.comps{i} ']']); -0106 end -0107 else -0108 newModel.mets=strcat(model.mets,'[',model.comps(model.metComps),']'); -0109 end -0110 end -0111 -0112 %b, csense, osenseStr, genes, rules are also mandatory, but defined -0113 %later to match the order of fields -0114 -0115 %Optional COBRA fields -0116 if isfield(model,'id') -0117 newModel.modelID=model.id; -0118 end -0119 if isfield(model,'name') -0120 newModel.modelName=model.name; -0121 end -0122 if isfield(model,'eccodes') -0123 newModel.rxnECNumbers=model.eccodes; -0124 end -0125 if isfield(model,'rxnMiriams') -0126 [miriams,extractedMiriamNames]=extractMiriam(model.rxnMiriams); -0127 for i = 1:length(rxnCOBRAfields) -0128 j=ismember(extractedMiriamNames,rxnNamespaces{i}); -0129 if any(j) -0130 eval(['newModel.' rxnCOBRAfields{i} ' = miriams(:,j);']) -0131 end -0132 end -0133 end -0134 if isfield(model,'rxnReferences') % Concatenate model.rxnReferences to those extracted from model.rxnMiriams -0135 if isfield(newModel,'rxnReferences') -0136 newModel.rxnReferences = strcat(newModel.rxnReferences,{'; '},model.rxnReferences); -0137 newModel.rxnReferences = regexprep(newModel.rxnReferences,'^; $',''); -0138 else -0139 newModel.rxnReferences = model.rxnReferences; -0140 end -0141 end -0142 if isfield(model,'metNames') -0143 newModel.metNames=strcat(model.metNames,' [',model.compNames(model.metComps),']'); -0144 end -0145 if isfield(model,'metMiriams') -0146 [miriams,extractedMiriamNames]=extractMiriam(model.metMiriams); -0147 %Shorten miriam names for KEGG and PubChem. These shorter names -0148 %will be used later to concatenate KEGG COMPOUND/GLYCAN and PubChem -0149 %Compound/Substance, into corresponding COBRA model fields -0150 extractedMiriamNames=regexprep(extractedMiriamNames,'^kegg\..+','kegg'); -0151 extractedMiriamNames=regexprep(extractedMiriamNames,'^pubchem\..+','pubchem'); -0152 i=ismember(extractedMiriamNames,'kegg'); -0153 if any(i) % Combine KEGG compounds and glycans -0154 for j=1:length(i) -0155 if i(j) && isfield(newModel,'metKEGGID')~=1 -0156 newModel.metKEGGID=miriams(:,j); -0157 elseif i(j) -0158 newModel.metKEGGID=strcat(newModel.metKEGGID,';',miriams(:,j)); -0159 end -0160 end -0161 newModel.metKEGGID=regexprep(newModel.metKEGGID,'^;|;$',''); -0162 end -0163 i=ismember(extractedMiriamNames,'pubchem'); -0164 if any(i) % Combine Pubchem compounds and substances -0165 for j=1:length(i) -0166 if i(j) && isfield(newModel,'metPubChemID')~=1 -0167 newModel.metPubChemID=miriams(:,j); -0168 elseif i(j) -0169 newModel.metPubChemID=strcat(newModel.metPubChemID,';',miriams(:,j)); -0170 end -0171 end -0172 newModel.metPubChemID=regexprep(newModel.metPubChemID,'^;|;$',''); -0173 end -0174 %All other Miriams can be directly parsed with no modifications: -0175 for i = 1:length(metCOBRAfields) -0176 j=ismember(extractedMiriamNames,metNamespaces{i}); -0177 if any(j) -0178 eval(['newModel.' metCOBRAfields{i} ' = miriams(:,j);']) -0179 end -0180 end -0181 end -0182 if isfield(model,'inchis') -0183 newModel.metInChIString=regexprep(strcat('InChI=', model.inchis),'^InChI=$',''); -0184 end -0185 newModel.b=zeros(numel(model.mets),1); -0186 newModel.csense=repmat('E',size(model.mets)); -0187 if isfield(model,'geneMiriams') -0188 [miriams,extractedMiriamNames]=extractMiriam(model.geneMiriams); -0189 for i = 1:length(geneCOBRAfields) -0190 j=ismember(extractedMiriamNames,geneNamespaces{i}); -0191 if any(j) -0192 eval(['newModel.' geneCOBRAfields{i} ' = miriams(:,j);']) -0193 end -0194 end -0195 end -0196 if isfield(model,'geneShortNames') -0197 newModel.geneNames=model.geneShortNames; -0198 end -0199 if isfield(model,'genes') -0200 newModel.rules=grrulesToRules(model); -0201 else -0202 fprintf('WARNING: no genes detected. The model therefore may not be exportable to SBML file with writeCbModel\n'); -0203 end -0204 newModel.osenseStr='max'; -0205 else -0206 fprintf('Converting COBRA structure to RAVEN..\n'); -0207 %Convert from COBRA to RAVEN structure -0208 -0209 %Mandatory RAVEN fields -0210 newModel.mets=model.mets; -0211 if ~isfield(model,'comps') -0212 %Since 'comps' field is not mandatory in COBRA, it may be required -0213 %to obtain the non-redundant list of comps from metabolite ids, if -0214 %'comps' field is not available -0215 newModel.comps = unique(regexprep(model.mets,'.*\[([^\]]+)\]$','$1')); -0216 newModel.compNames = newModel.comps; -0217 end -0218 for i=1:numel(newModel.comps) -0219 newModel.mets=regexprep(newModel.mets,['\[', newModel.comps{i}, '\]$'],''); -0220 newModel.mets=regexprep(newModel.mets,['\[', newModel.compNames{i}, '\]$'],''); -0221 end -0222 -0223 %In some cases (e.g. any model that uses BiGG ids as main ids), there -0224 %may be overlapping mets due to removal of compartment info. To avoid -0225 %this, we change compartments from e.g. [c] into _c -0226 if numel(unique(newModel.mets))~=numel(model.mets) -0227 newModel.mets=model.mets; -0228 for i=1:numel(newModel.comps) -0229 newModel.mets=regexprep(newModel.mets,['\[' newModel.comps{i} '\]$'],['_' newModel.comps{i}]); -0230 end -0231 end -0232 %Since COBRA no longer contains rev field it is assumed that rxn is -0233 %reversible if its lower bound is set below zero -0234 if ~isfield(model,'rev') -0235 for i=1:numel(model.rxns) -0236 if model.lb(i)<0 -0237 newModel.rev(i,1)=1; -0238 else -0239 newModel.rev(i,1)=0; -0240 end -0241 end -0242 end -0243 newModel.b=zeros(numel(model.mets),1); -0244 -0245 %metComps is also mandatory, but defined later to match the order of -0246 %fields -0247 -0248 %Fields 'name' and 'id' are also considered as mandatory, but -0249 %these are added to the model during exportModel/exportToExcelFormat -0250 %anyway, so there is no point to add this information here -0251 -0252 %Optional RAVEN fields -0253 if isfield(model,'modelID') -0254 newModel.id=model.modelID; -0255 end -0256 if isfield(model,'modelName') -0257 newModel.name=model.modelName; -0258 end -0259 if isfield(model,'rules') -0260 model.grRules = rulesTogrrules(model); -0261 [grRules,rxnGeneMat] = standardizeGrRules(model,true); -0262 newModel.grRules = grRules; -0263 newModel.rxnGeneMat = rxnGeneMat; -0264 end -0265 if isfield(model,'rxnECNumbers') -0266 newModel.eccodes=regexprep(model.rxnECNumbers,'EC|EC:',''); -0267 end -0268 if any(isfield(model,rxnCOBRAfields)) -0269 for i=1:numel(model.rxns) -0270 counter=1; -0271 newModel.rxnMiriams{i,1}=[]; -0272 if isfield(model,'rxnReferences') -0273 if ~isempty(model.rxnReferences{i}) -0274 pmids = model.rxnReferences{i}; -0275 pmids = strsplit(pmids,'; '); -0276 nonPmids = cellfun(@isempty,regexp(pmids,'^\d+$','match','once')); -0277 if any(nonPmids) %Not a pubmed id, keep in rxnReferences instead -0278 newModel.rxnReferences{i,1} = strjoin(pmids(nonPmids),', '); -0279 pmids(nonPmids)=[]; -0280 end -0281 for j = 1:length(pmids) -0282 newModel.rxnMiriams{i,1}.name{counter,1} = 'pubmed'; -0283 newModel.rxnMiriams{i,1}.value{counter,1} = pmids{j}; -0284 counter=counter+1; -0285 end -0286 end -0287 end -0288 for j = 2:length(rxnCOBRAfields) %Start from 2, as 1 is rxnReferences -0289 if isfield(model,rxnCOBRAfields{j}) -0290 rxnAnnotation = eval(['model.' rxnCOBRAfields{j} '{i}']); -0291 if ~isempty(rxnAnnotation) -0292 rxnAnnotation = strtrim(strsplit(rxnAnnotation,';')); -0293 for a=1:length(rxnAnnotation) -0294 newModel.rxnMiriams{i,1}.name{counter,1} = rxnNamespaces{j}; -0295 newModel.rxnMiriams{i,1}.value{counter,1} = rxnAnnotation{a}; -0296 counter=counter+1; -0297 end -0298 end -0299 end -0300 end -0301 end -0302 end -0303 if isfield(newModel,'rxnReferences') -0304 emptyEntry = cellfun(@isempty,newModel.rxnReferences); -0305 newModel.rxnReferences(emptyEntry)={''}; +0039 ravenPath=findRAVENroot(); +0040 +0041 % Load COBRA field information +0042 fid = fopen(fullfile(ravenPath,'struct_conversion','COBRA_structure_fields.csv')); % Taken from https://github.com/opencobra/cobratoolbox/blob/develop/src/base/io/definitions/COBRA_structure_fields.csv +0043 fieldFile = textscan(fid,repmat('%s',1,15),'Delimiter','\t','HeaderLines',1); +0044 dbFields = ~cellfun(@isempty,fieldFile{5}); % Only keep fields with database annotations that should be translated to xxxMiriams +0045 dbFields = dbFields & ~contains(fieldFile{1},{'metInChIString','metKEGGID','metPubChemID','rxnECNumbers'}); +0046 COBRAnamespace = fieldFile{5}(dbFields); +0047 COBRAnamespace = regexprep(COBRAnamespace,';.*',''); % Only keep first suggested namespace +0048 COBRAfields = fieldFile{1}(dbFields); +0049 fclose(fid); +0050 +0051 % Load conversion between additional COBRA fields and namespaces: +0052 fid = fopen(fullfile(ravenPath,'struct_conversion','cobraNamespaces.csv')); +0053 fieldFile = textscan(fid,'%s %s','Delimiter',',','HeaderLines',0); +0054 COBRAfields = [COBRAfields; fieldFile{1}]; +0055 COBRAnamespace = [COBRAnamespace; fieldFile{2}]; +0056 rxnCOBRAfields = COBRAfields(startsWith(COBRAfields,'rxn')); +0057 rxnNamespaces = COBRAnamespace(startsWith(COBRAfields,'rxn')); +0058 metCOBRAfields = COBRAfields(startsWith(COBRAfields,'met')); +0059 metNamespaces = COBRAnamespace(startsWith(COBRAfields,'met')); +0060 geneCOBRAfields = COBRAfields(startsWith(COBRAfields,'gene')); +0061 geneNamespaces = COBRAnamespace(startsWith(COBRAfields,'gene')); +0062 fclose(fid); +0063 +0064 if isRaven +0065 %Firstly remove boundary metabolites +0066 model=simplifyModel(model); +0067 end +0068 +0069 % Keep fields that have identical names and content +0070 newModel.S=model.S; +0071 newModel.lb=model.lb; +0072 newModel.ub=model.ub; +0073 if isfield(model,'c') +0074 newModel.c=model.c; +0075 else +0076 newModel.c=zeros(numel(model.rxns),1); +0077 end +0078 newModel.rxns=model.rxns; +0079 optFields = {'rxnNames','subSystems','rxnNotes','metDeltaG','rxnDeltaG',... +0080 'metFormulas','comps','compNames','metCharges','genes',... +0081 'rxnConfidenceScores','rxnGeneMat','metNotes','rev'}; +0082 for i=1:length(optFields) +0083 if isfield(model,optFields{i}) +0084 newModel.(optFields{i})=model.(optFields{i}); +0085 end +0086 end +0087 +0088 % Convert unique fields +0089 if isRaven +0090 fprintf('Converting RAVEN structure to COBRA..\n'); +0091 %Convert from RAVEN to COBRA structure +0092 +0093 %Mandatory COBRA fields +0094 newModel.rxns=model.rxns; +0095 if all(~cellfun(@isempty,regexp(model.mets,'\[[^\]]+\]$'))) +0096 newModel.mets=model.mets; +0097 else +0098 %Check if model has compartment info as "met_c" suffix in all metabolites: +0099 BiGGformat = false(size(model.mets)); +0100 for i=1:numel(model.comps) +0101 compPos=model.metComps==i; +0102 BiGGformat(compPos)=~cellfun(@isempty,regexp(model.mets(compPos),['_' model.comps{i} '$'])); +0103 end +0104 if all(BiGGformat) +0105 newModel.mets=model.mets; +0106 for i=1:numel(model.comps) +0107 newModel.mets=regexprep(newModel.mets,['_' model.comps{i} '$'],['[' model.comps{i} ']']); +0108 end +0109 else +0110 newModel.mets=strcat(model.mets,'[',model.comps(model.metComps),']'); +0111 end +0112 end +0113 +0114 %b, csense, osenseStr, genes, rules are also mandatory, but defined +0115 %later to match the order of fields +0116 +0117 %Optional COBRA fields +0118 if isfield(model,'id') +0119 newModel.modelID=model.id; +0120 end +0121 if isfield(model,'name') +0122 newModel.modelName=model.name; +0123 end +0124 if isfield(model,'eccodes') +0125 newModel.rxnECNumbers=model.eccodes; +0126 end +0127 if isfield(model,'rxnMiriams') +0128 [miriams,extractedMiriamNames]=extractMiriam(model.rxnMiriams); +0129 for i = 1:length(rxnCOBRAfields) +0130 j=ismember(extractedMiriamNames,rxnNamespaces{i}); +0131 if any(j) +0132 eval(['newModel.' rxnCOBRAfields{i} ' = miriams(:,j);']) +0133 end +0134 end +0135 end +0136 if isfield(model,'rxnReferences') % Concatenate model.rxnReferences to those extracted from model.rxnMiriams +0137 if isfield(newModel,'rxnReferences') +0138 newModel.rxnReferences = strcat(newModel.rxnReferences,{'; '},model.rxnReferences); +0139 newModel.rxnReferences = regexprep(newModel.rxnReferences,'^; $',''); +0140 else +0141 newModel.rxnReferences = model.rxnReferences; +0142 end +0143 end +0144 if isfield(model,'metNames') +0145 newModel.metNames=strcat(model.metNames,' [',model.compNames(model.metComps),']'); +0146 end +0147 if isfield(model,'metMiriams') +0148 [miriams,extractedMiriamNames]=extractMiriam(model.metMiriams); +0149 %Shorten miriam names for KEGG and PubChem. These shorter names +0150 %will be used later to concatenate KEGG COMPOUND/GLYCAN and PubChem +0151 %Compound/Substance, into corresponding COBRA model fields +0152 extractedMiriamNames=regexprep(extractedMiriamNames,'^kegg\..+','kegg'); +0153 extractedMiriamNames=regexprep(extractedMiriamNames,'^pubchem\..+','pubchem'); +0154 i=ismember(extractedMiriamNames,'kegg'); +0155 if any(i) % Combine KEGG compounds and glycans +0156 for j=1:length(i) +0157 if i(j) && isfield(newModel,'metKEGGID')~=1 +0158 newModel.metKEGGID=miriams(:,j); +0159 elseif i(j) +0160 newModel.metKEGGID=strcat(newModel.metKEGGID,';',miriams(:,j)); +0161 end +0162 end +0163 newModel.metKEGGID=regexprep(newModel.metKEGGID,'^;|;$',''); +0164 end +0165 i=ismember(extractedMiriamNames,'pubchem'); +0166 if any(i) % Combine Pubchem compounds and substances +0167 for j=1:length(i) +0168 if i(j) && isfield(newModel,'metPubChemID')~=1 +0169 newModel.metPubChemID=miriams(:,j); +0170 elseif i(j) +0171 newModel.metPubChemID=strcat(newModel.metPubChemID,';',miriams(:,j)); +0172 end +0173 end +0174 newModel.metPubChemID=regexprep(newModel.metPubChemID,'^;|;$',''); +0175 end +0176 %All other Miriams can be directly parsed with no modifications: +0177 for i = 1:length(metCOBRAfields) +0178 j=ismember(extractedMiriamNames,metNamespaces{i}); +0179 if any(j) +0180 eval(['newModel.' metCOBRAfields{i} ' = miriams(:,j);']) +0181 end +0182 end +0183 end +0184 if isfield(model,'inchis') +0185 newModel.metInChIString=regexprep(strcat('InChI=', model.inchis),'^InChI=$',''); +0186 end +0187 newModel.b=zeros(numel(model.mets),1); +0188 newModel.csense=repmat('E',size(model.mets)); +0189 if isfield(model,'geneMiriams') +0190 [miriams,extractedMiriamNames]=extractMiriam(model.geneMiriams); +0191 for i = 1:length(geneCOBRAfields) +0192 j=ismember(extractedMiriamNames,geneNamespaces{i}); +0193 if any(j) +0194 eval(['newModel.' geneCOBRAfields{i} ' = miriams(:,j);']) +0195 end +0196 end +0197 end +0198 if isfield(model,'geneShortNames') +0199 newModel.geneNames=model.geneShortNames; +0200 end +0201 if isfield(model,'genes') +0202 newModel.rules=grrulesToRules(model); +0203 else +0204 fprintf('WARNING: no genes detected. The model therefore may not be exportable to SBML file with writeCbModel\n'); +0205 end +0206 newModel.osenseStr='max'; +0207 else +0208 fprintf('Converting COBRA structure to RAVEN..\n'); +0209 %Convert from COBRA to RAVEN structure +0210 +0211 %Mandatory RAVEN fields +0212 newModel.mets=model.mets; +0213 if ~isfield(model,'comps') +0214 %Since 'comps' field is not mandatory in COBRA, it may be required +0215 %to obtain the non-redundant list of comps from metabolite ids, if +0216 %'comps' field is not available +0217 newModel.comps = unique(regexprep(model.mets,'.*\[([^\]]+)\]$','$1')); +0218 newModel.compNames = newModel.comps; +0219 end +0220 for i=1:numel(newModel.comps) +0221 newModel.mets=regexprep(newModel.mets,['\[', newModel.comps{i}, '\]$'],''); +0222 newModel.mets=regexprep(newModel.mets,['\[', newModel.compNames{i}, '\]$'],''); +0223 end +0224 +0225 %In some cases (e.g. any model that uses BiGG ids as main ids), there +0226 %may be overlapping mets due to removal of compartment info. To avoid +0227 %this, we change compartments from e.g. [c] into _c +0228 if numel(unique(newModel.mets))~=numel(model.mets) +0229 newModel.mets=model.mets; +0230 for i=1:numel(newModel.comps) +0231 newModel.mets=regexprep(newModel.mets,['\[' newModel.comps{i} '\]$'],['_' newModel.comps{i}]); +0232 end +0233 end +0234 %Since COBRA no longer contains rev field it is assumed that rxn is +0235 %reversible if its lower bound is set below zero +0236 if ~isfield(model,'rev') +0237 for i=1:numel(model.rxns) +0238 if model.lb(i)<0 +0239 newModel.rev(i,1)=1; +0240 else +0241 newModel.rev(i,1)=0; +0242 end +0243 end +0244 end +0245 newModel.b=zeros(numel(model.mets),1); +0246 +0247 %metComps is also mandatory, but defined later to match the order of +0248 %fields +0249 +0250 %Fields 'name' and 'id' are also considered as mandatory, but +0251 %these are added to the model during exportModel/exportToExcelFormat +0252 %anyway, so there is no point to add this information here +0253 +0254 %Optional RAVEN fields +0255 if isfield(model,'modelID') +0256 newModel.id=model.modelID; +0257 end +0258 if isfield(model,'modelName') +0259 newModel.name=model.modelName; +0260 end +0261 if isfield(model,'rules') && ~isfield(model,'grRules') +0262 model.grRules = rulesTogrrules(model); +0263 end +0264 if isfield(model,'grRules') +0265 [grRules,rxnGeneMat] = standardizeGrRules(model,true); +0266 newModel.grRules = grRules; +0267 newModel.rxnGeneMat = rxnGeneMat; +0268 end +0269 if isfield(model,'rxnECNumbers') +0270 newModel.eccodes=regexprep(model.rxnECNumbers,'EC|EC:',''); +0271 end +0272 if any(isfield(model,rxnCOBRAfields)) +0273 for i=1:numel(model.rxns) +0274 counter=1; +0275 newModel.rxnMiriams{i,1}=[]; +0276 if isfield(model,'rxnReferences') +0277 if ~isempty(model.rxnReferences{i}) +0278 pmids = model.rxnReferences{i}; +0279 pmids = strsplit(pmids,'; '); +0280 nonPmids = cellfun(@isempty,regexp(pmids,'^\d+$','match','once')); +0281 if any(nonPmids) %Not a pubmed id, keep in rxnReferences instead +0282 newModel.rxnReferences{i,1} = strjoin(pmids(nonPmids),', '); +0283 pmids(nonPmids)=[]; +0284 end +0285 for j = 1:length(pmids) +0286 newModel.rxnMiriams{i,1}.name{counter,1} = 'pubmed'; +0287 newModel.rxnMiriams{i,1}.value{counter,1} = pmids{j}; +0288 counter=counter+1; +0289 end +0290 end +0291 end +0292 for j = 2:length(rxnCOBRAfields) %Start from 2, as 1 is rxnReferences +0293 if isfield(model,rxnCOBRAfields{j}) +0294 rxnAnnotation = eval(['model.' rxnCOBRAfields{j} '{i}']); +0295 if ~isempty(rxnAnnotation) +0296 rxnAnnotation = strtrim(strsplit(rxnAnnotation,';')); +0297 for a=1:length(rxnAnnotation) +0298 newModel.rxnMiriams{i,1}.name{counter,1} = rxnNamespaces{j}; +0299 newModel.rxnMiriams{i,1}.value{counter,1} = rxnAnnotation{a}; +0300 counter=counter+1; +0301 end +0302 end +0303 end +0304 end +0305 end 0306 end -0307 if any(isfield(model,geneCOBRAfields)) -0308 for i=1:numel(model.genes) -0309 counter=1; -0310 newModel.geneMiriams{i,1}=[]; -0311 for j = 1:length(geneCOBRAfields) -0312 if isfield(model,geneCOBRAfields{j}) -0313 geneAnnotation = eval(['model.' geneCOBRAfields{j} '{i}']); -0314 if ~isempty(geneAnnotation) -0315 geneAnnotation = strtrim(strsplit(geneAnnotation,';')); -0316 for a=1:length(geneAnnotation) -0317 newModel.geneMiriams{i,1}.name{counter,1} = geneNamespaces{j}; -0318 newModel.geneMiriams{i,1}.value{counter,1} = geneAnnotation{a}; -0319 counter=counter+1; -0320 end -0321 end -0322 end -0323 end -0324 end -0325 end -0326 if isfield(model,'geneNames') -0327 newModel.geneShortNames=model.geneNames; -0328 end -0329 newModel.metNames=model.metNames; -0330 for i=1:numel(newModel.comps) -0331 newModel.metNames=regexprep(newModel.metNames,['\[', newModel.comps{i}, '\]$'],''); -0332 newModel.metNames=regexprep(newModel.metNames,['\[', newModel.compNames{i}, '\]$'],''); -0333 end -0334 newModel.metNames=deblank(newModel.metNames); -0335 newModel.metComps=regexprep(model.mets,'^.+\[',''); -0336 newModel.metComps=regexprep(newModel.metComps,'\]$',''); -0337 [~, newModel.metComps]=ismember(newModel.metComps,newModel.comps); -0338 if isfield(model,'metInChIString') -0339 newModel.inchis=regexprep(model.metInChIString,'^InChI=',''); -0340 end -0341 printWarning=false; -0342 if any(isfield(model,[metCOBRAfields;'metKEGGID';'metPubChemID'])) -0343 for i=1:numel(model.mets) -0344 counter=1; -0345 newModel.metMiriams{i,1}=[]; -0346 if isfield(model,'metKEGGID') -0347 if ~isempty(model.metKEGGID{i}) -0348 if strcmp(model.metKEGGID{i}(1),'C') -0349 newModel.metMiriams{i,1}.name{counter,1} = 'kegg.compound'; -0350 newModel.metMiriams{i,1}.value{counter,1} = model.metKEGGID{i}; -0351 counter=counter+1; -0352 elseif strcmp(model.metKEGGID{i}(1),'G') -0353 newModel.metMiriams{i,1}.name{counter,1} = 'kegg.glycan'; +0307 if isfield(newModel,'rxnReferences') +0308 emptyEntry = cellfun(@isempty,newModel.rxnReferences); +0309 newModel.rxnReferences(emptyEntry)={''}; +0310 end +0311 if any(isfield(model,geneCOBRAfields)) +0312 for i=1:numel(model.genes) +0313 counter=1; +0314 newModel.geneMiriams{i,1}=[]; +0315 for j = 1:length(geneCOBRAfields) +0316 if isfield(model,geneCOBRAfields{j}) +0317 geneAnnotation = eval(['model.' geneCOBRAfields{j} '{i}']); +0318 if ~isempty(geneAnnotation) +0319 geneAnnotation = strtrim(strsplit(geneAnnotation,';')); +0320 for a=1:length(geneAnnotation) +0321 newModel.geneMiriams{i,1}.name{counter,1} = geneNamespaces{j}; +0322 newModel.geneMiriams{i,1}.value{counter,1} = geneAnnotation{a}; +0323 counter=counter+1; +0324 end +0325 end +0326 end +0327 end +0328 end +0329 end +0330 if isfield(model,'geneNames') +0331 newModel.geneShortNames=model.geneNames; +0332 end +0333 newModel.metNames=model.metNames; +0334 for i=1:numel(newModel.comps) +0335 newModel.metNames=regexprep(newModel.metNames,['\[', newModel.comps{i}, '\]$'],''); +0336 newModel.metNames=regexprep(newModel.metNames,['\[', newModel.compNames{i}, '\]$'],''); +0337 end +0338 newModel.metNames=deblank(newModel.metNames); +0339 newModel.metComps=regexprep(model.mets,'^.+\[',''); +0340 newModel.metComps=regexprep(newModel.metComps,'\]$',''); +0341 [~, newModel.metComps]=ismember(newModel.metComps,newModel.comps); +0342 if isfield(model,'metInChIString') +0343 newModel.inchis=regexprep(model.metInChIString,'^InChI=',''); +0344 end +0345 printWarning=false; +0346 if any(isfield(model,[metCOBRAfields;'metKEGGID';'metPubChemID'])) +0347 for i=1:numel(model.mets) +0348 counter=1; +0349 newModel.metMiriams{i,1}=[]; +0350 if isfield(model,'metKEGGID') +0351 if ~isempty(model.metKEGGID{i}) +0352 if strcmp(model.metKEGGID{i}(1),'C') +0353 newModel.metMiriams{i,1}.name{counter,1} = 'kegg.compound'; 0354 newModel.metMiriams{i,1}.value{counter,1} = model.metKEGGID{i}; 0355 counter=counter+1; -0356 end -0357 end -0358 end -0359 if isfield(model,'metPubChemID') -0360 if ~isempty(model.metPubChemID{i}) -0361 if length(model.metPubChemID{i})>3 && strcmp(model.metPubChemID{i}(1:4),'CID:') -0362 newModel.metMiriams{i,1}.name{counter,1} = 'pubchem.compound'; -0363 newModel.metMiriams{i,1}.value{counter,1} = model.metPubChemID{i}; -0364 counter=counter+1; -0365 elseif length(model.metPubChemID{i})>3 && strcmp(model.metPubChemID{i}(1:4),'SID:') -0366 newModel.metMiriams{i,1}.name{counter,1} = 'pubchem.substance'; +0356 elseif strcmp(model.metKEGGID{i}(1),'G') +0357 newModel.metMiriams{i,1}.name{counter,1} = 'kegg.glycan'; +0358 newModel.metMiriams{i,1}.value{counter,1} = model.metKEGGID{i}; +0359 counter=counter+1; +0360 end +0361 end +0362 end +0363 if isfield(model,'metPubChemID') +0364 if ~isempty(model.metPubChemID{i}) +0365 if length(model.metPubChemID{i})>3 && strcmp(model.metPubChemID{i}(1:4),'CID:') +0366 newModel.metMiriams{i,1}.name{counter,1} = 'pubchem.compound'; 0367 newModel.metMiriams{i,1}.value{counter,1} = model.metPubChemID{i}; 0368 counter=counter+1; -0369 else -0370 newModel.metMiriams{i,1}.name{counter,1} = 'pubchem.compound'; +0369 elseif length(model.metPubChemID{i})>3 && strcmp(model.metPubChemID{i}(1:4),'SID:') +0370 newModel.metMiriams{i,1}.name{counter,1} = 'pubchem.substance'; 0371 newModel.metMiriams{i,1}.value{counter,1} = model.metPubChemID{i}; 0372 counter=counter+1; -0373 printWarning=true; -0374 end -0375 end -0376 end -0377 for j = 1:length(metCOBRAfields) -0378 if isfield(model,metCOBRAfields{j}) -0379 metAnnotation = eval(['model.' metCOBRAfields{j} '{i}']); -0380 if ~isempty(metAnnotation) -0381 metAnnotation = strtrim(strsplit(metAnnotation,';')); -0382 for a=1:length(metAnnotation) -0383 newModel.metMiriams{i,1}.name{counter,1} = metNamespaces{j}; -0384 newModel.metMiriams{i,1}.value{counter,1} = metAnnotation{a}; -0385 counter=counter+1; -0386 end -0387 end -0388 end -0389 end -0390 end -0391 end -0392 if printWarning -0393 fprintf('Could not determine whether PubChemIDs are compounds (CID)\n or substances (SID). All annotated PubChemIDs will therefore \n be assigned as compounds (CID).\n'); -0394 end -0395 end -0396 -0397 % Order fields -0398 modelNew=standardizeModelFieldOrder(newModel); % Corrects for both RAVEN and COBRA models +0373 else +0374 newModel.metMiriams{i,1}.name{counter,1} = 'pubchem.compound'; +0375 newModel.metMiriams{i,1}.value{counter,1} = model.metPubChemID{i}; +0376 counter=counter+1; +0377 printWarning=true; +0378 end +0379 end +0380 end +0381 for j = 1:length(metCOBRAfields) +0382 if isfield(model,metCOBRAfields{j}) +0383 metAnnotation = eval(['model.' metCOBRAfields{j} '{i}']); +0384 if ~isempty(metAnnotation) +0385 metAnnotation = strtrim(strsplit(metAnnotation,';')); +0386 for a=1:length(metAnnotation) +0387 newModel.metMiriams{i,1}.name{counter,1} = metNamespaces{j}; +0388 newModel.metMiriams{i,1}.value{counter,1} = metAnnotation{a}; +0389 counter=counter+1; +0390 end +0391 end +0392 end +0393 end +0394 end +0395 end +0396 if printWarning +0397 fprintf('Could not determine whether PubChemIDs are compounds (CID)\n or substances (SID). All annotated PubChemIDs will therefore \n be assigned as compounds (CID).\n'); +0398 end 0399 end 0400 -0401 function rules=grrulesToRules(model) -0402 %This function just takes grRules, changes all gene names to -0403 %'x(geneNumber)' and also changes 'or' and 'and' relations to corresponding -0404 %symbols -0405 replacingGenes=cell([size(model.genes,1) 1]); -0406 for i=1:numel(replacingGenes) -0407 replacingGenes{i}=strcat('x(',num2str(i),')'); -0408 end -0409 rules = strcat({' '},model.grRules,{' '}); -0410 for i=1:length(model.genes) -0411 rules=regexprep(rules,[' ' model.genes{i} ' '],[' ' replacingGenes{i} ' ']); -0412 rules=regexprep(rules,['(' model.genes{i} ' '],['(' replacingGenes{i} ' ']); -0413 rules=regexprep(rules,[' ' model.genes{i} ')'],[' ' replacingGenes{i} ')']); -0414 end -0415 rules=regexprep(rules,' and ',' & '); -0416 rules=regexprep(rules,' or ',' | '); -0417 rules=strtrim(rules); +0401 % Order fields +0402 newModel=standardizeModelFieldOrder(newModel); % Corrects for both RAVEN and COBRA models +0403 end +0404 +0405 function rules=grrulesToRules(model) +0406 %This function just takes grRules, changes all gene names to +0407 %'x(geneNumber)' and also changes 'or' and 'and' relations to corresponding +0408 %symbols +0409 replacingGenes=cell([size(model.genes,1) 1]); +0410 for i=1:numel(replacingGenes) +0411 replacingGenes{i}=strcat('x(',num2str(i),')'); +0412 end +0413 rules = strcat({' '},model.grRules,{' '}); +0414 for i=1:length(model.genes) +0415 rules=regexprep(rules,[' ' model.genes{i} ' '],[' ' replacingGenes{i} ' ']); +0416 rules=regexprep(rules,['(' model.genes{i} ' '],['(' replacingGenes{i} ' ']); +0417 rules=regexprep(rules,[' ' model.genes{i} ')'],[' ' replacingGenes{i} ')']); 0418 end -0419 -0420 function grRules=rulesTogrrules(model) -0421 %This function takes rules, replaces &/| for and/or, replaces the x(i) -0422 %format with the actual gene ID, and takes out extra whitespace and -0423 %redundant parenthesis introduced by COBRA, to create grRules. -0424 grRules = strrep(model.rules,'&','and'); -0425 grRules = strrep(grRules,'|','or'); -0426 for i = 1:length(model.genes) -0427 grRules = strrep(grRules,['x(' num2str(i) ')'],model.genes{i}); -0428 end -0429 grRules = strrep(grRules,'( ','('); -0430 grRules = strrep(grRules,' )',')'); -0431 grRules = regexprep(grRules,'^(',''); %rules that start with a "(" -0432 grRules = regexprep(grRules,')$',''); %rules that end with a ")" -0433 end +0419 rules=regexprep(rules,' and ',' & '); +0420 rules=regexprep(rules,' or ',' | '); +0421 rules=strtrim(rules); +0422 end +0423 +0424 function grRules=rulesTogrrules(model) +0425 %This function takes rules, replaces &/| for and/or, replaces the x(i) +0426 %format with the actual gene ID, and takes out extra whitespace and +0427 %redundant parenthesis introduced by COBRA, to create grRules. +0428 grRules = strrep(model.rules,'&','and'); +0429 grRules = strrep(grRules,'|','or'); +0430 for i = 1:length(model.genes) +0431 grRules = strrep(grRules,['x(' num2str(i) ')'],model.genes{i}); +0432 end +0433 grRules = strrep(grRules,'( ','('); +0434 grRules = strrep(grRules,' )',')'); +0435 grRules = regexprep(grRules,'^(',''); %rules that start with a "(" +0436 grRules = regexprep(grRules,')$',''); %rules that end with a ")" +0437 end
Generated by m2html © 2005
\ No newline at end of file diff --git a/struct_conversion/ravenCobraWrapper.m b/struct_conversion/ravenCobraWrapper.m index 0e391969..b1b37e77 100755 --- a/struct_conversion/ravenCobraWrapper.m +++ b/struct_conversion/ravenCobraWrapper.m @@ -8,7 +8,9 @@ % % This function is a bidirectional tool to convert between RAVEN and % COBRA structures. It recognises COBRA structure by checking field -% 'rules' existense, which is only found in COBRA Toolbox structure. +% 'rules' existense, which is only found in COBRA Toolbox structure. If +% the COBRA model also has a grRules field, then this will be used +% instead of parsing the rules field. % % NOTE: During RAVEN -> COBRA -> RAVEN conversion cycle the following % fields are lost: annotation, compOutside, compMiriams, rxnComps, @@ -256,8 +258,10 @@ if isfield(model,'modelName') newModel.name=model.modelName; end - if isfield(model,'rules') + if isfield(model,'rules') && ~isfield(model,'grRules') model.grRules = rulesTogrrules(model); + end + if isfield(model,'grRules') [grRules,rxnGeneMat] = standardizeGrRules(model,true); newModel.grRules = grRules; newModel.rxnGeneMat = rxnGeneMat; @@ -395,7 +399,7 @@ end % Order fields -modelNew=standardizeModelFieldOrder(newModel); % Corrects for both RAVEN and COBRA models +newModel=standardizeModelFieldOrder(newModel); % Corrects for both RAVEN and COBRA models end function rules=grrulesToRules(model)