Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add metDeltaG and rxnDeltaG fields #330

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# yeast-GEM specific #
###################
yetfl_metG.csv
yetfl_rxnG.csv

# Compiled source #
###################
*.com
Expand Down
16 changes: 10 additions & 6 deletions code/getEarlierModelVersion.m
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
% version either 'main' or 'develop' for the latest model file from
% the specified branch. Can alternatively be a specific model
% version (e.g. '8.5.1'), to load a the model file from a
% specific release version.
% specific release version, or a commit SHA.
% verbose if true, the model version (as obtained from model.id) is
% printed (opt, default true).
%
Expand All @@ -27,20 +27,24 @@
system('git show main:model/yeast-GEM.xml > _earlierModel.xml');
elseif strcmp(version,'develop')
system('git show develop:model/yeast-GEM.xml > _earlierModel.xml');
elseif ~isempty(regexp(version,'^\d+\.\d+\.\d+$'))
tagpath= ['refs/tags/v' version ':model/yeast-GEM.xml'];
else
if ~isempty(regexp(version,'^\d+\.\d+\.\d+$','once'))
version = ['refs/tags/v' version];
elseif numel(version) == 40 % Might be commit SHA
else
error('Unclear which earlier model version should be loaded')
end
tagpath= [version ':model/yeast-GEM.xml'];
[out,~]=system(['git show ' tagpath ' > _earlierModel.xml']);
if out==128 % File not found, try older folder structure
delete '_earlierModel.xml'
tagpath= ['refs/tags/v' version ':ModelFiles/xml/yeastGEM.xml'];
tagpath= [version ':ModelFiles/xml/yeastGEM.xml'];
[out,~]=system(['git show ' tagpath ' > _earlierModel.xml']);
if out==128
delete '_earlierModel.xml'
error('Cannot find the specified model version')
end
end
else
error('Unclear which earlier model version should be loaded')
end

model=importModel('_earlierModel.xml');
Expand Down
4 changes: 3 additions & 1 deletion code/loadYeastModel.m
Original file line number Diff line number Diff line change
Expand Up @@ -36,5 +36,7 @@
model = ravenCobraWrapper(model);
end
end
disp('If there is only 1 warning and no errors, it can savely be ignored.')
cd missingFields
model = loadDeltaG(model);
cd(currentDir)
end
45 changes: 45 additions & 0 deletions code/missingFields/loadDeltaG.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
function model = loadDeltaG(model)
% loadDeltaG
% Add metDeltaG and rxnDeltaG fields to a model, based on datafiles saved at
% /data/databases (model_rxnDeltaG.csv and model_metDeltaG.csv). Metabolites
% and reactions are matched by their identifiers (i.e. model.mets and
% model.rxns). If changes are made that affect the identifiers or what
% metabolites or reactions they refer to, the deltaG values will not be
% correct.
%
% Input:
% model yeast-GEM without deltaG fields
%
% Output:
% model yeast-GEM with metDeltaG and rxnDeltaG fields
%
% Usage: model = loadDeltaG(model)

if isfield(model,'metDeltaG')
disp('Existing metDeltaG field will be overwritten.')
else
model.metDeltaG = nan(numel(model.mets),1);
end
if isfield(model,'rxnDeltaG')
disp('Existing rxnDeltaG field will be overwritten.')
else
model.rxnDeltaG = nan(numel(model.rxns),1);
end

metG = readtable('../../data/databases/model_metDeltaG.csv');
rxnG = readtable('../../data/databases/model_rxnDeltaG.csv');

[a,b] = ismember(model.mets,metG.Var1);
model.metDeltaG(a) = metG.Var2(b(a));
if any(~a)
fprintf(['Not all metabolite identifiers are matched to model_metDeltaG.csv, the latter \n' ...
'file might have to be supplemented with deltaG values for new metabolites.\n'])
end

[a,b] = ismember(model.rxns,rxnG.Var1);
model.rxnDeltaG(a) = rxnG.Var2(b(a));
if any(~a)
fprintf(['Not all reaction identifiers are matched to model_rxnDeltaG.csv, the latter \n' ...
'file might have to be supplemented with deltaG values for new reaction.\n'])
end
end
36 changes: 36 additions & 0 deletions code/missingFields/saveDeltaG.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
function model = saveDeltaG(model,verbose)
% saveDeltaG
% Saves the metDeltaG and rxnDeltaG fields as tables to /data/databases/...
% model_rxnDeltaG.csv and /data/databases/model_metDeltaG.csv. When
% loadYeastModel is run, these tables will be read to reconstruct the
% metDeltaG and rxnDeltaG fields.
%
% Input:
% model yeast-GEM with deltaG fields
% verbose true or false
%
% Output:
% model yeast-GEM with metDeltaG and rxnDeltaG fields
%
% Usage: model = saveDeltaG(model,verbose)

if nargin<2
verbose=false;
end
if ~isfield(model,'metDeltaG')
if verbose
disp('No metDeltaG field found, model_metDeltaG.csv will not be changed.')
end
else
metG = array2table([model.mets, num2cell(model.metDeltaG)]);
writetable(metG,'../../data/databases/model_metDeltaG.csv');
end
if ~isfield(model,'rxnDeltaG')
if verbose
disp('No rxnDeltaG field found, model_rxnDeltaG.csv will not be changed')
end
else
rxnG = array2table([model.rxns, num2cell(model.rxnDeltaG)]);
writetable(rxnG,'../../data/databases/model_rxnDeltaG.csv');
end
end
70 changes: 70 additions & 0 deletions code/modelCuration/v8_7_0.m
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,76 @@
genesInfo = fullfile(dataDir,'DBnewRxnsGenes.tsv');
model = curateMetsRxnsGenes(model,metsInfo,genesInfo,rxnsCoeffs,rxnsInfo);

%% Add deltaG for reactions and metabolites (PR #330)
cd(dataDir)
% Gather yETFL data
% First-time run:
websave('input_models.zip','https://zenodo.org/record/4778047/files/input_models.zip?download=1');
unzip('input_models.zip');
modelYETFL = load('input_models/yeast8_thermo_curated.mat');
modelYETFL = modelYETFL.model;
modelYETFL.metNames = regexprep(modelYETFL.metNames,' \[[\w ]+\]$','');
[modelYETFL.metNames,idx]=unique(modelYETFL.metNames(1:end-1));
modelYETFL.metDeltaGFtr = modelYETFL.metDeltaGFtr(idx);
yetfl_metG = array2table([modelYETFL.metNames, num2cell(modelYETFL.metDeltaGFtr)]);
writetable(yetfl_metG,'yetfl_metG.csv');
yetfl_rxnG = array2table([modelYETFL.rxns, num2cell(modelYETFL.rxnDeltaGR)]);
writetable(yetfl_rxnG,'yetfl_rxnG.csv');
clear modelYETFL idx
rmdir input_models s
delete input_models.zip
% Repeated runs can reuse this file, but will not submit to GitHub
yetfl_metG = readtable('yetfl_metG.csv');
yetfl_rxnG = readtable('yetfl_rxnG.csv');

% Gather ModelSEED data via get_seed_data.py, CSV obtained how?
seed_metG = readtable('modelseed_metG.csv');

% Gather dG-predictor data via run_dgpredictor.py
dgpred_rxnG = readtable('dgpred_rxnG.csv');

% Assign metDeltaG, preferred source: yETFL > ModelSEED
model.metDeltaG = nan(numel(model.mets),1);

% Map to yETFL by metNames
[a,b] = ismember(model.metNames,yetfl_metG.Var1);
model.metDeltaG(a) = round(yetfl_metG.Var2(b(a)),2);

% If no deltaG is assigned, map to ModelSEED by KEGG IDs
noDeltaG = isnan(model.metDeltaG);

kegg = extractMiriam(model.metMiriams,'kegg.compound');
hasKEGG = ~cellfun(@isempty,kegg);
toCheck = find(hasKEGG & noDeltaG);

[a,b] = ismember(kegg(toCheck),seed_metG.kegg);
model.metDeltaG(toCheck(a)) = round(seed_metG.deltag(b(a)),2);

% Assign rxnDeltaG, preferred source: yETFL > dG-predictor
model.rxnDeltaG = nan(numel(model.rxns),1);

% Map to yETFL by reaction IDs
[a,b] = ismember(model.rxns,yetfl_rxnG.Var1);
model.rxnDeltaG(a) = round(yetfl_rxnG.Var2(b(a)),2);

% If no deltaG is assigned, calculate by dG-predictor
noDeltaG = isnan(model.rxnDeltaG);
% Actually, no deltaG is found for model.rxns(3983:4131). Current
% dG-predictor dataset has data for model.rxns(1:4062). For now, use the
% deltaGs for model.rxns(3983:4062).
model.rxnDeltaG(3983:4062) = round(dgpred_rxnG.detaG(3983:4062),2);

% RAVEN does not yet support .rxnDeltaG and .metDeltaG fields.
% For now, write these two tables. RAVEN will soon support I/O of deltaG in
% YAML files, and a custom function could be written to add the field after
% loading SBML file as well. Otherwise, the current script can be run
cd ../../databases/
metG = array2table([model.mets, num2cell(model.metDeltaG)]);
writetable(metG,'model_metDeltaG.csv');
rxnG = array2table([model.rxns, num2cell(model.rxnDeltaG)]);
writetable(rxnG,'model_rxnDeltaG.csv');
cd(fullfile(dataDir,'..','..','..','code','modelCuration'))

checkModelStruct(model,true,false)

%% DO NOT CHANGE OR REMOVE THE CODE BELOW THIS LINE.
Expand Down
8 changes: 5 additions & 3 deletions code/saveYeastModel.m
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,11 @@ function saveYeastModel(model,upDATE,allowNoGrowth,binaryFiles)
exportForGit(model,'yeast-GEM','../model',{'yml','txt','xlsx','mat'},false,false);
end

%Write deltaG fields to file
cd missingFields
saveDeltaG(model,false);
cd ..

%Update README file: date + size of model
modelVersion = regexprep(model.id,'yeastGEM_v?','');
nGenes=num2str(numel(model.genes));
Expand Down Expand Up @@ -116,11 +121,9 @@ function saveYeastModel(model,upDATE,allowNoGrowth,binaryFiles)

%Switch back to original folder
cd(currentDir)

end

%%

function checkGrowth(model,condition,allowNoGrowth)
%Function that checks if the model can grow or not using COBRA under a
%given condition (aerobic or anaerobic). Will either return warnings or
Expand Down Expand Up @@ -151,5 +154,4 @@ function checkGrowth(model,condition,allowNoGrowth)
error([dispText ' before committing.'])
end
end

end
Loading