refactor: FSEOF

SysBioChalmers · Oct 3, 2023 · a3b327a · a3b327a
1 parent cb7599d
commit a3b327a
Showing 1 changed file with 63 additions and 137 deletions.
diff --git a/src/geckomat/utilities/ecFSEOF.m b/src/geckomat/utilities/ecFSEOF.m
@@ -1,4 +1,4 @@
-function fseof = ecFSEOF(model,targetRxn,csRxn,nSteps,outputFile,filePath,filterG,modelAdapter)
+function fseof = ecFSEOF(model,targetRxn,csRxn,nSteps,outputFile,filePath,modelAdapter)
 % ecFSEOF
 %   Function that runs Flux-Scanning with Enforced Objective Function (FSEOF)
 %   for a specified production target.
@@ -17,51 +17,39 @@
 %                   - at the reactions level, ecFSEOF_rxns.tsv
 %                   (Optional, default in the 'output' sub-folder taken from
 %                   modelAdapter, e.g. output/ecFSEOF_rxns.tsv)
-%   filterG         logical value. TRUE if genes K_scores results should be
-%                   filtered according to the alpha vector distribution
-%                   (Optional, default true)
 %   modelAdapter    a loaded model adapter. (Optional, will otherwise use
 %                   the default model adapter)
 %
 % Output:
 %   fseof        structure with all results. Contains the following fields:
-%                - flux_WT:   flux distribution for the WT strain (100% of
-%                             carbon towards growth).
 %                - alpha:     scalling factors used for enforced objetive
-%                             limits
+%                             limits (from minimum to maximum production)
 %                - v_matrix:  fluxes for each reaction in rxnsTable.rxns
 %                             and each alpha.
-%                - k_matrix:  fold changes for each reaction in
-%                             rxnsTable.rxns and each alpha.
 %                - rxnsTable: a list with all reactions with fluxes that
 %                             change consistently as target production
 %                             increases.
-%                             Contains: ID, name, k_score, gene rule, equation
+%                             Contains: ID, name, slope, gene rule, equation
 %                - geneTable: a list with all selected targets that
 %                             increase production.
-%                             Contains: gene, shortName, k_score
+%                             Contains: gene, shortName, slope
 %
 % Usage:
 %   fseof = ecFSEOF(model,targetRxn,csRxn,nSteps,outputFile,filePath,filterG,modelAdapter)
 
-if nargin < 8 || isempty(modelAdapter)
+if nargin < 7 || isempty(modelAdapter)
     modelAdapter = ModelAdapterManager.getDefault();
     if isempty(modelAdapter)
         error('Either send in a modelAdapter or set the default model adapter in the ModelAdapterManager.')
     end
 end
 params = modelAdapter.getParameters();
 
-if nargin < 7 || isempty(filterG)
-    filterG = true;
-end
-
-if nargin < 6 || isempty(filePath)
-    filePath = fullfile(params.path,'output');
-end
-
 if nargin < 5 || isempty(outputFile)
     outputFile = false;
+    if nargin < 6 || isempty(filePath)
+        filePath = fullfile(params.path,'output');
+    end
 end
 
 if nargin < 4 || isempty(nSteps)
@@ -99,16 +87,10 @@
 
 % Define alpha vector for suboptimal enforced objective values between
 % minimal production and 90%% of the maximum theoretical yield, initialize
-% fluxes, K_scores matrices and other variables
-alpha     = iniTarget:((maxTarget-iniTarget)/(nSteps-1)):maxTarget;
-v_matrix  = zeros(length(model.rxns),length(alpha));
-% k_matrix  = zeros(length(model.rxns),length(alpha));
-tolerance = 1e-4;
+% fluxes matriz
+alpha    = iniTarget:((maxTarget-iniTarget)/(nSteps-1)):maxTarget;
+v_matrix = zeros(length(model.rxns),length(alpha));
 
-% % Get WT flux distribution
-% model.lb(bioRxnIdx) = sol.x(bioRxnIdx) * 0.99;
-% model = setParam(model, 'obj', 'prot_pool_exchange', 1);
-% solWT = solveLP(model,1);
 % Enforce objective flux iteratively
 progressbar('Flux Scanning with Enforced Objective Function')
 for i = 1:nSteps
@@ -118,19 +100,11 @@
     % Restore minimum biomass lb to zero and set it as objective
     model.lb(bioRxnIdx) = 0;
     model = setParam(model,'obj',params.bioRxn,1);
-    sol = solveLP(model,1);
-
-    % Relax target lb, fix biomass and minimize protein usage
-    % model.lb(targetRxnIdx) = model.lb(targetRxnIdx) * 0.99;
-    % model.lb(bioRxnIdx)    = sol.x(bioRxnIdx) * 0.999;
-    % model = setParam(model, 'obj', 'prot_pool_exchange', 1);
-    % sol = solveLP(model,1);
+    sol   = solveLP(model,1);
 
     % Store flux distribution
     v_matrix(:,i) = sol.x;
 
-    % k_matrix(:,i) = v_matrix(:,i)./v_matrix(:,1);
-
     progressbar(i/nSteps)
 end
 progressbar(1) % Make sure it closes
@@ -141,30 +115,37 @@
 withGR(stdIdx) = 0;
 rxns     = model.rxns(withGR);
 v_matrix = v_matrix(withGR,:);
-% k_matrix = k_matrix(withGR,:);
 rxnGeneM = model.rxnGeneMat(withGR,:);
 
 % Filter out rxns that are always zero
 zero_flux = ~all(abs(v_matrix(:,1:nSteps))<=1e-2,2);
-% zero_flux = sum(~isnan(k_matrix),2) > 0;
 rxns      = rxns(zero_flux,:);
 v_matrix  = v_matrix(zero_flux,:);
-% k_matrix  = k_matrix(zero_flux,:);
 rxnGeneM  = rxnGeneM(zero_flux,:);
 
-% Identify those rxns that always increase or decrease, and only keep them
-k_rxns = zeros(size(rxns));
+% Identify those rxns that always increase or decrease, and calculate the
+% slope as the difference in the flux when enforce objetive target 
+% production is set to 90%% of the maximum teorethical yield 
+% << v_matrix(i,nSteps-1) >> and the flux when the enforce objetive target
+% production is set to the minimum << v_matrix(i,1) >> for the reaction i, 
+% divided by maxTarget-maxTarget/nSteps-1.
+slope_rxns  = zeros(size(rxns));
 target_rxns = logical(size(rxns));
 target_type = cell(size(rxns));
 for i = 1:length(rxns)
     if issorted(abs(v_matrix(i,1:nSteps)),'strictascend')
+        % Those reactions that always increase while enforcing target
+        % production are suggested for Over Expression
         target_rxns(i) = true;
-        k_rxns(i) = abs(v_matrix(i,nSteps-1)-v_matrix(i,1))/abs(maxTarget-maxTarget/nSteps-1);
-        % k_rxns(i) = abs(iniTarget-maxTarget/nSteps)/abs(v_matrix(i,nSteps)-v_matrix(i,1));
+        slope_rxns(i)  = abs(v_matrix(i,nSteps-1)-v_matrix(i,1))/abs(maxTarget-maxTarget/nSteps-1);
         target_type(i) = {'OE'};
     elseif issorted(abs(v_matrix(i,1:nSteps)),'strictdescend')
+        % Those reactions that always decrease while enforcing target
+        % production are suggested for KnockDown or KnockOut. KO are those
+        % reactions which have zero flux when enforcing target production 
+        % to 90%% of the maximum theoretical yield. 
         target_rxns(i) = true;
-        k_rxns(i) = abs(v_matrix(i,nSteps-1)-v_matrix(i,1))/abs(maxTarget-maxTarget/nSteps-1);
+        slope_rxns(i)  = abs(v_matrix(i,nSteps-1)-v_matrix(i,1))/abs(maxTarget-maxTarget/nSteps-1);
         if v_matrix(i,nSteps) == 0
             target_type(i) = {'KO'};
         else
@@ -173,124 +154,69 @@
     end
 end
 
-rxns     = rxns(target_rxns);
-v_matrix = v_matrix(target_rxns,:);
-% k_matrix = k_matrix(target_rxns,:);
-rxnGeneM = rxnGeneM(target_rxns,:);
-k_rxns   = k_rxns(target_rxns);
+% Only keep those reaction that shows an increase or decrease pattern.
+rxns        = rxns(target_rxns);
+v_matrix    = v_matrix(target_rxns,:);
+rxnGeneM    = rxnGeneM(target_rxns,:);
+slope_rxns  = slope_rxns(target_rxns);
 target_type = target_type(target_rxns);
 
-% Order from highest to lowest
-[~,order] = sort(k_rxns,'descend');
-rxns      = rxns(order);
-v_matrix  = v_matrix(order,:);
-rxnGeneM  = rxnGeneM(order,:);
-k_rxns    = k_rxns(order);
+% Order from highest to lowest slope
+[~,order]   = sort(slope_rxns,'descend');
+rxns        = rxns(order);
+v_matrix    = v_matrix(order,:);
+rxnGeneM    = rxnGeneM(order,:);
+slope_rxns  = slope_rxns(order);
 target_type = target_type(order);
 
-ans = 1;
-% % Replace remaining NaNs with 1s:
-% k_matrix(isnan(k_matrix)) = 1;
-% 
-% % Replace any Inf value with 1000 (maximum value is ~700):
-% k_matrix(k_matrix > 1000)  = 1000;
-% % k_matrix(k_matrix < -1000) = 2;
-
-% % Filter out values that are inconsistent at different alphas:
-% always_down = sum(v_matrix <= 1,2) == length(alpha);
-% always_up   = sum(v_matrix >= 1,2) == length(alpha);
-% 
-% % Identify those reactions with mixed patterns
-% incons_rxns  = always_down + always_up == 0;
-% 
-% % Identify genes that are linked to "inconsistent rxns"
-% incons_genes = sum(rxnGeneM(incons_rxns,:),1) > 0;
-% 
-% % Finally, inconsistent reactions are those that are not conected
-% % to "inconsistent genes" from the original "inconsistent rxns" set
-% incons_rxns  = sum(rxnGeneM(:,incons_genes),2) > 0;
-% 
-% % Keep results for the consistent rxns exclusively
-% rxns     = rxns(~incons_rxns,:);
-% v_matrix = v_matrix(~incons_rxns,:);
-% k_matrix = k_matrix(~incons_rxns,:);
-% rxnGeneM = rxnGeneM(~incons_rxns,:);
-
-% % Get median k-score across steps and order from highest to lowest
-% k_rxns    = mean(k_matrix,2);
-% [~,order] = sort(k_rxns,'descend');
-% rxns      = rxns(order,:);
-% v_matrix  = v_matrix(order,:);
-% k_matrix  = k_matrix(order,:);
-% rxnGeneM  = rxnGeneM(order,:);
-% k_rxns    = k_rxns(order,:);
-
-% Create list of remaining genes and filter out any inconsistent score:
-% Just those genes that are connected to the remaining rxns are
-genes      = model.genes(sum(rxnGeneM,1) > 0);
-k_genes    = zeros(size(genes));
-cons_genes = false(size(genes));
-rxnGeneM   = rxnGeneM(:,sum(rxnGeneM,1) > 0);
+% Filter out reactions with slope = 0
+non_zero_slope  = slope_rxns > 0;
+rxns            = rxns(non_zero_slope);
+v_matrix        = v_matrix(non_zero_slope,:);
+rxnGeneM        = rxnGeneM(non_zero_slope,:);
+slope_rxns      = slope_rxns(non_zero_slope);
+target_type     = target_type(non_zero_slope);
+
+% Create gene list of those connected to the remaining rxns
+genes             = model.genes(sum(rxnGeneM,1) > 0);
+slope_genes       = zeros(size(genes));
+rxnGeneM          = rxnGeneM(:,sum(rxnGeneM,1) > 0);
 target_type_genes = cell(size(genes));
 
 for i = 1:length(genes)
-    % Extract all the K_scores (from rxns across alphas) conected to
+    % Extract all the slope (from rxns across alphas) conected to
     % each remaining gene
-    k_set         = k_rxns(rxnGeneM(:,i) > 0); disp(i);
-    % % % Check the kind of control that gene i-th exerts over its reactions
-    % always_down   = sum(k_set <= (1-tolerance)) == length(k_set);
-    % always_up     = sum(k_set >= (1+tolerance)) == length(k_set);
-    % % Evaluate if gene is always exerting either a positive or negative
-    % % control
-    % cons_genes(i) = always_down + always_up == 1;
-    k_genes(i)    = mean(k_set);
+    slope_set            = slope_rxns(rxnGeneM(:,i) > 0);
+    slope_genes(i)       = mean(slope_set);
+    % Since a gene can be involved in multiple reactions, multiple
+    % engineering manipulations can be suggested for the same gene.
+    % e.g. (OE and KD). So, report all of them.
     target_type_genes(i) = join(unique(target_type(rxnGeneM(:,i) > 0)),', ');
 end
 
-% Keep "consistent genes"
-% genes    = genes(cons_genes);
-% k_genes  = k_genes(cons_genes);
-% rxnGeneM = rxnGeneM(:,cons_genes);
-
-% if filterG
-%     % Filter any value between mean(alpha) and 1:
-%     unchanged = (k_genes >= mean(alpha) - tolerance) + (k_genes <= 1 + tolerance) == 2;
-%     genes     = genes(~unchanged);
-%     k_genes   = k_genes(~unchanged);
-%     rxnGeneM  = rxnGeneM(:,~unchanged);
-%     % Update results for rxns-related fields (remove remaining reactions
-%     % without any associated gene in rxnGeneM)
-%     toKeep   = (sum(rxnGeneM,2) > 0);
-%     rxns     = rxns(toKeep,:);
-%     v_matrix = v_matrix(toKeep,:);
-%     k_matrix = k_matrix(toKeep,:);
-%     k_rxns   = k_rxns(toKeep,:);
-% end
-
-% Order genes from highest to lowest k:
-[~,order] = sort(k_genes,'descend');
-genes     = genes(order,:);
-k_genes   = k_genes(order,:);
+% Order genes from highest to lowest slope:
+[~,order]         = sort(slope_genes,'descend');
+genes             = genes(order,:);
+slope_genes       = slope_genes(order,:);
 target_type_genes = target_type_genes(order,:);
 
 % Create output (exclude enzyme usage reactions):
 toKeep                 = ~startsWith(rxns,'usage_prot_');
 rxnIdx                 = getIndexes(model,rxns(toKeep),'rxns');
 geneIdx                = cellfun(@(x) find(strcmpi(model.genes,x)),genes);
-fseof.flux_WT          = solWT.x;
 fseof.alpha            = alpha;
 fseof.v_matrix         = v_matrix(toKeep,:);
-fseof.k_matrix         = k_matrix(toKeep,:);
 fseof.rxnsTable(:,1)   = model.rxns(rxnIdx);
 fseof.rxnsTable(:,2)   = model.rxnNames(rxnIdx);
-fseof.rxnsTable(:,3)   = num2cell(k_rxns(toKeep));
+fseof.rxnsTable(:,3)   = num2cell(slope_rxns(toKeep));
 fseof.rxnsTable(:,4)   = model.grRules(rxnIdx);
 fseof.rxnsTable(:,5)   = constructEquations(model,rxnIdx);
 fseof.geneTable(:,1)   = genes;
 fseof.geneTable(:,2)   = model.geneShortNames(geneIdx);
-fseof.geneTable(:,3)   = num2cell(k_genes);
+fseof.geneTable(:,3)   = num2cell(slope_genes);
 fseof.geneTable(:,4)   = target_type_genes;
 
+% Save results in a file if defined
 if outputFile
     % Write file with gene targets
     writetable(cell2table(fseof.geneTable, ...