Skip to content

Addfile icongems #2271

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Aug 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
302 changes: 302 additions & 0 deletions src/analysis/ICONGEMs/ICONGEMs.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,302 @@
function [solICONGEMs, boundEf] = ICONGEMs(model, exp, genetxt, condition, threshold, alpha)
% Algorithm to Integrate a Gene Co-expression Network and Genome-scale Metabolic Model:
% This algorithm calculates the reaction flux distribution for each condition by applying
% quadratic programming.
%
% USAGE:
%
% [solICONGEMs, boundEf] = ICONGEMs(model, exp, genetxt, condition, threashold, alpha)
%
% INPUTS:
%
% model: input model (COBRA model structure)
% exp: expression profile corresponding to the gene names that
% extract from gene expression profile file
% genetxt: list of gene names that extract from gene expression profile
% file
%
% OPTIONAL INPUTS:
% threshold: The value of the correlation coefficient for constructing
% the co-expression network (default value: 0.9).
% condition: Row vector indicating the number of conditions
% corresponding to the conditions in exp
% (default value: 1:size(exp, 2)).
% alpha: The value for the proportion of biomass (default value: 1).
%
% OUTPUTS:
% solICONGEMs: Flux distribution table corresponding to reaction flux names.
% boundEf: Upper bound of E-flux.
%
% EXAMPLES:
% % This could be an example that can be copied from the documentation to MATLAB:
% solution = ICONGEMs(model, exp, genetxt, condition, threashold, alpha)
% % without optional values:
% solution = ICONGEMs(model, exp, genetxt)
%
%..Author:
% -Thummarat Paklao, 01/02/2024, Department of Mathematics and Computer Science, Faculty of Science, Chulalongkorn University, Thailand.
% -Apichat Suratanee, 01/02/2024, Department of Mathematics, Faculty of Applied Science, King Mongkut's University of Technology North Bangkok.
% -Kitiporn Plaimas, 01/02/2024, Department of Mathematics and Computer Science, Faculty of Science, Chulalongkorn University, Thailand.

if (nargin < 4 || isempty(condition))
condition = 1:size(exp, 2);
end
if (nargin < 5 || isempty(threshold))
threshold = 0.9;
end
if (nargin < 6 || isempty(alpha))
alpha = 0.99;
end

% construct the template model

modelN = model;
modelN.lb(modelN.lb >= 0) = 0;
modelN.lb(modelN.lb < 0) = -1000;
modelN.ub(modelN.ub <= 0) = 0;
modelN.ub(modelN.ub > 0) = 1000;

% convert to irreversible format

[modelIrrev, matchRev, rev2irrev, irrev2rev] = convertToIrreversible(modelN);

% Check that the gene names in the gene expression profile agree with
% the gene names in the metabolic model.

geneinMet = zeros(size(genetxt, 1) - 1, 1);
for i = 2:size(genetxt, 1)
for j = 1:size(modelIrrev.genes, 1)
if string(genetxt(i)) == string(modelIrrev.genes(j))
geneinMet(i - 1) = 1;
end
end
end
if sum(sum(geneinMet)) == 0
disp("gene name in gene expression data is not agree gene name in metabolic model")
end

% Choose only the gene expression profiles that match genes in the metabolic model.

exp1 = exp(geneinMet == 1, :);
txt2 = genetxt([0; geneinMet] == 1, 1);

% remove missing data

[exp1, TF] = rmmissing(exp1, 1);
txt2 = txt2(~TF, 1);
geneincoexnet = modelIrrev.genes(~TF);

% Construct co-expression network

cor = corr((exp1)');
cor1 = cor >= threshold;

% Find gene pairs that have a high correlation in the co-expression network.

indCorGene = zeros(1, 2);
l = 1;
for i = 1:size(cor, 1)
for j = i:size(cor,1)
if (cor1(i,j) ~= 0) && (i ~= j)
indCorGene(l, 1) = i;
indCorGene(l, 2) = j;
l = l + 1;
end
end
end

coGene = cell(size(indCorGene, 1), 2);
coGene(:, 1) = geneincoexnet(indCorGene(:, 1), 1);
coGene(:, 2) = geneincoexnet(indCorGene(:, 2), 1);

NameRxn={};
for i = 1:size(modelIrrev.genes)
[z1, NameRxn{i}] = findRxnsFromGenes(modelIrrev, modelIrrev.genes(i, 1), [], 1);
end

% Find reactions that correspond to the gene

NameCorRxn = {};
for i = 1:size(coGene, 1)
NameCorRxn{i, 1} = NameRxn{find(modelIrrev.genes == string(coGene(i, 1)))};
NameCorRxn{i, 2} = NameRxn{find(modelIrrev.genes == string(coGene(i, 2)))};
end

f = 0;
h = 0;
indCorRxn = zeros(10, 2);
for i = 1:size(NameCorRxn, 1)
if (~isempty(NameCorRxn{i, 1}) && ~isempty(NameCorRxn{i, 2}))
for j = 1:size(NameCorRxn{i, 1}, 1)
f = f + size(NameCorRxn{i, 2}, 1);
indCorRxn(h + 1 : h + size(NameCorRxn{i, 2}, 1), 1) = findRxnIDs(modelIrrev, NameCorRxn{i, 1}{j, 1});
for w = 1:size(NameCorRxn{i, 2}, 1)
indCorRxn(h + w,2)=findRxnIDs(modelIrrev,NameCorRxn{i, 2}{w, 1});
end
h = f;
end
end
end

% Construct a reaction pair matrix.

R = zeros(length(modelIrrev.rxns),length(modelIrrev.rxns));
for i = 1:size(indCorRxn, 1)
if indCorRxn(i, 1) ~= indCorRxn(i, 2)
R(indCorRxn(i, 1), indCorRxn(i, 2)) = 1;
end
end

% Construct an irreversible reaction matrix from the reversible reactions
% that are decomposed into the same components.

Re = zeros(size(R));
for i = 1:length(model.rxns)
if length(rev2irrev{i, 1}) == 2
Re(i, rev2irrev{i,1}(2)) = 1;
end
end

% Process gene expression data based on Gene-Protien-Reaction (GPR) associations.

geneExdat = zeros(length(modelIrrev.genes), 1);
nupb = zeros(length(modelIrrev.rules), size(exp, 2));
for ch = 1:size(exp, 2)
for i = 1:length(modelIrrev.genes)
cc = 0;
for j = 1:length(genetxt(:, 1)) - 1
if string(modelIrrev.genes(i)) == string(genetxt(j + 1, 1))
geneExdat(i) = exp(j, ch);
cc = cc + 1;
end
end
if geneExdat(i) == 0 && cc == 0
geneExdat(i) = 1000;
end
end

for i = 1:length(modelIrrev.rules)
rulegene = modelIrrev.rules{i};
if isempty(rulegene)
rsum = 1000;
else
newrule = split(rulegene,"|");
nrule = size(newrule, 1);
rsum = 0;
for j = 1:nrule
newrule1 = newrule{j};
nnrule = length(newrule1);
rmin = inf;
for k = 1:nnrule
if newrule1(k) == 'x'
r1 = k+2;
end
if (newrule1(k) == ')' && newrule1(k-1) ~= ' ' && newrule1(k - 1) ~= ')')
rmin = min(rmin, geneExdat(str2num(convertCharsToStrings((newrule1(r1:k - 1))))));
end
end
rsum = rsum + rmin;
end
end
nupb(i, ch) = rsum;
end

end
PosNupb = nupb >= 1000;
nupb(PosNupb) = max(nupb(~PosNupb));

% Construct a table for reporting the results

solution = table(model.rxns);
solutionFull = table(modelIrrev.rxns);
solutionEf = table(model.rxns);
solutionEfFull = table(modelIrrev.rxns);
n = 0;

% Construct the model and calculate the flux distribution.

for ch = condition
n = n + 1;
disp("Condition :"); disp(ch);
model3 = changeRxnBounds(modelIrrev,modelIrrev.rxns, nupb(:, ch), 'u');
solution1 = optimizeCbModel(model3);

Trans0 = zeros(size(modelIrrev.mets, 1),size(modelIrrev.rxns, 1));
Trans2 = -1 * eye(size(modelIrrev.rxns, 1));
S2 = zeros(size(modelIrrev.rxns, 1));
for i = 1:length(modelIrrev.rxns)
S2(i, i) = 1 / max(nupb(i, :));
end


Obj4 = [modelIrrev.c' zeros(1, size(modelIrrev.rxns, 1))] ;

lob = [model3.lb; (-1) * inf * ones(size(modelIrrev.rxns, 1), 1)];
upb = [model3.ub; inf * ones(size(modelIrrev.rxns, 1), 1)];

O = [zeros(size(R)) zeros(size(R)); zeros(size(R)) R];
Aeq = [modelIrrev.S Trans0; S2 Trans2; Obj4];
beq = [zeros(size(modelIrrev.mets, 1), 1); (-1) * ones(size(modelIrrev.rxns,1),1); alpha * solution1.f];

model2 = struct;
model2.lb = lob;
model2.ub = upb;
model2.A = sparse(Aeq);
model2.sense = [char('=' * ones(size(model2.A,1) - 1, 1)) ; char('>')];
model2.rhs = beq;
model2.modelsense = 'min';
numrxn = [1:length(modelIrrev.rxns)];
j = 1;
for i = 1:length(model.rxns)
if length(Re(Re(i, :)>0)) == 1
model2.quadcon(j).Qrow = i;
model2.quadcon(j).Qcol = numrxn(Re(i, :)>0);
model2.quadcon(j).Qval = 1.0;
model2.quadcon(j).rhs = 0.0;
model2.quadcon(j).q = sparse(zeros(2*size(Re, 1), 1));
model2.quadcon(j).sense = '=';
j = j + 1;
end
end
model2.Q = sparse(O);
params.NonConvex = 2;
result = gurobi(model2, params);
x = result.x;
solutionFull(:, n + 1) = table(x(1:size(modelIrrev.rxns),1));
solutionEfFull(:, n + 1) = table(solution1.x);
solFlux = [];
solFluxEf = [];
for i = 1:length(model.rxns)
if length(rev2irrev{i, 1}) == 1
solFlux(i, 1) = x(i, 1);
solFluxEf(i, 1) = solution1.x(i, 1);
else
solFlux(i, 1) = (x(rev2irrev{i, 1}(1, 1))-x(rev2irrev{i, 1}(1, 2)));
solFluxEf(i, 1) = (solution1.x(rev2irrev{i, 1}(1, 1))-solution1.x(rev2irrev{i, 1}(1, 2)));
if solFluxEf(i, 1)>=0
solutionEfFull{rev2irrev{i, 1}(1, 1), n + 1} = solFluxEf(i, 1);
solutionEfFull{rev2irrev{i, 1}(1, 2), n + 1} = 0;
elseif solFluxEf(i, 1)<0
solutionEfFull{rev2irrev{i, 1}(1, 1), n + 1} = 0;
solutionEfFull{rev2irrev{i, 1}(1, 2), n + 1} = abs(solFluxEf(i, 1));

end
end
end
solution(:, n + 1) = table(solFlux);
solutionEf(:, n + 1) = table(solFluxEf);

end
boundEf = table(nupb);
solICONGEMs.sol = solution;
solICONGEMs.solRev = solutionFull;

solEflux.sol = solutionEf;
solEflux.solRev = solutionEfFull;

% Export the results

filename = 'result.csv';
writetable(solution, filename)
end

1 change: 1 addition & 0 deletions src/analysis/ICONGEMs/readme.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@

1 change: 1 addition & 0 deletions test/additionalTests/testICONGEMs/readme.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@

73 changes: 73 additions & 0 deletions test/additionalTests/testICONGEMs/testICONGEMs.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
% The COBRAToolbox: testICONGEMs.m
%
% Purpose:
% - Tests the ICONGEMs function
%
%..Author:
% -Original file, Thummarat Paklao, Apichat Suratanee, and Kitiporn Plaimas.

global CBTDIR

% set the tolerance
tol = 1e-4;
tol2 = 2e-1;

% define the solver required to run the test
useIfAvailable = {'gurobi'};

% test solver packages
solverPkgs = prepareTest('needsLP', true, 'needsQP', true, 'useSolversIfAvailable', useIfAvailable);

% save the current path
currentDir = pwd;

% initialize the test
fileDir = fileparts(which('testICONGEMs'));
cd(fileDir);

% load model
modelFileName = 'ecoli_core_model.mat';
model = getDistributedModel(modelFileName);
[modelIrrev, matchRev, rev2irrev, irrev2rev] = convertToIrreversible(model);
Re = zeros(size(modelIrrev.rxns,1),size(modelIrrev.rxns,1));
for i = 1:length(model.rxns)
if length(rev2irrev{i, 1}) == 2
Re(i, rev2irrev{i,1}(2)) = 1;
end
end

% load gene expression data
fileName = 'gene_exp.csv';
[exp, txt] = xlsread(fileName);

% set optional parameter
condition1=1:size(exp,2);
threashold = 0.8;
alpha = 0.99;

% test optimizeCbModel in cobratoolbox

sol_fba = optimizeCbModel(model);

for k = 1:length(solverPkgs.QP)
fprintf(' -- Running testICONGEMs using the solver interface: %s ... ', solverPkgs.QP{k});

solverQPOK = changeCobraSolver(solverPkgs.QP{k}, 'QP', 0);

if solverQPOK
[solICONGEMs, solEflux, R, boundEf] = ICONGEMs(model, exp, txt);
for i = 1:size(exp,2)
assert(norm(model.S * table2array(solICONGEMs.sol(:,i+1)) - model.b, 2) < tol);
assert(abs(table2array(solICONGEMs.solRev(:,i+1))' * Re * table2array(solICONGEMs.solRev(:,i+1))) < tol)
assert(abs(model.c' * table2array(solICONGEMs.sol(:,i+1)) - alpha * model.c' * table2array(solEflux.sol(:,i+1))) < tol)

end

end

% output a success message
fprintf('Done.\n');
end

% change the directory
cd(currentDir)
Loading