From 38894453a93db0438cb12d4bdb104acda394502e Mon Sep 17 00:00:00 2001 From: Qiao Chen Date: Thu, 2 Sep 2021 12:28:26 -0400 Subject: [PATCH] Chiao/dev (#3) * changed to enable -NDEBUG in mmex for -O * Updated license * Added FGMRES solver for nullspace * Set rank for backsolve * Added PIPIT * updated reference to hifir * Removed unused codes * Updated CPP source directory * Added version and updated ijv2crs * Updated C++ kernels in HIFIR4M with new HIFIR * Updated examples using new APIs. * Updated using field name for params * Fixed authors * Fixed checking before recompilation * updated .gitignore * updated pipitHifir * Added download hifir script * Updated building script to use downloaded hifir * Removed submodule * Added option to use local C++ hifir * Updated building process * Updated ignore list for user hifir source * Changed to urlwrite * Updated varaible name * Fixed integer size for Octave * Added update and refactor * Fixed bugs in apply * Fixed bugs in create * Updated building script * Updated cite bibtex in readme * Fixed bug in hifir * Added version file and build * Added user HIF * Updated control parameters * Updated README * Updated to addparam * Updated installation * Added user-provided HIF * Added changelog Co-authored-by: Xiangmin Jiao --- .gitignore | 3 + .gitmodules | 3 - CHANGELOG.md | 7 + README.md | 32 +- VERSION | 1 + api/HifEnum.m | 9 +- api/HifParams.m | 13 +- api/Hifir.m | 10 + api/hifApply.m | 11 +- api/hifCreate.m | 29 +- api/hifRefactorize.m | 38 ++ api/hifUpdate.m | 29 ++ examples/complex_eg.m | 38 +- examples/complex_multiply.m | 48 +-- examples/random_eg.m | 33 +- examples/singular_callback_eg.m | 36 -- examples/singular_eg.m | 23 +- gmresHif.m | 61 ++-- hifir | 1 - ksp/fgmresNull.m | 355 +++++++++++++++++++ ksp/orthNulls.m | 94 +++++ ksp/private/crs_Axpy.m | 119 ------- ksp/private/crs_prodAx.m | 38 ++ mex/HIF.m | 112 ------ mex/hifir4m.hpp | 488 +++++--------------------- mex/hifir4m_clear.m | 22 -- mex/hifir4m_config.hpp | 5 +- mex/hifir4m_create_params.m | 86 ----- mex/hifir4m_empty.m | 22 -- mex/hifir4m_ensure_int.m | 16 - mex/hifir4m_export.m | 84 ----- mex/hifir4m_factorize.m | 97 ----- mex/hifir4m_fgmres.m | 117 ------ mex/hifir4m_finalize.m | 22 -- mex/hifir4m_git_revision.m | 65 ---- mex/hifir4m_hifir.m | 49 --- mex/hifir4m_initialize.m | 38 -- mex/hifir4m_mex.cpp | 224 ------------ mex/hifir4m_mmultiply.m | 46 --- mex/hifir4m_query_stats.m | 24 -- mex/hifir4m_solve.m | 48 --- mex/hifir4m_sp2crs.m | 7 + mex/hifir4m_version.cpp | 33 ++ mex/private/HIFIR4M_CHECK.m | 5 - mex/private/HIFIR4M_CLEAR.m | 5 - mex/private/HIFIR4M_CREATE.m | 5 - mex/private/HIFIR4M_DESTROY.m | 5 - mex/private/HIFIR4M_EXPORT_DATA.m | 5 - mex/private/HIFIR4M_FACTORIZE.m | 5 - mex/private/HIFIR4M_KSP_NULL.m | 6 - mex/private/HIFIR4M_KSP_SOLVE.m | 5 - mex/private/HIFIR4M_M_MULTIPLY.m | 5 - mex/private/HIFIR4M_M_SOLVE.m | 5 - mex/private/HIFIR4M_M_SOLVE2.m | 5 - mex/private/HIFIR4M_QUERY.m | 6 - mex/{ => private}/hifir4m_ijv2crs.cpp | 16 +- pipitHifir.m | 223 ++++++++++++ util/build_hifir4m.m | 61 +++- util/download_hifir.m | 12 + util/octave/mmex.m | 18 +- util/relativepath.m | 107 ------ 61 files changed, 1167 insertions(+), 1968 deletions(-) delete mode 100644 .gitmodules create mode 100644 CHANGELOG.md create mode 100644 VERSION create mode 100644 api/hifRefactorize.m create mode 100644 api/hifUpdate.m delete mode 100644 examples/singular_callback_eg.m delete mode 160000 hifir create mode 100644 ksp/fgmresNull.m create mode 100644 ksp/orthNulls.m delete mode 100644 ksp/private/crs_Axpy.m create mode 100644 ksp/private/crs_prodAx.m delete mode 100644 mex/HIF.m delete mode 100644 mex/hifir4m_clear.m delete mode 100644 mex/hifir4m_create_params.m delete mode 100644 mex/hifir4m_empty.m delete mode 100644 mex/hifir4m_ensure_int.m delete mode 100644 mex/hifir4m_export.m delete mode 100644 mex/hifir4m_factorize.m delete mode 100644 mex/hifir4m_fgmres.m delete mode 100644 mex/hifir4m_finalize.m delete mode 100644 mex/hifir4m_git_revision.m delete mode 100644 mex/hifir4m_hifir.m delete mode 100644 mex/hifir4m_initialize.m delete mode 100644 mex/hifir4m_mmultiply.m delete mode 100644 mex/hifir4m_query_stats.m delete mode 100644 mex/hifir4m_solve.m create mode 100644 mex/hifir4m_version.cpp delete mode 100644 mex/private/HIFIR4M_CHECK.m delete mode 100644 mex/private/HIFIR4M_CLEAR.m delete mode 100644 mex/private/HIFIR4M_CREATE.m delete mode 100644 mex/private/HIFIR4M_DESTROY.m delete mode 100644 mex/private/HIFIR4M_EXPORT_DATA.m delete mode 100644 mex/private/HIFIR4M_FACTORIZE.m delete mode 100644 mex/private/HIFIR4M_KSP_NULL.m delete mode 100644 mex/private/HIFIR4M_KSP_SOLVE.m delete mode 100644 mex/private/HIFIR4M_M_MULTIPLY.m delete mode 100644 mex/private/HIFIR4M_M_SOLVE.m delete mode 100644 mex/private/HIFIR4M_M_SOLVE2.m delete mode 100644 mex/private/HIFIR4M_QUERY.m rename mex/{ => private}/hifir4m_ijv2crs.cpp (94%) create mode 100644 pipitHifir.m create mode 100644 util/download_hifir.m delete mode 100644 util/relativepath.m diff --git a/.gitignore b/.gitignore index eae53ac..cfe316a 100644 --- a/.gitignore +++ b/.gitignore @@ -7,4 +7,7 @@ .DS_Store .*swp *.o +*.bak +*.asv .MATLABDriveTag +hifir-* diff --git a/.gitmodules b/.gitmodules deleted file mode 100644 index 76f39d8..0000000 --- a/.gitmodules +++ /dev/null @@ -1,3 +0,0 @@ -[submodule "hifir"] - path = hifir - url = https://github.com/fastsolve/hifir.git diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..7b064cf --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,7 @@ +# Changelog # + +All notable changes to this project will be documented in this file. + +## [v0.1.0](https://github.com/hifirworks/hifir4m/releases/tag/v0.1.0) (2021-09-02) ## + +This is the initial official release of the HIFIR4M package based on the C++ HIFIR [v0.1.0](https://github.com/hifirworks/hifir/releases/tag/v0.1.0). diff --git a/README.md b/README.md index d20bf26..f2e7fd3 100644 --- a/README.md +++ b/README.md @@ -4,20 +4,36 @@ ## Installation ## -Clone this project to your preferred location. Then start MATLAB or GNU Octave under the directory that contains `hifir4m`, or run the command +Clone this project to your preferred location, i.e., + +```console +git clone -b release https://github.com/hifirworks/hifir4m.git +``` + +Use `git pull` to download any new changes that have been added since `git clone` or last `git pull`. Alternatively, use `git checkout v[GLOBAL].[MAJOR].[MINOR]` to download a specific version. + +Then start MATLAB or GNU Octave under the directory that contains `hifir4m`, or run the command ```matlab ->> run('/path/to/hifir4m/load_hifir') +>> run('/path/to/hifir4m/startup_hifir') ``` (and replace `/path/to/hifir4m/` to the directory that contains `hifir4m`) bo build `hifir4m` and load its path. It will build the *mex* kernels if needed by linking to MATLAB and Octave's built-in BLAS/LAPACK libraries for the low-level QRCP. +Note that for the first time, `hifir4m` will download C++ package [HIFIR](https://github.com/hifirworks/hifir) while building *mex* kernels. If you don't have access to network during building *mex* kernels, then you can obtain HIFIR beforehand and put its source in `hifir4m/hifir-[hifir-version]` folder; for instance, you can run the following command to download the C++ HIFIR package + +```console +cd /path/to/hifir4m +wget -qO- https://github.com/hifirworks/hifir/archive/refs/tags/v`cat VERSION`.tar.gz|tar xzf - +``` + ## Usage ## The easiest way to use `hifir4m` is to call the `gmresHif` interface. For example, ```matlab >> [x, flag, relres, iter, reshis, times] = gmresHif(A, b); ``` + where `A` is a MATLAB's built-in sparse matrix or a `MATLAB` `struct` containing the filds of `row_ptr`, `col_ind`, `vals` of a standard CRS storage format, and `b` is a right-hand-side vector (or RHS vectors with two columns). To access the intermediate-level interfaces of `hifir4m`, please see `gmresHif.m` for the calling sequence of `hifCreate`, `hifApply`, and `hifDestroy`. @@ -29,6 +45,7 @@ To access the intermediate-level interfaces of `hifir4m`, please see `gmresHif.m The software suite is released under a dual-license model. For academic users, individual users, or open-source software developers, you can use HIFIR under the GPLv3+ license free of charge for research and evaluation purpose. For commercial users, separate commercial licenses are available through the Stony Brook University. For inqueries regarding commercial licenses, please contact Prof. Xiangmin Jiao . ## How to Cite `HIFIR` ## + If you use `HIFIR`, `hifir4m`, or `hifir4py` in your research for nonsingular systems, please cite the `HILUCSI` paper: ```bibtex @@ -38,7 +55,6 @@ If you use `HIFIR`, `hifir4m`, or `hifir4py` in your research for nonsingular s large-scale saddle-point problems from {PDE}s}, journal = {Numer. Linear Algebra Appl.}, year = {2021}, - note = {To appear}, doi = {10.1002/nla.2400}, ``` @@ -47,20 +63,20 @@ If you use them to solve highly ill-conditioned of singular systems, please cite ```bibtex @Article{jiao2020approximate, author = {Xiangmin Jiao and Qiao Chen}, - journal = {arxiv}, + journal = {SIAM J. Matrix Anal. Appl.}, title = {Approximate Generalized Inverses with Iterative Refinement for $\epsilon$-Accurate Preconditioning of Singular Systems}, - year = {2020}, - note = {arXiv:2009.01673}, + year = {2021}, + note = {To appear}, } @Article{chen2021hifir, - author = {Jiao, Xiangmin and Chen, Qiao}, + author = {Chen, Qiao and Jiao, Xiangmin}, title = {{HIFIR}: Hybrid Incomplete Factorization with Iterative Refinement for Preconditioning Ill-conditioned and Singular Systems}, journal = {arxiv}, year = {2021}, - note = {arXiv:21...}, + note = {arXiv:2106.09877}, } ``` diff --git a/VERSION b/VERSION new file mode 100644 index 0000000..6c6aa7c --- /dev/null +++ b/VERSION @@ -0,0 +1 @@ +0.1.0 \ No newline at end of file diff --git a/api/HifEnum.m b/api/HifEnum.m index e0fb441..9a71f33 100644 --- a/api/HifEnum.m +++ b/api/HifEnum.m @@ -8,11 +8,8 @@ DESTROY = int32(4); FACTORIZE = int32(5); M_SOLVE = int32(6); - KSP_SOLVE = int32(7); - KSP_NULL = int32(8); - EXPORT_DATA = int32(9); - M_SOLVE2 = int32(10); - M_MULTIPLY = int32(11); - QUERY = int32(12); + M_MULTIPLY = int32(7); + EXPORT_DATA = int32(8); + QUERY = int32(9); end end diff --git a/api/HifParams.m b/api/HifParams.m index 5f9747a..674d366 100644 --- a/api/HifParams.m +++ b/api/HifParams.m @@ -21,7 +21,7 @@ % verbose: 0 % rf_par: 1 % reorder: 1 -% saddle: 1 +% spd: 0 % check: 1 % pre_scale: 0 % symm_pre_lvls: 1 @@ -54,7 +54,7 @@ 'verbose', 0, ... 'rf_par', 1, ... 'reorder', 1, ... - 'saddle', 1, ... + 'spd', 0, ... 'check', 1, ... 'pre_scale', 0, ... 'symm_pre_lvls', 1, ... @@ -62,11 +62,13 @@ 'mumps_blr', 1, ... 'fat_schur_1st', 0, ... 'rrqr_cond', 0, ... - 'pivot', 0, ... + 'pivot', 2, ... 'gamma', 1, ... 'beta', 1e3, ... 'is_symm', 0, ... 'no_pre', 0, ... + 'nzp_thres', 0.65, ... + 'dense_thres', 2000, ... 'is_mixed', 0, ... 'is_complex', 0); fields = fieldnames(defaults); @@ -74,13 +76,13 @@ p = inputParser; positive_params = {'tau_L', 'tau_U', 'kappa_d', 'kappa', 'alpha_L', ... - 'alpha_U', 'rho', 'c_d', 'c_h'}; + 'alpha_U', 'rho', 'c_d', 'c_h', 'nzp_thres', 'dense_thres'}; for i = 1:numel(positive_params) addParameter(p, positive_params{i}, defaults.(positive_params{i}), ... @(x) isscalar(x) && x > 0); end -nonnegative_params = {'verbose', 'rf_par', 'reorder', 'saddle', 'check', ... +nonnegative_params = {'verbose', 'rf_par', 'reorder', 'spd', 'check', ... 'pre_scale', 'symm_pre_lvls', 'threads', 'mumps_blr', 'fat_schur_1st', ... 'pivot', 'gamma', 'beta', 'is_symm', 'no_pre', 'is_mixed', 'is_complex'}; for i = 1:numel(nonnegative_params) @@ -91,6 +93,7 @@ addParameter(p, 'N', defaults.N, @isscalar); addParameter(p, 'rrqr_cond', defaults.rrqr_cond, @isscalar); +p.KeepUnmatched = true; % This seems needed for Octave parse(p, varargin{:}); sorted_opts = p.Results; diff --git a/api/Hifir.m b/api/Hifir.m index 0bbf9e1..1b48440 100644 --- a/api/Hifir.m +++ b/api/Hifir.m @@ -34,6 +34,16 @@ [varargout{1:nargout}] = hifApply(h, x, varargin{:}); end + function update(h, A) + % Update coefficient matrix used in iterative refinement + h = hifUpdate(h, A); + end + + function varargout = refactorize(h, S, varargin) + % Refactorization + [varargout{1:nargout}] = hifRefactorize(h, S, varargin{:}); + end + function delete(h) % Destructor for handle if ~isempty(h.hdl) diff --git a/api/hifApply.m b/api/hifApply.m index 337ae81..41f854f 100644 --- a/api/hifApply.m +++ b/api/hifApply.m @@ -1,4 +1,4 @@ -function [y, varargout] = hifApply(hif, x, op, rank, nirs) +function varargout = hifApply(hif, x, op, rank, nirs) % hifApply Applies a HIFIR preconditioner to solve or multiply. % % y = hifApply(hif, x [op, rank, nirs]) @@ -25,11 +25,12 @@ assert(nargin <= 5, 'Solve accepts up to five arguments.'); if nargin == 5 && nirs > 1 % with iterative refinement - [y, varargout{:}] = hifir4m_mex(HifEnum.M_SOLVE, hif.hdl, ... - A.row_ptr, A.col_ind, A.val, x, nirs, ... + assert(~isempty(hif.A), 'hif.A must not be empty.'); + [varargout{1:nargout}] = hifir4m_mex(HifEnum.M_SOLVE, hif.hdl, ... + x, hif.A.row_ptr, hif.A.col_ind, hif.A.val, nirs, ... op(end) == 'H' || op(end) == 'T' || op(end) == 'h' || op(end) == 't', rank); else - [y, varargout{1:nargout}] = hifir4m_mex(HifEnum.M_SOLVE, hif.hdl, x, ... + [varargout{1:nargout}] = hifir4m_mex(HifEnum.M_SOLVE, hif.hdl, x, ... op(end) == 'H' || op(end) == 'T' || op(end) == 'h' || op(end) == 't', rank); end else @@ -37,7 +38,7 @@ 'First character must be ''s'' or ''m'''); assert(nargin <= 4, 'Multiply accepts up to four arguments.'); - [y, varargout{1:nargout}] = hifir4m_mex(HifEnum.M_MULTIPLY, hif.hdl, x, ... + [varargout{1:nargout}] = hifir4m_mex(HifEnum.M_MULTIPLY, hif.hdl, x, ... op(end) == 'H' || op(end) == 'T' || op(end) == 'h' || op(end) == 't', rank); end diff --git a/api/hifCreate.m b/api/hifCreate.m index 0293a50..40b360d 100644 --- a/api/hifCreate.m +++ b/api/hifCreate.m @@ -37,36 +37,37 @@ if issparse(A) Astruct = hifir4m_sp2crs(A); else + Astruct = A; if hifir4m_isint64 if ~isa(A.row_ptr, 'int64') Astruct.row_ptr = int64(A.row_ptr); Astruct.col_ind = int64(A.col_ind); end - elseif ~isint64 + else if ~isa(A.row_ptr, 'int32') Astruct.row_ptr = int32(A.row_ptr); Astruct.col_ind = int32(A.col_ind); end end - Astruct.val = double(A.val); end -Astruct.nrows = int32(numel(Astruct.row_ptr)-1); if nargin <= 1 || isempty(S) Sstruct = Astruct; +elseif issparse(S) + Sstruct = hifir4m_sp2crs(S); else - if hifir4m_isint64 - if ~isa(S.row_ptr, 'int64') - Sstruct.row_ptr = int64(S.row_ptr); - Sstruct.col_ind = int64(S.col_ind); - end - elseif ~isint64 - if ~isa(S.row_ptr, 'int32') - Sstruct.row_ptr = int32(S.row_ptr); - Sstruct.col_ind = int32(S.col_ind); - end + Sstruct = S; +end +if hifir4m_isint64 + if ~isa(Sstruct.row_ptr, 'int64') + Sstruct.row_ptr = int64(Sstruct.row_ptr); + Sstruct.col_ind = int64(Sstruct.col_ind); + end +else + if ~isa(Sstruct.row_ptr, 'int32') + Sstruct.row_ptr = int32(Sstruct.row_ptr); + Sstruct.col_ind = int32(Sstruct.col_ind); end - Sstruct.val = double(S.val); end [varargout{1:nargout-1}] = hifir4m_mex(HifEnum.FACTORIZE, hif, ... diff --git a/api/hifRefactorize.m b/api/hifRefactorize.m new file mode 100644 index 0000000..923406e --- /dev/null +++ b/api/hifRefactorize.m @@ -0,0 +1,38 @@ +function varargout = hifRefactorize(hif, S, varargin) +% hifRefactorize refactorizes a preconditioner given a sparsifier S +% +% hifRefactorize(hif, S) +% hifRefactorize(hif, S, 'alpha_L', 5, 'alpha_U', 5); +% +% See also hifUpdate hifCreate hifApply + +assert(~isempty(hif.hdl), 'the preconditioner cannot be empty.'); + +if ~isempty(varargin) && isstruct(varargin{1}) + params = varargin{1}; +else + params = HifParams(varargin{:}); +end + +% Setup sparsifier +if issparse(S) + Sstruct = hifir4m_sp2crs(S); +else + Sstruct = S; + if hifir4m_isint64 + if ~isa(S.row_ptr, 'int64') + Sstruct.row_ptr = int64(S.row_ptr); + Sstruct.col_ind = int64(S.col_ind); + end + else + if ~isa(S.row_ptr, 'int32') + Sstruct.row_ptr = int32(S.row_ptr); + Sstruct.col_ind = int32(S.col_ind); + end + end +end + +[varargout{1:nargout-1}] = hifir4m_mex(HifEnum.FACTORIZE, hif.hdl, ... + Sstruct.row_ptr, Sstruct.col_ind, Sstruct.val, params); + +end \ No newline at end of file diff --git a/api/hifUpdate.m b/api/hifUpdate.m new file mode 100644 index 0000000..3ad81e4 --- /dev/null +++ b/api/hifUpdate.m @@ -0,0 +1,29 @@ +function hif = hifUpdate(hif, A) +% hifUpdate Updates a coefficient matrix in hif for iterative refinement +% +% hif = hifUpdate(hif, A) +% +% See also hifApply hifRefactorize + +if issparse(A) + Astruct = hifir4m_sp2crs(A); +else + Astruct = A; + if hifir4m_isint64 + if ~isa(A.row_ptr, 'int64') + Astruct.row_ptr = int64(Astruct.row_ptr); + Astruct.col_ind = int64(Astruct.col_ind); + end + else + if ~isa(A.row_ptr, 'int32') + Astruct.row_ptr = int32(Astruct.row_ptr); + Astruct.col_ind = int32(Astruct.col_ind); + end + end +end +if ~isempty(hif.A) + assert(hif.A.nrows == Astruct.nrows, 'mismatched row sizes'); + assert(hif.A.ncols == Astruct.ncols, 'mismatched column sizes'); +end +hif.A = Astruct; +end \ No newline at end of file diff --git a/examples/complex_eg.m b/examples/complex_eg.m index 125184f..4d463bf 100644 --- a/examples/complex_eg.m +++ b/examples/complex_eg.m @@ -1,23 +1,23 @@ -clear; -load('kim1.mat', 'Problem'); -is_mixed = false; -is_complex = true; -A = Problem.A; -n = size(A,1); -b = A*ones(n,1); +%{ + This example code uses HIF-preconditioned GMRES(30) to solve a + complex testing system "kim1" from SuiteSparse Matrix Collection. +%} -%% Initialize database -h = HIF(is_mixed, is_complex); +clear; -%% Factorize A -info = h.factorize(A); -disp(info); +load('kim1.mat', 'Problem'); -%% Solve for x=A\b -[x, flag] = h.fgmres(A, b); -if flag; fprintf(2, 'warning! solver failed with flag %d\n', flag); end -res = norm(A*x-b)/norm(b); -if res > 1e-6; fprintf(2, 'residual too large %.4g\n', res); end +% RHS is A*1 +A = Problem.A; +n = size(A, 1); +b = A*ones(n, 1); -%% Finalize -clear h \ No newline at end of file +% solve with HIF-preconditioned GMRES +[x, flag] = gmresHif(A, b, 'is_complex', true); +if flag + fprintf(2, 'warning! solver failed with flag %d\n', flag); +end +res = norm(b - A * x) / norm(b); +if res > 1e-6 + fprintf(2, 'residual too large %.4g\n', res); +end diff --git a/examples/complex_multiply.m b/examples/complex_multiply.m index 940c33b..b8eba81 100644 --- a/examples/complex_multiply.m +++ b/examples/complex_multiply.m @@ -1,33 +1,33 @@ +%{ + This example code demonstrates how to apply multilevel triangular + solve and matrix-vector multiplication in both regular and Hermitian + modes for the testing matrix "kim1" from SuiteSparse Matrix + Collections. +%} + clear; + load('kim1.mat', 'Problem'); -is_mixed = false; -is_complex = true; + A = Problem.A; n = size(A,1); b = A*ones(n,1); -%% Initialize database -h = HIF(is_mixed, is_complex); - -%% Factorize A -info = h.factorize(A); -disp(info); - -%% Solve for x=h\b -x = h.solve(b); - -%% Compute y = h*x -y = h.mmultiply(x); - -assert(norm(y-b)/norm(b) <= 1e-12, 'failed matrix-vector test'); - -%% Solve for x=h'\b -x = h.solve(b, true); +% Compute HIF preconditioner +[hdl, stats] = hifCreate(A, [], 'is_complex', true); +fprintf(1, 'Factorization finished with stats:\n\n'); +disp(stats); -%% Compute y = h'*x -y = h.mmultiply(x, true); +% Apply solve and multiplication +x = hifApply(hdl, b, 'S'); % triangular solve +b2 = hifApply(hdl, x, 'M'); % mat-vec multiplication +res = norm(b2 - b) / norm(b); +assert(res <= 1e-10, 'residual %g is too large', res); -assert(norm(y-b)/norm(b) <= 1e-12, 'failed Hermitian matrix-vector test'); +% Apply solve and multiplication in Hermitian mode +x = hifApply(hdl, b, 'SH'); % Hermitian triangular solve +b2 = hifApply(hdl, x, 'MH'); % Hermitian mat-vec multiplication +res = norm(b2 - b) / norm(b); +assert(res <= 1e-10, 'residual %g is too large', res); -%% Finalize -clear h \ No newline at end of file +delete(hdl); diff --git a/examples/random_eg.m b/examples/random_eg.m index bfdf369..02f513c 100644 --- a/examples/random_eg.m +++ b/examples/random_eg.m @@ -1,27 +1,20 @@ %{ -This example uses a density 0.5 of 10x10 sparse matrix to show how to use -HIFIR4M and the GMRES solver. + This example uses a density 0.5 of 10x10 sparse matrix to show how + to use HIF-preconditioned GMRES solver. %} clear; -is_mixed = true; - +% setup testing matrix A = sprand(10, 10, 0.5); b = rand(10, 1); -%% Initialize database -dbase = hifir4m_initialize(is_mixed); - -%% Factorize A -info = hifir4m_factorize(dbase, A); -disp(info); - -%% Solve for x=A\b -[x, flag] = hifir4m_fgmres(dbase, A, b); -if flag; fprintf(2, 'warning! solver failed with flag %d\n', flag); end -res = norm(A*x-b)/norm(b); -if res > 1e-6; fprintf(2, 'residual too large %.4g\n', res); end - -%% Finalize -hifir4m_finalize(dbase); -clear dbase \ No newline at end of file +% Call HIF-preconditioned GMRES +[x, flag] = gmresHif(A, b, 'is_mixed', true); % using mixed-precision +if flag + fprintf(2, 'warning! solver failed with flag %d\n', flag); +end +res = norm(b - A * x) / norm(b); +if res > 1e-6 + fprintf(2, 'residual too large %.4g\n', res); +end + \ No newline at end of file diff --git a/examples/singular_callback_eg.m b/examples/singular_callback_eg.m deleted file mode 100644 index b081ad9..0000000 --- a/examples/singular_callback_eg.m +++ /dev/null @@ -1,36 +0,0 @@ -%{ -Example of solving singular system - -NOTE: - This example uses user callback to filter out the constant mode (right - null space). The callback must be taking one argument and return one - argument, where the argument is a 1D column vector, which is the - direction vector in GMRES. The result is expected to not contain any - components in the (right) null space. -%} - -clear; -h = HIF; -N = 256; -A = gallery('neumann', 256); -left_nsp = null(full(A')); -b = rand(N, 1); -% filter the left null space from b, need to do it for now -b = b - ((left_nsp'*b)/norm(left_nsp)^2)*left_nsp; -h.factorize(A); -% one can do this -x = h.fgmres(A, b, [], [], [], [], [], [], @my_filter); -% or -% x = h.fgmres(A, b, [], [], [], [], [], [], @(x) my_filter(x, N)); -fprintf(1, 'relative residual in 2 norm is %g\n', norm(A*x-b)/norm(b)); -clear h - -function v = my_filter(v, N) -% NOTE and WARING: The function must agree with this interface, input is -% one column vector with output one column vector. This will be called -% inside the GMRES KSP solver. Notice that one can capture user data by -% having multiple arguments where default values are supplied from the -% second one, or create function handle by capturing other data vars. -if nargin < 2; N = length(v); end -v = v - sum(v)/N; -end \ No newline at end of file diff --git a/examples/singular_eg.m b/examples/singular_eg.m index fe0d75c..4c441a1 100644 --- a/examples/singular_eg.m +++ b/examples/singular_eg.m @@ -1,17 +1,16 @@ %{ -Example of solving singular system + Example of solving singular system using PIPIT with testing matrix + gallery('neumann', 256). %} clear; -h = HIF; -N = 256; + A = gallery('neumann', 256); -left_nsp = null(full(A')); -b = rand(N, 1); -% filter the left null space from b, need to do it for now -b = b - ((left_nsp'*b)/norm(left_nsp)^2)*left_nsp; -info = h.factorize(A); -disp(info); -x = h.fgmres(A, b, [], [], [], [], [], [], [1, N]); -fprintf(1, 'relative residual in 2 norm is %g\n', norm(A*x-b)/norm(b)); -clear h \ No newline at end of file +b = rand(size(A, 1), 1); + +% Call PIPIT with 1 dim(Null) +x = pipitHifir(A, b, 1); +res = norm(A' * (b - A* x)) / norm(A' * b); +if res > 1e-6 + fprintf(2, 'residual too large %.4g\n', res); +end diff --git a/gmresHif.m b/gmresHif.m index 4689ef2..64c35e7 100644 --- a/gmresHif.m +++ b/gmresHif.m @@ -1,9 +1,9 @@ function [x, flag, relres, iter, resids, times] = gmresHif(A, b, varargin) % gmresHif Restarted GMRES with HIF as right preconditioner % -% x = gmresHif(A, b [, restart, rtol, maxit, x0]) +% x = gmresHif(A, b [, M, restart, rtol, maxit, x0]) % -% solves a sparse linear system using HIF as the right preconditioner. +% solves a sparse linear system using HIF as the preconditioner. % Matrix A can be in MATLAB's built-in sparse format or in CRS struct % with row_ptr, col_ind, and vals fields. % @@ -17,9 +17,16 @@ % the default value is 500. (Note that unlike MATLAB's built-in gmres, % maxit here refers to the total number of iterations.) % +% x0 specifies the initial guess. +% % x = gmresHif(A, b, ..., 'name', value, ...) % allows omitting some of the optional arguments followed by name-value % pairs for the parameters. The parameters include: +% M: HIF preconditioner. Default is [], which means this +% function will compute a HIF preconditioner. For dynamic +% and/or nonlinear problem, it is preferable to allow +% the users to manage preconditioners outside of this +% function. % verbose: verbose level. Default is 1. % pcside: Side of preconditioner ('left' or 'right'). If 'left', % MATLAB built-in GMRES will be called. Default is 'right'. @@ -46,11 +53,12 @@ p = inputParser; % Initialize default arguments -addOptional(p, 'restart', int32(30), @(x) isempty(x) || isscalar(x) && (x>0)); -addOptional(p, 'rtol', 1.e-6, @(x) isempty(x) || isscalar(x) && (x>0)); -addOptional(p, 'maxit', int32(500), @(x) isempty(x) || isscalar(x) && (x>0)); -addOptional(p, 'x0', cast([], class(b)), ... - @(x) isempty(x) || isequal(size(x0), size(b))); +addParameter(p, 'M', [], @(x) isempty(x) || isa(x, 'Hifir')); +addParameter(p, 'restart', int32(30), @(x) isempty(x) || isscalar(x)); +addParameter(p, 'rtol', 1.e-6, @(x) isempty(x) || isscalar(x)); +addParameter(p, 'maxit', int32(500), @(x) isempty(x) || isscalar(x)); +addParameter(p, 'x0', cast([], class(b)), ... + @(x) isempty(x) || isequal(size(x), size(b))); addParameter(p, 'verbose', int32(1), @isscalar); addParameter(p, 'pcside', 'right', ... @@ -63,31 +71,40 @@ opts = p.Results; % Process positional arguments -if isempty(opts.restart) +if isempty(opts.restart) || opts.restart <= 0 opts.restart = 30; end -if isempty(opts.maxit) +if isempty(opts.maxit) || opts.maxit <= 0 opts.maxit = 500; end -if isempty(opts.rtol) - opts.rtol = 1.-6; +if isempty(opts.rtol) || opts.rtol <= 0 + opts.rtol = 1.e-6; end % Create Hifir object -if opts.verbose - fprintf(1, 'Computing hybrid factorization...\n'); -end - +computedHif = isempty(opts.M); times = zeros(2, 1); -args = namedargs2cell(p.Unmatched); -[hif, info, times(1)] = hifCreate(A, [], 'verbose', ... - opts.verbose>1, args{:}); +if computedHif + if opts.verbose + fprintf(1, 'Computing hybrid incomplete factorization...\n'); + end + args = namedargs2cell(p.Unmatched); + [hif, info, times(1)] = hifCreate(A, [], 'verbose', ... + opts.verbose>1, args{:}); +else + if opts.verbose + fprintf(1, 'Using user-provided hybrid incomplete factorization...\n'); + end + hif = opts.M; +end M = @(x) hifApply(hif, x); if opts.verbose - fprintf(1, 'Finished ILU factorization in %.4g seconds \n', times(1)); - disp(info); - fprintf(1, 'Starting Krylov solver ...\n'); + if computedHif + fprintf(1, 'Computed HIF factorization in %.4g seconds \n', times(1)); + disp(info); + end + fprintf(1, 'Starting GMRES with HIF as right-preconditioner...\n'); end tic; @@ -107,7 +124,7 @@ if opts.verbose if flag == 0 - fprintf(1, 'Finished solve in %d iterations and %.2f seconds.\n', iter, times(2)); + fprintf(1, 'Computed solution in %d iterations and %.4g seconds.\n', iter, times(2)); elseif flag == 3 fprintf(1, 'GMRES stagnated after %d iterations and %.4g seconds.\n', iter, times(2)); else diff --git a/hifir b/hifir deleted file mode 160000 index b48dc9d..0000000 --- a/hifir +++ /dev/null @@ -1 +0,0 @@ -Subproject commit b48dc9d3333ce4a64a1215808ad34f9903b84d89 diff --git a/ksp/fgmresNull.m b/ksp/fgmresNull.m new file mode 100644 index 0000000..cbd5be4 --- /dev/null +++ b/ksp/fgmresNull.m @@ -0,0 +1,355 @@ +function [x, flag, iter, ref_iter] = fgmresNull(A, b, ... + us, leftnull, restart, rtol, maxit, M, ~, x0) +%fgmresNull Kernel for computing a null-space vector +% +% Syntax: +% x = fgmresNull(A, b, us, leftnull, restart, rtol, maxit, M, ~, x0) +% [x, flag, iter, ref_iter] = fgmresNull(...) +% +% See Also: +% fgmres_HO + +n = int32(size(b, 1)); + +% If RHS is zero, terminate +beta0 = sqrt(vec_sqnorm2(b)); +if beta0 == 0 + x = zeros(n, 1); + flag = int32(0); + iter = int32(0); + ref_iter = int32(0); + return; +end + +% Number of inner iterations +if restart > n + restart = n; +elseif restart <= 0 + restart = int32(1); +end + +% Determine the maximum number of outer iterations +max_outer_iters = int32(ceil(double(maxit)/double(restart))); + +% Initialize x +if isempty(x0) + x = zeros(n, 1); +else + x = x0; +end + +% Householder matrix or upper-triangular matix +V = zeros(n, restart); +R = zeros(restart, restart); + +% Temporary solution +y = zeros(restart+1, 1); + +% Preconditioned subspace +Z = zeros(n, restart); + +% Given's rotation vectors +J = zeros(2, restart); + +w = zeros(n, 1); + +A_1nrm = norm(A,1); % TODO: require a crs kernel for this + +% condition number array +inrm_buf = zeros(restart+1, 1); + +flag = int32(0); +iter = int32(0); +ref_iters = 0; +null_res_prev = realmax; +form_x_thres = min(sqrt(rtol),1e6); +for it_outer = 1:max_outer_iters + % Compute the initial residual + if it_outer > 1 || vec_sqnorm2(x) > 0 + w = ax_multiply(A, M, x, leftnull, w); + u = b - w; + else + u = b; + end + + beta2 = vec_sqnorm2(u); + beta = sqrt(beta2); + + % Prepare the first Householder vector + if u(1) < 0 + beta = -beta; + end + updated_norm = sqrt(2*beta2+2*real(u(1))*beta); + u(1) = u(1) + beta; + u = u / updated_norm; + + % The first Householder entry + y(1) = - beta; + V(:, 1) = u; + + j = int32(1); + % inner iteration steps + N = bitshift(1, it_outer+3); + inrm = 0; + nrm = 0; + while true + % Construct the last vector from the Householder reflectors + + % v = Pj*ej = ej - 2*u*u'*ej + v = -2 * conj(V(j, j)) * V(:, j); + v(j) = v(j) + 1; + % v = P1*P2*...Pjm1*(Pj*ej) + if isempty(coder.target) + % This is faster when interpreted + for i = (j - 1): - 1:1 + v = v - 2 * (V(:, i)' * v) * V(:, i); + end + else + % This is faster when compiled + for i = (j - 1): - 1:1 + s = conj(V(i, i)) * v(i); + for k = i + 1:n + s = s + conj(V(k, i)) * v(k); + end + s = 2 * s; + + for k = i:n + v(k) = v(k) - s * V(k, i); + end + end + end + % Explicitly normalize v to reduce the effects of round-off. + v = v / sqrt(vec_sqnorm2(v)); + %v = v/norm(v); + + % Store the preconditioned vector (A, M, N, u, b) + [v, ref_iter, w] = iter_refine(A, M, N, v, w, leftnull, iter); + ref_iters = ref_iters+ref_iter; + % what we have G*q, z + % NOTE, we also need to project of null space from preconditioned + % subspace component + v=mgs_null_proj(us,v); + + Z(:, j) = v; % preconditioned subspace component + w = ax_multiply(A, M, v, leftnull, w); + + % Orthogonalize the Krylov vector + % Form Pj*Pj-1*...P1*Av. + if isempty(coder.target) + % This is faster when interpreted + for i = 1:j + w = w - 2 * (V(:, i)' * w) * V(:, i); + end + else + % This is faster when compiled + for i = 1:j + s = conj(V(i, i)) * w(i); + for k = i + 1:n + s = s + conj(V(k, i)) * w(k); + end + s = s * 2; + + for k = i:n + w(k) = w(k) - s * V(k, i); + end + end + end + + % Update the rotators + % Determine Pj+1. + if j < n + % Construct u for Householder reflector Pj+1. + u(j) = 0; + u(j+1) = w(j+1); + alpha2 = conj(w(j+1)) * w(j+1); + for k = j + 2:n + u(k) = w(k); + alpha2 = alpha2 + conj(w(k)) * w(k); + end + + if alpha2 > 0 + alpha = sqrt(alpha2); + if u(j+1) < 0 + alpha = -alpha; + end + if j < restart + updated_norm = sqrt(2*alpha2+2*real(u(j+1))*alpha); + u(j+1) = u(j+1) + alpha; + for k = j + 1:n + V(k, j+1) = u(k) / updated_norm; + end + end + + % Apply Pj+1 to v. + w(j+2:end) = 0; + w(j+1) = - alpha; + end + end + + % Apply Given's rotations to the newly formed v. + for colJ = 1:j - 1 + tmpv = w(colJ); + w(colJ) = conj(J(1, colJ)) * w(colJ) + conj(J(2, colJ)) * w(colJ+1); + w(colJ+1) = - J(2, colJ) * tmpv + J(1, colJ) * w(colJ+1); + end + + % Compute Given's rotation Jm. + if j < n + rho = sqrt(w(j)'*w(j)+w(j+1)'*w(j+1)); + J(1, j) = w(j) ./ rho; + J(2, j) = w(j+1) ./ rho; + y(j+1) = - J(2, j) .* y(j); + y(j) = conj(J(1, j)) .* y(j); + w(j) = rho; + end + + R(1:j, j) = w(1:j); + % estimate the abs condition number of |R^{-T}|_\infty + % TODO, change this to 2-norm + [kappa, inrm, inrm_buf, nrm] = est_abs_cond(R, j, inrm, ... + inrm_buf, nrm); + + if iter >= maxit + flag = int32(1); % reached maxit + break + end + iter = iter + 1; + + %m2c_printf('At iteration %d, |R^{-T}|_\\infty is %1.14e.\n', iter, resid); + + if j >= restart + break; + end + + if kappa >= form_x_thres + x2 = x; + y2 = backsolve(R, y, j); + for i = 1:j + x2 = x2 + y2(i) * Z(:, i); + end + x2=mgs_null_proj(us,x2); + w = ax_multiply(A, M, x2, leftnull, w); + er = vec_1norm(w)/(A_1nrm*vec_1norm(x2)); + %fprintf('|Ax|_1/|A|_1/|x|_1=%g\n', er); + if er <= rtol + flag=int32(2); + break; + end + % safeguard preventing diverging + if er >= null_res_prev + j = j-1; + flag = int32(2); + ref_iters = ref_iters-ref_iter; + end + null_res_prev = er; + end + + j = j + 1; + end + + % Compute correction vector + y = backsolve(R, y, j); + for i = 1:j + x = x + y(i) * Z(:, i); + end + + % apply the projection for null space + x=mgs_null_proj(us,x); + + if flag + if flag == int32(2); flag = int32(0); end + break; + end +end + +% Normalize the null space +x = x/sqrt(vec_sqnorm2(x)); +ref_iter = int32(ref_iters); +end + +function b = ax_multiply(A, M, x, trans, b) +coder.inline('always'); +if issparse(A) + if ~trans; b = A * x; else; b = A' * x; end +else + if ~trans + b = crs_prodAx(M.A, x, b); + else + b = crs_prodAtx(M.A, x, b); + end +end +end + +function [kappa, inrm, inrm_buf, nrm] = est_abs_cond(R, i, ... + inrm, inrm_buf, nrm) +coder.inline('always'); +if i == 1 + inrm_buf(1) = 1./R(1,1); + inrm = abs(inrm_buf(1)); + nrm = abs(R(1,1)); +else + if isempty(coder.target) + s = inrm_buf(1:i-1)'*R(1:i-1,i); + else + s = 0.0; + for j = int32(1):i-1 + s = s+inrm_buf(j)*R(j,i); + end + end + k1 = 1-s; k2 = -1-s; + if abs(k1) beta_U + break; + end + if res <= beta_L; break; end +end +end + +function x = mgs_null_proj(us, x) +coder.inline('always'); +if isempty(us); return; end +% Orthogonalize via MGS +for i = 1:size(us,2) + u = us(:,i); + x = x - (u'*x)*u; +end +end + +function nrm1 = vec_1norm(v) +coder.inline('always'); +n = int32(size(v, 1)); +nrm1 = 0; +for i = int32(1):n; nrm1 = nrm1 + abs(v(i)); end +end diff --git a/ksp/orthNulls.m b/ksp/orthNulls.m new file mode 100644 index 0000000..c7d573e --- /dev/null +++ b/ksp/orthNulls.m @@ -0,0 +1,94 @@ +function [vs, iters, ref_iters] = orthNulls(A, M, nNull, leftnull, ... + isSig, restart, rtol, maxit) +%orthNulls Computes orthonormal null-space vectors using HIFIR +% +% Syntax: +% vs = orthNulls(A, M, nNull, leftnull, isSig, restart, rtol, maxit) +% [vs, iters, ref_iters] = orthNulls(...) +% +% Description: +% Small-dimension nullspace computation based on HIFIR and FGMRES. nNull +% is the dimension of the nullspace. isSig indicates whether or not the +% HIF preconditioner is singular, which can be determined by checking the +% final Schur complement, i.e., info.schur_rank == info.schur_size. +% +% See also fgmresNull + +if issparse(A) + n = int32(size(A, 1)); +else + n = A.nrows; +end +vs = create_rand_orth(n, nNull); +iters = zeros(nNull, 1, 'int32'); +ref_iters = iters; +nullIter = int32(1); +while true + if ~isSig + vs(:, nullIter) = iter_refine(A, M, 16, vs(:, nullIter), true); + vs(:, nullIter) = vs(:, nullIter) / norm(vs(:, nullIter)); + end + [vs(:, nullIter), flag, iters(nullIter), ref_iters(nullIter)] = ... + fgmresNull(A, vs(:, nullIter), [], leftnull, restart, rtol, ... + maxit, M, [], []); + if flag + nullIter = nullIter - 1; + break; + end + if nullIter >= nNull; break; end + nullIter = nullIter + 1; +end +if nullIter <= 0; vs = []; end +if ~isempty(vs) + if nullIter < size(vs, 2) + vs = vs(:, 1:nullIter); + end + [vs, ~] = qr(vs, 0); % reorth +end +end + +function vs = create_rand_orth(n, m) +[vs, ~] = qr(rand(n, m), 0); +end + +function b = ax_multiply(A, M, x, trans, b) +coder.inline('always'); +if issparse(A) + if ~trans; b = A * x; else; b = A' * x; end +else + if ~trans + b = crs_prodAx(M.A, x, b); + else + b = crs_prodAtx(M.A, x, b); + end +end +end + +function [x, iter] = iter_refine(A, M, N, b, trans) +% applying iterative refinement A*x=b +bnorm = sqrt(vec_sqnorm2(b)); +beta_L = 0.2; +beta_U = 1e8; +% beta_stag = 0; +% res_prev = 1; +x = zeros(size(b)); +r = b; +w = b; +for iter = int32(1):N + if isa(M, 'function_handle') + % TODO: handle transpose + r = M(r); + else + if trans; op = 'SH'; else; op = 'S'; end + r = M.apply(r, op, -1); + end + x = x+r; + w = ax_multiply(A, M, x, trans, w); + r = b - w; + res = sqrt(vec_sqnorm2(r))/bnorm; + if res > beta_U + break; + end + if res <= beta_L; break; end +end +end \ No newline at end of file diff --git a/ksp/private/crs_Axpy.m b/ksp/private/crs_Axpy.m deleted file mode 100644 index b4ff6e0..0000000 --- a/ksp/private/crs_Axpy.m +++ /dev/null @@ -1,119 +0,0 @@ -function y = crs_Axpy(A, x, y, nthreads) -%crs_Axpy Compute y=y+A*x for a sparse matrix A in CRS format, -% assuming that number of columns of A is equal to size(x,1). -% -% y = crs_Axpy(A, x ,y) -% Computes y=A*x+y in serial. -% -% y = crs_Axpy(A, x, y, nthreads) -% Computes y=A*x+y Testing nthreads OpenMP threads, where nthreads is an int32. -% In this mode, there are implicit barriers in the function. -% -% y = crs_Axpy(A, x, y, []) -% Computes y=A*x+y locally within each OpenMP thread, assuming the -% parallel region has already been initialized. The argument y has -% been preallocated. In this mode, there is no implicit barrier. -% -% y = crs_Axpy(A, x, y, nthreads, comm) -% also performs communication within the MPI communicator. -% It assumes that A is partitioned columnwise, and hence A*x is -% a partial sum on each process. An allreduce is performed on y -% within the communicator. -% -% Note that if A is partitioned rowwise, then y must be duplicated -% on all processes, and prodAx should be called without the communicator. -% -% y = crs_Axpy(A, x, y, nthreads, comm, pgmsg [, pgsz]) -% specifies an additional message that should be summed during -% allreduce and strored in y. This is to piggy-back on allreduce -% to help aggregate small messages to optimize MPI communication. -% -% The compiled version reqiures A and x to be double precision. -% The M file works for both single and double precision. - -% See http://www.netlib.org/linalg/html_templates/node98.html - -%#codegen -args {crs_matrix, coder.typeof(0, [inf,inf]), coder.typeof(0, [inf,inf]),int32(0)} -%#codegen crs_Axpy_ser1 -args {crs_matrix, coder.typeof(0, [inf,inf]), coder.typeof(0, [inf,inf])} - -if size(y, 1) < A.nrows || size(y, 2) < size(x, 2) - m2c_error('crs_Axpy:BufferTooSmal', 'Buffer space for output y is too small.'); -end - - -if nargin > 3 && ~isempty(nthreads) - %% Declare parallel region - if ~ompGetNested && ompGetNumThreads > 1 && nthreads(1) > 1 - OMP_begin_master - m2c_warn('crs_Axpy:NestedParallel', ... - 'You are trying to use nested parallel regions. Solution may be incorrect.'); - OMP_end_master - end - - OMP_begin_parallel(int32(nthreads(1))); - - %% Compute y=A*x+y - y = crs_Axpy_kernel(A.row_ptr, A.col_ind, A.val, x, int32(size(x, 1)), ... - y, int32(size(y, 1)), A.nrows, int32(size(x, 2)), ompGetNumThreads > 1); - - %% End parallel region - OMP_end_parallel; -elseif nargin < 4 || ompGetNumThreads == 1 - %% Compute y=A*x+y - y = crs_Axpy_kernel(A.row_ptr, A.col_ind, A.val, x, int32(size(x, 1)), ... - y, int32(size(y, 1)), A.nrows, int32(size(x, 2))); -else - %% Compute y=A*x+y - y = crs_Axpy_kernel(A.row_ptr, A.col_ind, A.val, x, int32(size(x, 1)), ... - y, int32(size(y, 1)), A.nrows, int32(size(x, 2)), true); -end - -function y = crs_Axpy_kernel(row_ptr, col_ind, val, ... - x, x_m, y, b_m, nrows, nrhs, varargin) - -coder.inline('never'); -if ~isempty(varargin) && varargin{1} - [istart, iend] = OMP_local_chunk(nrows); -else - istart = int32(1); iend = nrows; -end - -xoffset = int32(0); boffset = int32(0); -for k = 1:nrhs - for i = istart:iend - t = y(boffset+i); - - for j = row_ptr(i):row_ptr(i+1) - 1 - t = t + val(j) * x(xoffset+col_ind(j)); - end - y(boffset+i) = t; - end - xoffset = xoffset + x_m; boffset = boffset + b_m; -end - -function test %#ok -%!test -%! if ~exist(['crs_Axpy.' mexext], 'file') -%! m=100; n = 20; -%! else -%! m=10000; n = 2000; -%! end -%! tic; sp = sprand(m,n,0.5); x=rand(size(sp,2),2); -%! fprintf(1, '\n\tGenerated random matrix in %g seconds\n', toc); -%! tic; b0 = sp*x + ones(m, 2); -%! fprintf(1, '\tComputed reference solution in %g seconds\n', toc); -%! tic; A = crs_matrix(sp); -%! fprintf(1, '\tConverted into crs_matrix in %g seconds\n', toc); -%! fprintf(1, '\tTesting serial: '); -%! tic; b1 = crs_Axpy(A, x, ones(m, 2)); -%! fprintf(1, 'Done in %g seconds\n ', toc); -%! assert(norm(b0-b1)<=1.e-10); - -%! b2 = zeros(size(sp,1),2); -%! for nthreads=int32([1 2 4 8]) -%! if nthreads>ompGetMaxThreads; break; end -%! fprintf(1, '\tTesting %d thread(s): ', nthreads); -%! tic; b2 = crs_Axpy(A, x, ones(m, 2), nthreads); -%! fprintf(1, 'Done in %g seconds\n ', toc); -%! assert(norm(b0-b2)<=1.e-10); -%! end diff --git a/ksp/private/crs_prodAx.m b/ksp/private/crs_prodAx.m new file mode 100644 index 0000000..cac95e5 --- /dev/null +++ b/ksp/private/crs_prodAx.m @@ -0,0 +1,38 @@ +function b = crs_prodAx(A, x, b) +%crs_prodAx Compute b=A*x for a sparse matrix A in CRS format, +% assuming that number of columns of A is equal to size(x,1). +% +% b = crs_prodAx(A, x [,b]) +% Computes b=A*x in serial. +% +% See http://www.netlib.org/linalg/html_templates/node98.html + +if nargin == 2 + b = coder.nullcopy(zeros(A.nrows, size(x, 2))); +else + if size(b, 1) < A.nrows || size(b, 2) < size(x, 2) + error('crs_prodAx:BufferTooSmal', 'Buffer space for output b is too small.'); + end +end + +%% Compute b=A*x +b = crs_prodAx_kernel(A.row_ptr, A.col_ind, A.val, x, int32(size(x, 1)), ... + b, int32(size(b, 1)), A.nrows, int32(size(x, 2))); + +function b = crs_prodAx_kernel(row_ptr, col_ind, val, ... + x, x_m, b, b_m, nrows, nrhs) + +istart = int32(1); iend = nrows; + +xoffset = int32(0); boffset = int32(0); +for k = 1:nrhs + for i = istart:iend + t = 0.0; + for j = row_ptr(i):row_ptr(i+1) - 1 + t = t + val(j) * x(xoffset+col_ind(j)); + end + b(boffset+i) = t; + end + xoffset = xoffset + x_m; boffset = boffset + b_m; +end + diff --git a/mex/HIF.m b/mex/HIF.m deleted file mode 100644 index e9b5acc..0000000 --- a/mex/HIF.m +++ /dev/null @@ -1,112 +0,0 @@ -classdef HIF - %HIF - OOP class representation of HIFIR4M project - % - % Description: - % This class provides another way to use this package in an OOP - % fashion, which can be benefitial for certain programmers. - - properties (Access = protected) - dbase % internal database structure - end - - methods - function obj = HIF(varargin) - %HIF - Construct an instance with internal database - % - % Syntax: - % hl = HIF - % hl = HIF(true) - % hl = HIF(___, true) - % - % Description: - % hl = HIF simpily constructs an instance without mixed - % precision support. - % - % hl = HIF(true) in constrast to the constructor above, - % this one enables mixed precision. - % - % See Also: HIFIR4M_INITIALIZE - obj.dbase = hifir4m_initialize(varargin{:}); - end - function flag = is_mixed(obj) - %IS_MIXED - Check if the HIFIR instance is mixed precision - % - % See Also: ID - flag = obj.dbase.is_mixed; - end - function flag = is_real(obj) - %IS_REAL - Check if the HIFIR instance is real arithmetic - % - % See Also: ID - flag = obj.dbase.is_real; - end - function id_ = id(obj) - %ID - Get the ID tag of the database - % - % See Also: IS_MIXED - id_ = obj.dbase.id; - end - function flag = empty(obj) - %EMPTY - Check emptyness - % - % See Also: ID - flag = hifir4m_empty(obj.dbase); - end - function varargout = factorize(obj, A, varargin) - %FACTORIZE - Perform MLILU factorization - % - % See Also: HIFIR4M_FACTORIZE, M_SOLVE - [varargout{1:nargout}] = hifir4m_factorize(obj.dbase, A, ... - varargin{:}); - end - function info = query_stats(obj) - %QUERY_INFO - Qeury factorization stats - % - % See Also: HIFIR4M_QUERY_STATS - info = hifir4m_query_stats(obj.dbase); - end - function hilu = export(obj, varargin) - %EXPORT - Export internal data in C++ to MATLAB - % - % See Also: HIFIR4M_EXPORT - hilu = hifir4m_export(obj.dbase, varargin{:}); - end - function varargout = solve(obj, varargin) - %SOLVE - Accessing inv(M) - % - % See Also: HIFIR4M_SOLVE, FACTORIZE - [varargout{1:nargout}] = hifir4m_solve(obj.dbase, ... - varargin{:}); - end - function varargout = hifir(obj, varargin) - %HIFIR - Accessing inv(M) with iterative refinement - % - % See Also: HIFIR4M_SOLVE, FACTORIZE - [varargout{1:nargout}] = hifir4m_hifir(obj.dbase, ... - varargin{:}); - end - function varargout = mmultiply(obj, varargin) - %MMULTIPLY - Perform M*x - % - % See Also: HIFIR4M_MMULTIPLY, FACTORIZE - [varargout{1:nargout}] = hifir4m_mmultiply(obj.dbase, ... - varargin{:}); - end - function varargout = fgmres(obj, A, b, varargin) - %FGMRES - Solving a system with FGMRES - % - % See Also: HIFIR4M_FGMRES, FACTORIZE - [varargout{1:nargout}] = hifir4m_fgmres(obj.dbase, A, b, ... - varargin{:}); - end - function clear(obj) - %CLEAR - Clear the internal storage - hifir4m_clear(obj.dbase); - end - function delete(obj) - % destructor - hifir4m_finalize(obj.dbase); - end - end -end - diff --git a/mex/hifir4m.hpp b/mex/hifir4m.hpp index b65b2bd..92343e8 100644 --- a/mex/hifir4m.hpp +++ b/mex/hifir4m.hpp @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -31,8 +32,6 @@ #include #include "hifir4m_config.hpp" -// avoid sorting! -#include "HIF.hpp" #include "hifir4m_num_array.hpp" namespace hifir4m { @@ -41,78 +40,9 @@ namespace hifir4m { #ifdef HIFIR4M_USE_32INT typedef int integer_type; #else -typedef mwSignedIndex integer_type; +typedef INT64_T integer_type; #endif -class MexNspFilter : public hif::NspFilter { - public: - using base = hif::NspFilter; - - explicit MexNspFilter(const std::size_t start = 0, - const std::size_t end = static_cast(-1)) - : base(start, end) {} - - virtual ~MexNspFilter() { - if (x_in) mxDestroyArray(x_in); - } - - std::string f_name = "#unknown#"; ///< function to evaluate - mxArray *f_handle = nullptr; ///< function handle - mutable mxArray *x_in = nullptr; ///< input "wrapper" of x - - virtual void user_filter(void *x, const std::size_t n, - const char dtype) const override { - if (dtype != 'd') - mexErrMsgIdAndTxt("hifir4m:nspFilter:badDtype", - "only double precision is supported"); - if (f_name == "#unknown#") - mexErrMsgIdAndTxt("hifir4m:nspFilter:unknownFunc", "unset function name"); - if (f_name == "feval" && !f_handle) - mexErrMsgIdAndTxt("hifir4m:nspFilter:missingHandler", - "missing function handle"); - if (!x_in) { - if (dtype == 'd') - x_in = mxCreateDoubleMatrix(n, 1, mxREAL); - else - x_in = mxCreateDoubleMatrix(n, 1, mxCOMPLEX); - } - if (n != mxGetM(x_in)) - mexErrMsgIdAndTxt("hifir4m:nspFilter:badShape", "unmatched sizes"); - // copy x to buffer - if (dtype == 'd') - std::copy_n(reinterpret_cast(x), n, mxGetPr(x_in)); - else - std::copy_n(reinterpret_cast *>(x), n, - reinterpret_cast *>(mxGetData(x_in))); - mxArray *rhs[2]; - const mwSize nrhs = f_handle ? 2 : 1; - if (f_handle) { - // if using function handle, then the first one is the function handle, - // while the input argument is given following it - rhs[0] = f_handle; - rhs[1] = x_in; - } else - rhs[0] = x_in; - // NOTE: We have to create a new array each time to ensure the safety of - // MATLAB runtime system. - // TODO: can we avoid this? - mxArray *lhs; - // call MATLAB directly - mexCallMATLAB(1, &lhs, nrhs, rhs, f_name.c_str()); - // copy back to data to HIFIR - if (dtype == 'd') - std::copy_n(mxGetPr(lhs), n, reinterpret_cast(x)); - else - std::copy_n( - reinterpret_cast *>(mxGetData(lhs)), n, - reinterpret_cast *>(x)); - mxDestroyArray(lhs); // free the array - } - - // function to enable user override function option - inline void enable_or() { _type = USER_OR; } -}; - /** * @brief Database structure * @@ -134,25 +64,25 @@ struct HIFIR4M_Database { using prec_t = typename std::conditional, hif::HIF>::type; - ///< preconditioner type - using ksp_factory_t = hif::ksp::KSPFactory; - ///< KSP factory type - using solver_t = typename ksp_factory_t::fgmres; ///< FMGRES type ///< updated operator type - static constexpr bool IS_MIXED = IsMixed; ///< mixed flag - static constexpr bool IS_REAL = _IS_REAL; ///< real flag + static constexpr bool IS_MIXED = IsMixed; ///< mixed flag + static constexpr bool IS_REAL = _IS_REAL; ///< real flag // attributes - std::shared_ptr M; ///< preconditioner attribute - std::shared_ptr ksp; ///< KSP solver + std::shared_ptr M; ///< preconditioner attribute }; enum { - HIFIR4M_CREATE = 0, ///< create database - HIFIR4M_GET = 1, ///< get database - HIFIR4M_CLEAR = 2, ///< clear database - HIFIR4M_CHECK = 3, ///< check emptyness - HIFIR4M_DESTROY = 4, ///< destroy database + HIFIR4M_CREATE = 0, ///< create database + HIFIR4M_GET = 1, ///< get database + HIFIR4M_CLEAR = 2, ///< clear database + HIFIR4M_CHECK = 3, ///< check emptyness + HIFIR4M_DESTROY = 4, ///< destroy database + HIFIR4M_FACTORIZE = 5, ///< Factorize + HIFIR4M_M_SOLVE = 6, ///< preconditioner solve + HIFIR4M_M_MULTIPLY = 7, ///< matrix vector product + HIFIR4M_EXPORT_DATA = 8, ///< export data + HIFIR4M_QUERY = 9, ///< query factorization info }; /** @@ -166,7 +96,7 @@ enum { */ template inline HIFIR4M_Database *database(const int action, - int &id) { + int & id) { // create database using data_t = HIFIR4M_Database; static std::vector> pool; @@ -220,40 +150,46 @@ inline HIFIR4M_Database *database(const int action, */ inline hif::Options create_opt_from_struct(const mxArray *rhs) { if (!mxIsStruct(rhs)) - mexErrMsgIdAndTxt("hifir4m:options:badInput", "input must be structure"); + mexErrMsgIdAndTxt("hifir4m:params:badInput", "input must be structure"); hif::Options opt = hif::get_default_options(); // WARNING! The structure from MATLAB should be initialized by a structured // routine with the following fields that are accessed. In addition, the // index must align with the order in C++ struct of Options - auto get_field = [&](const int field) -> double { - return mxGetScalar(mxGetFieldByNumber(rhs, 0, field)); + auto get_field = [&](const char *field) -> double { + auto *fd = mxGetField(rhs, 0, field); + if (!fd) + mexErrMsgIdAndTxt("hifir4m:params:invalidField", "%s is unknown field", + field); + return mxGetScalar(fd); }; - opt.tau_L = get_field(0); - opt.tau_U = get_field(1); - opt.kappa_d = get_field(2); - opt.kappa = get_field(3); - opt.alpha_L = get_field(4); - opt.alpha_U = get_field(5); - opt.rho = get_field(6); - opt.c_d = get_field(7); - opt.c_h = get_field(8); - opt.N = get_field(9); - opt.verbose = get_field(10); - opt.rf_par = get_field(11); - opt.reorder = get_field(12); - opt.spd = get_field(13); - opt.check = get_field(14); - opt.pre_scale = get_field(15); - opt.symm_pre_lvls = get_field(16); - opt.threads = get_field(17); - opt.mumps_blr = get_field(18); - opt.fat_schur_1st = get_field(19); - opt.rrqr_cond = get_field(20); - opt.pivot = get_field(21); - opt.gamma = get_field(22); - opt.beta = get_field(23); - opt.is_symm = get_field(24); - opt.no_pre = get_field(25); + opt.tau_L = get_field("tau_L"); + opt.tau_U = get_field("tau_U"); + opt.kappa_d = get_field("kappa_d"); + opt.kappa = get_field("kappa"); + opt.alpha_L = get_field("alpha_L"); + opt.alpha_U = get_field("alpha_U"); + opt.rho = get_field("rho"); + opt.c_d = get_field("c_d"); + opt.c_h = get_field("c_h"); + opt.N = get_field("N"); + opt.verbose = get_field("verbose"); + opt.rf_par = get_field("rf_par"); + opt.reorder = get_field("reorder"); + opt.spd = get_field("spd"); + opt.check = get_field("check"); + opt.pre_scale = get_field("pre_scale"); + opt.symm_pre_lvls = get_field("symm_pre_lvls"); + opt.threads = get_field("threads"); + opt.mumps_blr = get_field("mumps_blr"); + opt.fat_schur_1st = get_field("fat_schur_1st"); + opt.rrqr_cond = get_field("rrqr_cond"); + opt.pivot = get_field("pivot"); + opt.gamma = get_field("gamma"); + opt.beta = get_field("beta"); + opt.is_symm = get_field("is_symm"); + opt.no_pre = get_field("no_pre"); + opt.nzp_thres = get_field("nzp_thres"); + opt.dense_thres = get_field("dense_thres"); return opt; } @@ -288,21 +224,19 @@ inline void convert_crs_mx2pointer(const std::string &prefix, mexErrMsgIdAndTxt((prefix + ":badIntDtype").c_str(), "rowptr and colind must be int64"); #endif - const auto n = mxGetM(rowptr) - 1; + const auto n = mxGetM(rowptr) - 1; integer_type *rptr = (integer_type *)mxGetData(rowptr), *cptr = (integer_type *)mxGetData(colind); - const auto nnz = rptr[n] - rptr[0]; + const auto nnz = rptr[n] - rptr[0]; if (mxGetM(colind) < (mwSize)nnz) mexErrMsgIdAndTxt((prefix + ":badLength").c_str(), "bad nnz length %d", nnz); - if (*rptr == 1) { - // convert to zero based - for (mwSize i = 0; i < n + 1; ++i) --rptr[i]; - for (mwSize i = 0; i < (mwSize)nnz; ++i) --cptr[i]; - } + if (*rptr != 1 && *rptr != 0) + mexErrMsgIdAndTxt((prefix + ":badIndexBase").c_str(), + "must be {0,1}-based"); *rptr_ = rptr; *cptr_ = cptr; - *n_ = n; + *n_ = n; } // factorization @@ -330,9 +264,9 @@ inline double factorize(int id, const mxArray *rowptr, const mxArray *colind, auto data = database(HIFIR4M_GET, id); - mwSize n; - integer_type *rptr, *cptr; - ValueType *vptr; + mwSize n; + integer_type * rptr, *cptr; + ValueType * vptr; std::vector buf; // only used in complex and no interleaved convert_crs_mx2pointer(std::string("hifir4m:factorize"), rowptr, colind, &rptr, &cptr, &n); @@ -342,8 +276,6 @@ inline double factorize(int id, const mxArray *rowptr, const mxArray *colind, hif::CRS A(n, n, rptr, cptr, vptr, true); // get options const auto opts = create_opt_from_struct(opt); - const auto elim_nz = A.eliminate(1e-15, true); - if (opts.verbose) hif_info("eliminated %zd small entries in A", elim_nz); if (!data->M) data->M.reset(new typename data_t::prec_t()); hif::DefaultTimer timer; timer.start(); @@ -411,48 +343,6 @@ inline double M_solve(int id, const mxArray *rhs, mxArray *lhs, return timer.time(); // give M solve time to the user } -#if 0 -/** - * @brief Accessing inv(M) with 2 RHS (for complex conjugate pair) - * - * @tparam IsMixed Wether or not the database uses mixed precision - * @tparam ValueType Data type used, e.g., \a double or \a complex - * @param[in] id ID tag of the database - * @param[in] rhs right-hand side vector (nx2) - * @param[out] lhs left-hand side result, i.e., lhs=inv(A)*rhs (nx2) - * @param[in] r (optional) rank for the final Schur complement - * @return double overhead-free wall-clock time - */ -template -inline double M_solve2(int id, const mxArray *rhs, mxArray *lhs, - const std::size_t r = 0u) { - auto data = database(HIFIR4M_GET, id); - - if (!data->M) - mexErrMsgIdAndTxt("hifir4m:M_solve2:emptyM", "M has not yet factorized"); - - if (mxGetM(lhs) != 2u || mxGetM(rhs) != 2u) - mexErrMsgIdAndTxt("hifir4m:M_solve2:badRhsSize", "rhs/lhs must be 2-by-n"); - if (mxGetN(rhs) != mxGetN(lhs) || mxGetN(rhs) != data->M->nrows()) - mexErrMsgIdAndTxt("hifir4m:M_solve2:badRhsSize", - "rhs size does not agree with lhs or M"); - - using array_t = hif::Array>; - array_t b(data->M->nrows(), (std::array *)mxGetData(rhs), true), - x(data->M->nrows(), (std::array *)mxGetData(lhs), true); - hif::DefaultTimer timer; - timer.start(); - try { - data->M->solve_mrhs(b, x, r); - } catch (const std::exception &e) { - mexErrMsgIdAndTxt("hifir4m:M_solve:failedSolve", - "M_solve failed with message:\n%s", e.what()); - } - timer.finish(); - return timer.time(); // give M solve time to the user -} -#endif - // M solve with inner iteration /** @@ -495,14 +385,27 @@ inline double M_solve(int id, const mxArray *rowptr, const mxArray *colind, mexErrMsgIdAndTxt("hifir4m:M_solve_inner:badRhsSize", "rhs size does not agree with lhs or M"); - mwSize n; - integer_type *rptr, *cptr; - ValueType *vptr; + mwSize n; + integer_type * rptr, *cptr; + ValueType * vptr; std::vector buf; convert_crs_mx2pointer(std::string("hifir4m:M_solve_inner"), rowptr, colind, &rptr, &cptr, &n); get_num_data(val, &vptr, buf); + bool local_buf = false; + if (*rptr != 0) { + local_buf = true; + integer_type *ptr_bak = rptr; + rptr = (integer_type *)mxMalloc((n + 1) * sizeof(integer_type)); + const auto index_base = ptr_bak[0]; + for (mwSize i(0); i < n + 1; ++i) rptr[i] = ptr_bak[i] - index_base; + ptr_bak = cptr; + const mwSize nnz = rptr[n]; + cptr = (integer_type *)mxMalloc(sizeof(integer_type) * nnz); + for (mwSize i(0); i < nnz; ++i) cptr[i] = ptr_bak[i] - index_base; + } + // create csr wrapper from HIFIR hif::CRS A(n, n, rptr, cptr, vptr, true); using array_t = hif::Array; @@ -522,6 +425,10 @@ inline double M_solve(int id, const mxArray *rowptr, const mxArray *colind, } timer.finish(); set_num_data(lhs, lhs_ptr); + if (local_buf) { + mxFree(rptr); + mxFree(cptr); + } return timer.time(); // give M solve time to the user } @@ -577,220 +484,6 @@ inline double M_multiply(int id, const mxArray *rhs, mxArray *lhs, return timer.time(); // give M solve time to the user } -// KSP solve - -/** - * @brief Solving with FGMRES solver - * - * @tparam IsMixed Wether or not the database uses mixed precision - * @tparam ValueType Data type used, e.g., \a double or \a complex - * @param[in] id ID tag of the database - * @param[in] restart Restart of GMRES (30) - * @param[in] max_iter Maximum iteration allowed (500) - * @param[in] rtol Relative tolerance for residual convergence (1e-6) - * @param[in] verbose Verbose flag (true) - * @param[in] rowptr rowptr row pointer mex array (int32) - * @param[in] colind colind column index mex array (int32) - * @param[in] val value mex array (double) - * @param[in] rhs Right-hand side b vector - * @param[in,out] lhs Left-hand side solution and initial guess vector - * @return A tuple of flag, iterations, final residual and overhead-free - * wall-clock time are returned in this routine. - */ -template -inline std::tuple KSP_solve( - int id, const int restart, const int max_iter, const double rtol, - const bool verbose, const mxArray *rowptr, const mxArray *colind, - const mxArray *val, const mxArray *rhs, mxArray *lhs, - const int iter_refines = 1, const int *cst_nsp = nullptr, - const char *fname = nullptr, const mxArray *fhdl = nullptr) { - using ksp_t = typename HIFIR4M_Database::solver_t; - if (!std::is_floating_point::value && !mxIsComplex(rhs) && - !mxIsComplex(lhs) && !mxIsComplex(val)) - mexErrMsgIdAndTxt("hifir4m:M_solve:complex", "input array is not complex."); - - auto data = database(HIFIR4M_GET, id); - if (!data->M) - mexErrMsgIdAndTxt("hifir4m:KSP_solve:emptyM", "M has not yet factorized"); - - if (mxGetN(lhs) != 1u || mxGetN(rhs) != 1u) - mexErrMsgIdAndTxt("hifir4m:KSP_solve:badRhsSize", - "rhs/lhs must be column vector"); - // get matrix - mwSize n; - integer_type *rptr, *cptr; - ValueType *vptr; - std::vector buf; - convert_crs_mx2pointer(std::string("hifir4m:KSP_solve"), rowptr, colind, - &rptr, &cptr, &n); - get_num_data(val, &vptr, buf); - - if (mxGetM(rhs) != mxGetM(lhs) || mxGetM(rhs) != data->M->nrows() || - data->M->nrows() != n) - mexErrMsgIdAndTxt("hifir4m:KSP_solve:badRhsSize", - "rhs size does not agree with lhs, M or A"); - - data->ksp.reset(new ksp_t(data->M)); - auto &ksp = *data->ksp; - ksp.set_M(data->M); // setup preconditioner - - // create csr wrapper from HIFIR - hif::CRS A(n, n, rptr, cptr, vptr, true); - // arrays - using array_t = hif::Array; - ValueType *rhs_ptr, *lhs_ptr; - // only needed in non-interleaved complex - std::vector rhs_buf, lhs_buf; - get_num_data(rhs, &rhs_ptr, rhs_buf); - get_num_data(lhs, &lhs_ptr, lhs_buf); - array_t b(n, rhs_ptr, true), x(n, lhs_ptr, true); - - if (ksp.is_arnoldi() && restart > 0) ksp.set_restart_or_cycle(restart); - if (max_iter > 0) ksp.set_maxit(max_iter); - if (rtol > 0.0) ksp.set_rtol(rtol); - const int irs = iter_refines > 0 ? iter_refines : 1; - ksp.set_inner_steps(irs); - - // enable const null space filter - if (cst_nsp) - data->M->nsp.reset(new MexNspFilter(cst_nsp[0] - 1, cst_nsp[1])); - else if (fname) { - if (verbose) - hif_info( - "setting up user (right) null space filter with\n" - "\tcallback name: %s\n" - "\tfunction handle: %s", - fname, (fhdl ? "yes" : "no")); - // setup user filter - data->M->nsp.reset(new MexNspFilter()); - MexNspFilter *mex_nsp = dynamic_cast(data->M->nsp.get()); - if (!mex_nsp) - mexErrMsgIdAndTxt("hifir4m:KSP_solve:badNsp", - "failed to dynamic_cast nullspace filter"); - mex_nsp->f_name = fname; - if (fhdl) mex_nsp->f_handle = const_cast(fhdl); - mex_nsp->enable_or(); - } - - hif::DefaultTimer timer; - int flag; - std::size_t iters; - const int kernel_type = - irs <= 1 ? hif::ksp::TRADITION : hif::ksp::ITERATIVE_REFINE; - timer.start(); - try { - std::tie(flag, iters) = - ksp.solve(A, b, x, kernel_type, true /* always with guess */, verbose); - } catch (const std::exception &e) { - mexErrMsgIdAndTxt("hifir4m:KSP_solve:failedSolve", - "KSP_solve failed with message:\n%s", e.what()); - } - timer.finish(); - const double tt = timer.time(); - const double res = data->ksp->get_resids().back(); - data->ksp.reset(); // free - if (data->M->nsp) data->M->nsp.reset(); // release const nullspace filter - set_num_data(lhs, lhs_ptr); - return std::make_tuple(flag, (int)iters, res, tt); -} - -#if 0 -/** - * @brief Solve left null space with GMRES - * - * @tparam IsMixed Wether or not the database uses mixed precision - * @tparam ValueType Data type used, e.g., \a double or \a complex - * @tparam UseHi using hi-precision kernels - * @param[in] id ID tag of the database - * @param[in] restart Restart of GMRES (30) - * @param[in] max_iter Maximum iteration allowed (500) - * @param[in] rtol Relative tolerance for residual convergence (1e-6) - * @param[in] verbose Verbose flag (true) - * @param[in] rowptr rowptr row pointer mex array (int32) - * @param[in] colind colind column index mex array (int32) - * @param[in] val value mex array (double) - * @param[in] rhs Right-hand side b vector - * @param[in,out] lhs Left-hand side solution and initial guess vector - * @return A tuple of flag, iterations, final residual and overhead-free - * wall-clock time are returned in this routine. - */ -template -inline std::tuple KSP_null_solve( - int id, const int restart, const int max_iter, const double rtol, - const bool verbose, const mxArray *rowptr, const mxArray *colind, - const mxArray *val, const mxArray *rhs, mxArray *lhs) { - using ksp_t = typename HIFIR4M_Database::null_solver_t; - using ksp_hi_t = - typename HIFIR4M_Database::null_hi_solver_t; - - auto data = database(HIFIR4M_GET, id); - if (!data->M) - mexErrMsgIdAndTxt("hifir4m:KSP_null_solve:emptyM", - "M has not yet factorized"); - - if (mxGetN(lhs) != 1u || mxGetN(rhs) != 1u) - mexErrMsgIdAndTxt("hifir4m:KSP_null_solve:badRhsSize", - "rhs/lhs must be column vector"); - // get matrix - mwSize n; - integer_type *rptr, *cptr; - ValueType *vptr; - convert_crs_mx2pointer(std::string("hifir4m:KSP_solve"), rowptr, colind, val, - &rptr, &cptr, &vptr, &n); - - if (mxGetM(rhs) != mxGetM(lhs) || mxGetM(rhs) != data->M->nrows() || - data->M->nrows() != n) - mexErrMsgIdAndTxt("hifir4m:KSP_solve:badRhsSize", - "rhs size does not agree with lhs, M or A"); - - // create csr wrapper from HIFIR - hif::CRS A(n, n, rptr, cptr, vptr, true); - // arrays - using array_t = hif::Array; - array_t b(n, (ValueType *)mxGetData(rhs), true), - x(n, (ValueType *)mxGetData(lhs), true); - - if (!UseHi) { - data->ksp_null.reset(new ksp_t(data->M)); - auto &ksp = *data->ksp_null; - ksp.set_M(data->M); // setup preconditioner - if (ksp.is_arnoldi() && restart > 0) ksp.set_restart_or_cycle(restart); - if (max_iter > 0) ksp.set_maxit(max_iter); - if (rtol > 0.0) ksp.set_rtol(rtol); - } else { - data->ksp_null_hi.reset(new ksp_hi_t(data->M)); - auto &ksp = *data->ksp_null_hi; - ksp.set_M(data->M); // setup preconditioner - if (ksp.is_arnoldi() && restart > 0) ksp.set_restart_or_cycle(restart); - if (max_iter > 0) ksp.set_maxit(max_iter); - if (rtol > 0.0) ksp.set_rtol(rtol); - } - - hif::DefaultTimer timer; - int flag; - std::size_t iters; - timer.start(); - try { - if (!UseHi) - std::tie(flag, iters) = data->ksp_null->solve( - A, b, x, hif::ksp::TRADITION, true /* always with guess */, verbose); - else - std::tie(flag, iters) = data->ksp_null_hi->solve( - A, b, x, hif::ksp::TRADITION, true /* always with guess */, verbose); - } catch (const std::exception &e) { - mexErrMsgIdAndTxt("hifir4m:KSP_solve:failedSolve", - "KSP_solve failed with message:\n%s", e.what()); - } - timer.finish(); - const double tt = timer.time(); - const double res = !UseHi ? data->ksp_null->get_resids().back() - : data->ksp_null_hi->get_resids().back(); - !UseHi ? data->ksp_null.reset() : data->ksp_null_hi.reset(); // free - return std::make_tuple(flag, (int)iters, res, tt); -} - -#endif - /** * @brief Extract the internal data from HIFIR * @@ -802,8 +495,8 @@ inline std::tuple KSP_null_solve( */ template inline void M_export(int id, const bool destroy, mxArray **hilu) { - using data_t = HIFIR4M_Database; - using crs_t = typename data_t::prec_t::crs_type; + using data_t = HIFIR4M_Database; + using crs_t = typename data_t::prec_t::crs_type; using value_t = typename crs_t::value_type; static constexpr mxComplexity DTYPE = std::is_floating_point::value ? mxREAL : mxCOMPLEX; @@ -816,15 +509,6 @@ inline void M_export(int id, const bool destroy, mxArray **hilu) { return; } - if (!data->M->can_export()) { - mexWarnMsgIdAndTxt( - "hifir4m:M_export:cannot_export", - "Exporting data is not allowed. 1) compiled with sparse last level, or " - "2) using interval-based data stuctures for L and U"); - *hilu = mxCreateCellMatrix(0, 0); - return; - } - // attributes in crs static const char *crs_attrs[] = {"row_ptr", "col_ind", "val", "nrows", "ncols"}; @@ -834,7 +518,7 @@ inline void M_export(int id, const bool destroy, mxArray **hilu) { "F", "s_row", "s_col", "p", "p_inv", "q", "q_inv"}; - value_t *vptr1, *vptr2, *vptr3, *vptr4, *vptr5; + value_t * vptr1, *vptr2, *vptr3, *vptr4, *vptr5; std::vector vbuf1, vbuf2, vbuf3, vbuf4, vbuf5; // initialize cell array (hilu) @@ -881,9 +565,9 @@ inline void M_export(int id, const bool destroy, mxArray **hilu) { *s_col = mxCreateUninitNumericMatrix( n, 1, IsMixed ? mxSINGLE_CLASS : mxDOUBLE_CLASS, mxREAL); // row/column permutation arrays - auto *p = mxCreateUninitNumericMatrix(n, 1, mxINT64_CLASS, mxREAL), + auto *p = mxCreateUninitNumericMatrix(n, 1, mxINT64_CLASS, mxREAL), *p_inv = mxCreateUninitNumericMatrix(n, 1, mxINT64_CLASS, mxREAL), - *q = mxCreateUninitNumericMatrix(n, 1, mxINT64_CLASS, mxREAL), + *q = mxCreateUninitNumericMatrix(n, 1, mxINT64_CLASS, mxREAL), *q_inv = mxCreateUninitNumericMatrix(n, 1, mxINT64_CLASS, mxREAL); get_num_data(L_val, &vptr1, vbuf1); @@ -1024,7 +708,7 @@ inline void M_export(int id, const bool destroy, mxArray **hilu) { // check for last level if (iter->is_last_level()) { typename crs_t::size_type nrows, ncols; - value_t *dummy(nullptr); + value_t * dummy(nullptr); iter->inquire_or_export_dense(dummy, nrows, ncols, destroy); auto *mat = mxCreateUninitNumericMatrix( nrows, ncols, IsMixed ? mxSINGLE_CLASS : mxDOUBLE_CLASS, DTYPE); diff --git a/mex/hifir4m_clear.m b/mex/hifir4m_clear.m deleted file mode 100644 index 8051f34..0000000 --- a/mex/hifir4m_clear.m +++ /dev/null @@ -1,22 +0,0 @@ -function hifir4m_clear(dbase) -%HIFIR4M_CLEAR - Clear the storage in a database -% -% Syntax: -% hifir4m_clear(dbase) -% -% Description: -% Clearing the storage is the purpose of this routine. -% -% See Also: -% HIFIR4M_FINALIZE - -% Author: Qiao Chen -% Email: qiao.chen@stonybrook.edu -% License: GLPv3+ - -%------------------------- BEGIN MAIN CODE ------------------------------% - -hifir4m_mex(HIFIR4M_CLEAR, dbase); - -%-------------------------- END MAIN CODE -------------------------------% -end \ No newline at end of file diff --git a/mex/hifir4m_config.hpp b/mex/hifir4m_config.hpp index 6ae563d..8eff745 100644 --- a/mex/hifir4m_config.hpp +++ b/mex/hifir4m_config.hpp @@ -42,12 +42,13 @@ do { \ mexPrintf(__msg); \ mexPrintf("\n"); \ - ioFlush(); \ + ioFlush(); \ } while (false) #define HIF_STDERR(__msg) \ do { \ mexWarnMsgTxt(__msg); \ mexWarnMsgTxt("\n"); \ + ioFlush(); \ } while (false) // No ASCII color @@ -71,4 +72,6 @@ #define HIF_LAPACK_INT mwSignedIndex #endif +#include "hifir.hpp" + #endif // HIFIR4M_CONFIG_HPP_ diff --git a/mex/hifir4m_create_params.m b/mex/hifir4m_create_params.m deleted file mode 100644 index f8b4d3d..0000000 --- a/mex/hifir4m_create_params.m +++ /dev/null @@ -1,86 +0,0 @@ -function opts = hifir4m_create_params(varargin) -%HIFIR4M_CREATE_PARAMS - Create options for HIFIR preconditioner -% -% Syntax: -% opts = hifir4m_create_params -% opts = hifir4m_create_params(OptionFieldName, OptionValue) -% -% Description: -% HIFIR4M_CREATE_PARAMS constructs the control parameters used in -% factorizing HIFIR preconditioner. The parameters are well-documented -% in the c++ code thus omitting here. -% -% opts = hifir4m_create_params gets the default parameters -% -% opts = hifir4m_create_params(OptionFieldName, OptionValue) mimics -% the MATLAB parameter syntax (key, value), which is commonly used (e.g. -% in 2-D line plotting). Available option field names are -% tau_L, tau_U, tau_d, tau_kappa, alpha_L, alpha_U, rho, c_d, -% c_h, N, verbose, rf_par, reorder, saddle, check, pre_scale, -% symm_pre_lvls, threads, -% whose meanings are documented in the c++ code. -% -% Examples: -% To get default constrol parameters -% >> opts = hifir4m_create_params; -% -% To customize parameters -% >> opts = hifir4m_create_params('tau_L', 1e-3, 'tau_U', 1e-3); -% >> assert(opts.tau_L == 1e-3); -% -% See Also: -% HIFIR4M_FACTORIZE - -% Author: Qiao Chen -% Email: qiao.chen@stonybrook.edu -% License: GLPv3+ - -%------------------------- BEGIN MAIN CODE ------------------------------% - -persistent fields -if isempty(fields) - fields = {'tau_L', 'tau_U', 'kappa_d', 'kappa', 'alpha_L', 'alpha_U', ... - 'rho', 'c_d', 'c_h', 'N', 'verbose', 'rf_par', 'reorder', 'saddle', ... - 'check', 'pre_scale', 'symm_pre_lvls', 'threads', 'mumps_blr', ... - 'fat_schur_1st', 'rrqr_cond', 'pivot', 'gamma', 'beta', 'is_symm', ... - 'no_pre'}; -end - -p = inputParser; -addOptional(p, 'tau_L', 1e-4, @(x) isscalar(x) && x > 0); -addOptional(p, 'tau_U', 1e-4, @(x) isscalar(x) && x > 0); -addOptional(p, 'kappa_d', 3.0, @(x) isscalar(x) && x > 0); -addOptional(p, 'kappa', 3.0, @(x) isscalar(x) && x > 0); -addOptional(p, 'alpha_L', 10, @(x) isscalar(x) && x > 0); -addOptional(p, 'alpha_U', 10, @(x) isscalar(x) && x > 0); -addOptional(p, 'rho', 0.5, @(x) isscalar(x) && x > 0); -addOptional(p, 'c_d', 10.0, @(x) isscalar(x) && x > 0); -addOptional(p, 'c_h', 2.0, @(x) isscalar(x) && x > 0); -addOptional(p, 'N', -1, @isscalar); -addOptional(p, 'verbose', 1, @(x) isscalar(x) && x >= 0); -addOptional(p, 'rf_par', 1, @(x) isscalar(x) && x >= 0); -addOptional(p, 'reorder', 1, @(x) isscalar(x) && x >= 0); -addOptional(p, 'saddle', 1, @(x) isscalar(x) && x >= 0); -addOptional(p, 'check', 1, @(x) isscalar(x) && x >= 0); -addOptional(p, 'pre_scale', 0, @(x) isscalar(x) && x >= 0); -addOptional(p, 'symm_pre_lvls', 1, @(x) isscalar(x) && x >= 0); -addOptional(p, 'threads', 0, @(x) isscalar(x) && x >= 0); -addOptional(p, 'mumps_blr', 1, @(x) isscalar(x) && x >= 0); -addOptional(p, 'fat_schur_1st', 0, @(x) isscalar(x) && x >= 0); -addOptional(p, 'rrqr_cond', 0, @(x) isscalar(x)); -addOptional(p, 'pivot', 0, @(x) isscalar(x) && x >= 0); -addOptional(p, 'gamma', 1.0, @(x) isscalar(x) && x >= 0); -addOptional(p, 'beta', 1e3, @(x) isscalar(x) && x >= 0); -addOptional(p, 'is_symm', 0, @(x) isscalar(x) && x>= 0); -addOptional(p, 'no_pre', 0, @(x) isscalar(x) && x >= 0); -parse(p, varargin{:}); -sorted_opts = p.Results; -% NOTE parser gives sorted structure -opts = struct; -for idx = 1:length(fields) - f = fields{idx}; - opts.(f) = sorted_opts.(f); -end - -%-------------------------- END MAIN CODE -------------------------------% -end diff --git a/mex/hifir4m_empty.m b/mex/hifir4m_empty.m deleted file mode 100644 index 901a8db..0000000 --- a/mex/hifir4m_empty.m +++ /dev/null @@ -1,22 +0,0 @@ -function flag = hifir4m_empty(dbase) -%HIFIR4M_EMPTY - Check the emptyness of a database -% -% Syntax: -% hifir4m_empty(dbase) -% -% Description: -% Check the emptyness of a database -% -% See Also: -% HIFIR4M_INITIALIZE - -% Author: Qiao Chen -% Email: qiao.chen@stonybrook.edu -% License: GLPv3+ - -%------------------------- BEGIN MAIN CODE ------------------------------% - -flag = hifir4m_mex(HIFIR4M_CHECK, dbase); - -%-------------------------- END MAIN CODE -------------------------------% -end \ No newline at end of file diff --git a/mex/hifir4m_ensure_int.m b/mex/hifir4m_ensure_int.m deleted file mode 100644 index cd2dc19..0000000 --- a/mex/hifir4m_ensure_int.m +++ /dev/null @@ -1,16 +0,0 @@ -function A = hifir4m_ensure_int(A) -%HIFIR4M_ENSURE_INT - Convert CRS to proper integer type (i.e., int{32,64}) -% -% Syntax: -% A = hifir4m_ensure_int(A) - -isint64 = hifir4m_isint64; -if isa(A.row_ptr, 'int32') && isint64 - A.row_ptr = int64(A.row_ptr); - A.col_ind = int64(A.col_ind); -elseif isa(A.row_ptr, 'int64') && ~isint64 - A.row_ptr = int32(A.row_ptr); - A.col_ind = int32(A.col_ind); -end - -end \ No newline at end of file diff --git a/mex/hifir4m_export.m b/mex/hifir4m_export.m deleted file mode 100644 index b7e8497..0000000 --- a/mex/hifir4m_export.m +++ /dev/null @@ -1,84 +0,0 @@ -function hilu = hifir4m_export(dbase, varargin) -%HIFIR4M_EXPORT - Export internal data to MATLAB -% -% Syntax: -% hilu = hifir4m_export(dbase) -% hilu = hifir4m_export(dbase, Options) -% -% Description: -% HIFIR4M_EXPORT exports the internal data inside C++ to MATLAB -% -% hilu = hifir4m_export(dbase) exports data in all levels to MATLAB, -% where hilu is a cell array, whose length is the number of levels. -% Each sparse level will contain a struct of "L_B", "D_B, "U_B", "E", -% "F", "s_row", "s_col", "p", "p_inv", "q", and "q_inv" total 11 -% attributes. For the last dense level, it will output the dense matrix, -% and the user need to factorize it manullly by either LU or QR. -% -% hilu = hifir4m_export(dbase, Options) with customized options: -% 1. 'format': {'sparse', 'economic'}, the default is `sparse`. For -% 'economic', it will output struct of CRS_MATRIX. -% 2. 'destroy': boolean flag, if true, then it will destroy the -% internal data level by level while exporting it to MATLAB. -% The default value is false. (Advanced usage!) -% -% Examples: -% To get all levels in sparse format -% >> hilu = hifir4m_export(dbase); -% >> for lvl = 1:size(hilu,1); disp(hilu{i}); end -% -% To get with economic format, do the following -% >> hilu = hifir4m_export(dbase, 'format', 'economic'); -% -% See Also: -% HIFIR4M_FACTORIZE, LU, QR - -% Author: Qiao Chen -% Email: qiao.chen@stonybrook.edu -% License: GLPv3+ - -%------------------------- BEGIN MAIN CODE ------------------------------% - -p = inputParser; -addOptional(p, 'format', 'sparse', @(x) ismember(x, {'sparse', 'economic'})); -addOptional(p, 'destroy', false, @(x) isscalar(x)); -parse(p, varargin{:}); % parse -opts = p.Results; - -hilu = hifir4m_mex(HIFIR4M_EXPORT_DATA, dbase, opts.destroy); - -if strcmp(opts.format, 'sparse') - for i = 1:size(hilu,1) - if isstruct(hilu{i}) - % sparse level - n = double(hilu{i}.L_B.nrows); - hilu{i}.L_B = crsSparse(hilu{i}.L_B); - hilu{i}.L_B = hilu{i}.L_B + speye(n); - hilu{i}.D_B = spdiags(hilu{i}.D_B, 1, n, n); - hilu{i}.U_B = crsSparse(hilu{i}.U_B); - hilu{i}.U_B = hilu{i}.U_B + speye(n); - hilu{i}.E = crsSparse(hilu{i}.E); - hilu{i}.F = crsSparse(hilu{i}.F); - end - end -end - -%-------------------------- END MAIN CODE -------------------------------% -end - -function row_ind = crsRowind(row_ptr, col_ind) -row_ind = zeros(size(col_ind),class(col_ind)); - -nrows = int32(length(row_ptr))-1; -for i=1:nrows - for j = row_ptr(i) : row_ptr(i+1) - 1 - row_ind(j) = i; - end -end -end - -function B = crsSparse(A) -row_ind = crsRowind(A.row_ptr, A.col_ind); -B = sparse(double(row_ind), double(A.col_ind), A.val, double(A.nrows), ... - double(A.ncols)); -end \ No newline at end of file diff --git a/mex/hifir4m_factorize.m b/mex/hifir4m_factorize.m deleted file mode 100644 index 6249217..0000000 --- a/mex/hifir4m_factorize.m +++ /dev/null @@ -1,97 +0,0 @@ -function varargout = hifir4m_factorize(dbase, A, varargin) -%HIFIR4M_FACTORIZE - Factorize HIFIR preconditioner -% -% Syntax: -% hifir4m_factorize(dbase, A) -% hifir4m_factorize(dbase, A, opts) -% hifir4m_factorize(dbase, A, fieldName, fieldValue, ...) -% t = hifir4m_factorize(___) -% [t, fac_info] = hifir4m_factorize(___) -% -% Description: -% HIFIR4M_FACTORIZE is the driver call for factorizing a squared sparse -% matrix with HIFIR. It has the above different syntaxes, which will be -% addresses shortly. -% -% hifir4m_factorize(dbase, A) simply factorizes a given matrix with a -% pre-constructed internal database "dbase" and a squared sparse matrix A. -% This syntax uses the default options. -% -% hifir4m_factorize(dbase, A, opts) is like above but with a customized -% option structured. -% -% hifir4m_factorize(dbase, A, fieldName, fieldValue, ...) allows one to -% implicitly pass in parameters without explicitly dealing with an option -% structure. -% -% [t, fac_info] = hifir4m_factorize(___) indicates that the function has -% (potentially) two outputs, where the first one *t* is the total factorizing -% time without MATLAB interpreter overhead. *fac_info* is a structure of -% some information field of interests resulting from the factorization -% computation. -% -% It is worth noting that the variable A can also be a set of length 2, -% in which the first element is a matfile (by name) containing the target -% sparse matrix whose variable name is given by the second element in A. -% The matrix will be cleared once it has been converted into CRS. -% -% For those who are directly working with CRS in MATLAB, they can wrap their -% CRS matrix into a structure with fields: -% row_ptr - starting position array (offset) -% col_ind - column index array -% val - value array -% Notice that for structure input, the row_ptr and col_ind must be int32 -% -% Examples: -% To simply factorize a sparse matrix, we can -% >> dbase = hifir4m_initialize; -% >> A = sprand(10, 10, 0.5); -% >> hifir4m_factorize(dbase, A); -% -% If you are interested in the timing -% >> t = hifir4m_factorize(dbase, A); -% >> disp(t); -% -% Supply your own control options -% >> hifir4m_factorize(dbase, A, 'symm_pre_lvls', 2); % 2 level symm -% -% Pass the matrix via MATFILE -% >> A = sprand(10, 10, 0.5); -% >> save test.mat A -% >> clear A -% >> hifir4m_factorize(dbase, {'test.mat', 'A'}); -% -% Using struct -% >> A.row_ptr = int32(...); -% >> A.col_ind = int32(...); -% >> A.val = ...; -% >> hifir4m_factorize(dbase, A); -% -% See Also: -% HIFIR4M_INITIALIZE, HIFIR4M_CREATE_PARAMS, HIFIR4M_FGMRES - -% Author: Qiao Chen -% Email: qiao.chen@stonybrook.edu -% License: GLPv3+ - -%------------------------- BEGIN MAIN CODE ------------------------------% - -if iscell(A) - assert(length(A) == 2); - t = tic; A = getfield(load(A{1}, A{2}), A{2}); t = toc(t); - fprintf(1, 'HIFIR4M factorization I/O time is %.4gs\n', t); -end -if ~isempty(varargin) && isstruct(varargin{1}) - opts = varargin{1}; % ignore whatever goes after the first one -else - opts = hifir4m_create_params(varargin{:}); -end -if nargin < 3 || isempty(opts); end -% convert A to zero-based CRS -if issparse(A); A = hifir4m_sp2crs(A); end -A = hifir4m_ensure_int(A); -[varargout{1:nargout}] = hifir4m_mex(HIFIR4M_FACTORIZE, dbase, ... - A.row_ptr, A.col_ind, A.val, opts); - -%-------------------------- END MAIN CODE -------------------------------% -end diff --git a/mex/hifir4m_fgmres.m b/mex/hifir4m_fgmres.m deleted file mode 100644 index a612658..0000000 --- a/mex/hifir4m_fgmres.m +++ /dev/null @@ -1,117 +0,0 @@ -function varargout = hifir4m_fgmres(dbase, A, b, varargin) -%HIFIR4M_FGMRES - right-preconditioned FGMRES with HIF -% -% Syntax: -% x = hifir4m_fgmres(dbase, A, b) -% x = hifir4m_fgmres(dbase, A, b, restart) -% x = hifir4m_fgmres(dbase, A, b, restart, rtol) -% x = hifir4m_fgmres(dbase, A, b, restart, rtol, maxit) -% x = hifir4m_fgmres(dbase, A, b, restart, rtol, maxit, x0) -% x = hifir4m_fgmres(dbase, A, b, restart, rtol, maxit, x0, verbose) -% [x, flag] = hifir4m_fgmres(___) -% [x, flag, iters] = hifir4m_fgmres(___) -% [x, flag, iters, t] = hifir4m_fgmres(___) -% [___] = hifir4m_fgmres(___, iter_refines) -% [___] = hifir4m_fgmres(___, iter_refines, nsp_cst) -% -% Description: -% HIFIR4M_GMRES is an optimized serial implementation of -% right-preconditioned GMRES (sometimes referred as flexible GMRES [1]), -% which has been shown to be more robust than the left-version [2]. The -% GMRES here uses HIFIR as a right preconditioner, which works very -% well (especially for saddle point and indifinite problems). -% -% Before we dig into each of the syntaxes highlighted above, one thing -% needs to be kept in mind is that the solver MUST be called only after -% an instance of the preconditioner has been successfully factorized! -% -% x = hifir4m_fgmres(dbase, A, b) computes the solution of A\b with -% default parameters. -% -% x = hifir4m_fgmres(dbase, A, b, restart, rtol, maxit) allows one to -% customize restart (30), relative tolerance (1e-6) and maximum -% iterations (500) for the GMRES solver; their default values are shown -% in the parentheses. -% -% x = hifir4m_fgmres(___, x0, verbose) further allows one to supply the -% initial guess (all zeros) and verbose flag (true). -% -% [x, flag, iters, t] = hifir4m_fgmres(___) indicates that there are -% three more optoinal outputs, namely -% flag - solver status flag -% 0 - successed -% 1 - diverged -% 2 - stagnated -% 3 - break-down -% <0 - input errors -% iters - total iterations used -% t - solving wall-clock time (no MATLAB interperater overhead) -% -% [___] = hifir4m_fgmres(___, iter_refines) indicates using iterative -% refine kernel for the preconditioner. -% -% [___] = hifir4m_fgmres(___, iter_refines, nsp_cst) solves a singular problem -% with a (partial) constant mode that is specificed via a size-2 array -% nsp_cst, in which the first entry is the starting const mode entry while -% the ending index for the second element in nsp_cst -% -% Examples: -% The following example shows how to use the GMRES solver -% >> % assume we have dbase initialized and factorized -% >> A = sprand(10, 10, 0.5); -% >> b = rand(size(A, 1), 1); -% >> x = hifir4m_fgmres(dbase, A, b); -% >> assert(norm(A-b)/norm(b)<=1e-6); -% -% References: -% [1] Saad, Y. (1993). A flexible inner-outer preconditioned GMRES -% algorithm. SIAM Journal on Scientific Computing, 14(2), 461-469. -% [2] Ghai, A., Lu, C., & Jiao, X. (2019). A comparison of preconditioned -% Krylov subspace methods for largeā€scale nonsymmetric linear systems. -% Numerical Linear Algebra with Applications, 26(1), e2215. -% -% See Also: -% HIFIR_FACTORIZE, GMRES - -% Author: Qiao Chen -% Email: qiao.chen@stonybrook.edu -% License: GLPv3+ - -%------------------------- BEGIN MAIN CODE ------------------------------% - -gmres_pars = [30 1e-6 500]; % restart,rtol,maxit -x0 = []; -verbose = true; -iter_refines = int32(1); -for i = 1:min(3, length(varargin)) - if ~isempty(varargin{i}); gmres_pars(i) = varargin{i}; end -end -if length(varargin) > 3; x0 = varargin{4}; end -if length(varargin) > 4 - if ~isempty(varargin{5}); verbose = logical(varargin{5}); end -end -if length(varargin) > 5 - if ~isempty(varargin{6}); iter_refines = int32(varargin{6}); end -end -if isempty(x0) - if isreal(b) - x0 = zeros(size(b)); - else - x0 = complex(zeros(size(b)), zeros(size(b))); - end -end -% Convert to zero-based CRS -if issparse(A); A = hifir4m_sp2crs(A); end -A = hifir4m_ensure_int(A); -if length(varargin) < 7 || isempty(varargin{7}) - [varargout{1:nargout}] = hifir4m_mex(HIFIR4M_KSP_SOLVE, dbase, ... - A.row_ptr, A.col_ind, A.val, b, gmres_pars(1), gmres_pars(2), ... - gmres_pars(3), x0, verbose, iter_refines); -else - [varargout{1:nargout}] = hifir4m_mex(HIFIR4M_KSP_SOLVE, dbase, ... - A.row_ptr, A.col_ind, A.val, b, gmres_pars(1), gmres_pars(2), ... - gmres_pars(3), x0, verbose, iter_refines, varargin{7}); -end - -%-------------------------- END MAIN CODE -------------------------------% -end \ No newline at end of file diff --git a/mex/hifir4m_finalize.m b/mex/hifir4m_finalize.m deleted file mode 100644 index ec8448b..0000000 --- a/mex/hifir4m_finalize.m +++ /dev/null @@ -1,22 +0,0 @@ -function hifir4m_finalize(dbase) -%HIFIR4M_FINALIZE - Finalize a low-level database -% -% Syntax: -% hifir4m_finalize(dbase) -% -% Description: -% Clean up an existing database. -% -% See Also: -% HIFIR4M_INITIALIZE - -% Author: Qiao Chen -% Email: qiao.chen@stonybrook.edu -% License: GLPv3+ - -%------------------------- BEGIN MAIN CODE ------------------------------% - -hifir4m_mex(HIFIR4M_DESTROY, dbase); - -%-------------------------- END MAIN CODE -------------------------------% -end \ No newline at end of file diff --git a/mex/hifir4m_git_revision.m b/mex/hifir4m_git_revision.m deleted file mode 100644 index c1329f6..0000000 --- a/mex/hifir4m_git_revision.m +++ /dev/null @@ -1,65 +0,0 @@ -function rev = hifir4m_git_revision(pkg, rev_type) -%HIFIR4M_GIT_REVISION - Get git revision of the package -% -% Syntax: -% rev = hifir4m_git_revision -% rev = hifir4m_git_revision(pkg) -% rev = hifir4m_git_revision(___, rev_type) -% -% Description: -% rev = hifir4m_git_revision provides the git revision of hifir4m -% packages. -% -% rev = hifir4m_git_revision(pkg) provides the git revision by given a -% package, pkg can be either -% 'hifir4m' - matlab binding (default) -% 'hifir' - C++ HIFIR that the matlab binding was built upon -% -% rev = hifir4m_git_revision(___, rev_type) provides options for -% revision string types, rev_type must be either -% 'default' - default one (default) -% 'short' - short revision hash -% -% Examples: -% Get the default revision hash of HIFIR4M package -% >> disp(hifir4m_git_revision); % should be a long hash -% -% Get the short revision -% >> disp(hifir4m_git_revision([], 'short')); -% -% Get HIFIR revision -% >> disp(hifir4m_git_revision('hifir')); -% -% Note: -% In case the command `git` fails, the returned revision string will be -% 'UNKNOWN'. -% -% See Also: -% HIFIR4M_ROOT - -% Author: Qiao Chen -% Email: qiao.chen@stonybrook.edu -% License: GLPv3+ - -%------------------------- BEGIN MAIN CODE ------------------------------% - -if nargin < 1 || isempty(pkg); pkg = 'hifir4m'; end -assert(ismember(pkg, {'hifir4m', 'hifir'})); -if nargin < 2 || isempty(rev_type); rev_type = 'default'; end -assert(ismember(rev_type, {'default', 'short'})); -cmd = 'git rev-parse HEAD'; -if strcmp(rev_type, 'short'); cmd = 'git rev-parse --short HEAD'; end -d_bak = pwd; -cd(hifir4m_root); -if strcmp(pkg, 'hifir'); cd('hifir'); end -[flag, rev] = system(cmd); -if flag - fprintf(2, 'Could not get git revision (%d)!\n', flag); - rev = 'UNKNOWN'; -else - rev = strip(rev); -end -cd(d_bak); - -%-------------------------- END MAIN CODE -------------------------------% -end \ No newline at end of file diff --git a/mex/hifir4m_hifir.m b/mex/hifir4m_hifir.m deleted file mode 100644 index 461aa76..0000000 --- a/mex/hifir4m_hifir.m +++ /dev/null @@ -1,49 +0,0 @@ -function varargout = hifir4m_hifir(dbase, varargin) -%HIFIR4M_HIFIR - Accessing inv(M) using iterative refinement -% -% Syntax: -% x = hifir4m_hifir(dbase, A, b, N) -% x = hifir4m_hifir(dbase, A, b, N, trans) -% x = hifir4m_hifir(dbase, A, b, N, trans, r) -% [x, t] = hifir4m_hifir(___) -% -% Description: -% HIFIR4M_HIFIR allows one to directly access the preconditioner in a -% iterative-refinement fashion. -% -% x = hifir4m_hifir(dbase, A, b, N) iterative refinements with M^g and A -% with N iterations. -% -% x = hifir4m_hifir(dbase, A, b, N, trans, r) iteratively refines M^g and A -% with operation trans (i.e., true for A' and (M^g)') and final Schur -% complement dimension r. -% -% [x, t] = hifir4m_hifir(___) can potentially report the -% overhead-free wall-clock time. -% -% See Also: -% HIFIR4M_SOLVE - -% Author: Qiao Chen -% Email: qiao.chen@stonybrook.edu -% License: GLPv3+ - -%------------------------- BEGIN MAIN CODE ------------------------------% - -assert(length(varargin) >= 3, 'inputs must be at least A, b, and N'); -b = varargin{2}; -N = varargin{3}; N = N(1); -% transpose/Hermitian flag -trans = false; -if length(varargin) > 3; trans = logical(varargin{4}); trans = trans(1); end -% final Schur complement dimension -r = -1; -if length(varargin) > 4; r = varargin{5}; r = r(1); end -A = varargin{1}; -if issparse(A); A = hifir4m_sp2crs(A); end -A = hifir4m_ensure_int(A); -[varargout{1:nargout}] = hifir4m_mex(HIFIR4M_M_SOLVE, dbase, b, ... - A.row_ptr, A.col_ind, A.val, N, trans, r); - -%-------------------------- END MAIN CODE -------------------------------% -end \ No newline at end of file diff --git a/mex/hifir4m_initialize.m b/mex/hifir4m_initialize.m deleted file mode 100644 index 5c410d8..0000000 --- a/mex/hifir4m_initialize.m +++ /dev/null @@ -1,38 +0,0 @@ -function dbase = hifir4m_initialize(is_mixed, is_complex) -%HIFIR4M_INITIALIZE - Initialize a low-level database -% -% Syntax: -% dbase = hifir4m_initialize -% dbase = hifir4m_initialize(true) -% dbase = hifir4m_initialize(___, true) -% -% Description: -% HIFIR4M_INITIALIZE should be the first routine you call in this -% library, as it performs the initialization of an internal database. -% -% dbase = hifir4m_initialize constructs a default instance of database -% with double precision preconditioner. -% -% dbase = hifir4m_initialize(true) constructs a mixed-precision -% instance of the internal database, i.e., the preconditioner uses single -% precision floating numbers whereas the solvers accept double precision. -% -% dbase = hifir4m_initialize(___, true) constructs a complex number -% preconditioner instance. -% -% See Also: -% HIFIR4M_FINALIZE, HIFIR4M_FACTORIZE - -% Author: Qiao Chen -% Email: qiao.chen@stonybrook.edu -% License: GLPv3+ - -%------------------------- BEGIN MAIN CODE ------------------------------% - -if nargin < 1 || isempty(is_mixed); is_mixed = false; end -if nargin < 2 || isempty(is_complex); is_complex = false; end -assert(isscalar(is_mixed), 'is_mixed must be scalar (prefer boolean)'); -dbase = hifir4m_mex(HIFIR4M_CREATE, is_mixed, is_complex); - -%-------------------------- END MAIN CODE -------------------------------% -end \ No newline at end of file diff --git a/mex/hifir4m_mex.cpp b/mex/hifir4m_mex.cpp index 809e405..f3dd9ea 100644 --- a/mex/hifir4m_mex.cpp +++ b/mex/hifir4m_mex.cpp @@ -19,19 +19,6 @@ #include "hifir4m.hpp" -namespace hifir4m { -enum { - HIFIR4M_FACTORIZE = HIFIR4M_DESTROY + 1, ///< Factorize - HIFIR4M_M_SOLVE = HIFIR4M_FACTORIZE + 1, ///< preconditioner solve - HIFIR4M_KSP_SOLVE = HIFIR4M_M_SOLVE + 1, ///< KSP solve - HIFIR4M_KSP_NULL = HIFIR4M_KSP_SOLVE + 1, ///< KSP for left null - HIFIR4M_EXPORT_DATA = HIFIR4M_KSP_NULL + 1, ///< export data - HIFIR4M_M_SOLVE2 = HIFIR4M_EXPORT_DATA + 1, ///< solve 2 RHS - HIFIR4M_M_MULTIPLY = HIFIR4M_M_SOLVE2 + 1, ///< matrix vector - HIFIR4M_QUERY = HIFIR4M_M_MULTIPLY + 1, ///< query factorization info -}; -} // namespace hifir4m - // structure of database fields static const char* fnames[3] = {"id", "is_mixed", "is_real"}; @@ -317,217 +304,6 @@ void mexFunction(int nlhs, mxArray* plhs[], int nrhs, const mxArray* prhs[]) { return; } // end multiply - // M solve - if (action == hifir4m::HIFIR4M_M_SOLVE2) { - mexWarnMsgIdAndTxt( - "hifir4m:mexgateway:m_solve2", - "solving two rhs at the same time is not supported yet!"); -#if 0 - // act, dbase, b - if (nrhs < 3) - mexErrMsgIdAndTxt("hifir4m:mexgateway:m_solve", - "Accessing inv(M) requires 3 inputs"); - double tt; - plhs[0] = mxDuplicateArray(prhs[2]); - if (is_real) - tt = is_mixed ? hifir4m::M_solve2(id, prhs[2], plhs[0]) - : hifir4m::M_solve2(id, prhs[2], plhs[0]); - else - tt = is_mixed ? hifir4m::M_solve2(id, prhs[2], plhs[0]) - : hifir4m::M_solve2(id, prhs[2], plhs[0]); - if (nlhs > 1) plhs[1] = mxCreateDoubleScalar(tt); -#endif - return; - } // end M solve - - if (action == hifir4m::HIFIR4M_KSP_SOLVE) { - // KSP solver - // act, dbase, rowptr, colind, val, b, (restart, rtol, maxit, x0, verbose) - if (nrhs < 6) - mexErrMsgIdAndTxt("hifir4m:mexgateway:ksp_solve", - "KSP solver requires at least 6 inputs"); - const int restart = nrhs < 7 ? 30 : (int)mxGetScalar(prhs[6]); - const double rtol = nrhs < 8 ? 1e-6 : mxGetScalar(prhs[7]); - const int maxit = nrhs < 9 ? 500 : (int)mxGetScalar(prhs[8]); - if (nrhs > 9) { - // has initial guess - plhs[0] = mxDuplicateArray(prhs[9]); - } else { - plhs[0] = mxCreateDoubleMatrix(mxGetM(prhs[5]), 1, - is_real ? mxREAL : mxCOMPLEX); - } - const bool verbose = nrhs < 11 ? true : (bool)mxGetScalar(prhs[10]); - const int irs = nrhs < 12 ? 1 : (int)mxGetScalar(prhs[11]); - int flag, iters; - double res, tt; - if (nrhs < 13) { - if (is_real) { - if (is_mixed) - std::tie(flag, iters, res, tt) = hifir4m::KSP_solve( - id, restart, maxit, rtol, verbose, prhs[2], prhs[3], prhs[4], - prhs[5], plhs[0], irs); - else - std::tie(flag, iters, res, tt) = hifir4m::KSP_solve( - id, restart, maxit, rtol, verbose, prhs[2], prhs[3], prhs[4], - prhs[5], plhs[0], irs); - } else { - if (is_mixed) - std::tie(flag, iters, res, tt) = hifir4m::KSP_solve( - id, restart, maxit, rtol, verbose, prhs[2], prhs[3], prhs[4], - prhs[5], plhs[0], irs); - else - std::tie(flag, iters, res, tt) = hifir4m::KSP_solve( - id, restart, maxit, rtol, verbose, prhs[2], prhs[3], prhs[4], - prhs[5], plhs[0], irs); - } - } else { - const mxArray* fhdl = nullptr; // function handler - const char* feval = "feval"; // feval is used to call user func handler - char* fname = nullptr; - int cst_nsp[2]; - const int* nsp_ptr = nullptr; - if (mxIsClass(prhs[12], "function_handle")) { - // function handle - fhdl = prhs[12]; - fname = (char*)feval; - } else if (mxIsChar(prhs[12])) { - // function script - fname = mxArrayToString(prhs[12]); - } else { - if (mxGetM(prhs[12]) * mxGetN(prhs[12]) < 2) - mexErrMsgIdAndTxt("hifir4m:mexgateway:ksp_solve", - "constant null space mode requires two input " - "specifying the range"); - cst_nsp[0] = *mxGetPr(prhs[12]); - cst_nsp[1] = *(mxGetPr(prhs[12]) + 1); - nsp_ptr = cst_nsp; - } - if (is_real) { - if (is_mixed) - std::tie(flag, iters, res, tt) = hifir4m::KSP_solve( - id, restart, maxit, rtol, verbose, prhs[2], prhs[3], prhs[4], - prhs[5], plhs[0], irs, nsp_ptr, fname, fhdl); - else - std::tie(flag, iters, res, tt) = hifir4m::KSP_solve( - id, restart, maxit, rtol, verbose, prhs[2], prhs[3], prhs[4], - prhs[5], plhs[0], irs, nsp_ptr, fname, fhdl); - } else { - if (is_mixed) - std::tie(flag, iters, res, tt) = hifir4m::KSP_solve( - id, restart, maxit, rtol, verbose, prhs[2], prhs[3], prhs[4], - prhs[5], plhs[0], irs, nsp_ptr, fname, fhdl); - else - std::tie(flag, iters, res, tt) = hifir4m::KSP_solve( - id, restart, maxit, rtol, verbose, prhs[2], prhs[3], prhs[4], - prhs[5], plhs[0], irs, nsp_ptr, fname, fhdl); - } - if (fname && fname != feval) mxFree(fname); - } - if (nlhs < 2) { - // handle flag - if (flag) { - const char* msg = hif::ksp::flag_repr("GMRES", flag).c_str(); - mexErrMsgIdAndTxt("hifir4m:mexgateway:ksp_solve", - "GMRES failed with flag %d and message:\n%s", flag, - msg); - } - } else { - plhs[1] = mxCreateDoubleScalar((double)flag); - if (nlhs > 2) plhs[2] = mxCreateDoubleScalar((double)iters); - if (nlhs > 3) plhs[3] = mxCreateDoubleScalar(res); - if (nlhs > 4) plhs[4] = mxCreateDoubleScalar(tt); - } - return; - } - - if (action == hifir4m::HIFIR4M_KSP_NULL) { - mexWarnMsgIdAndTxt("hifir4m:mexgateway:ksp_null", - "Nullspace solver is deprecated!"); -#if 0 - // KSP null solver - // act, dbase, rowptr, colind, val, b, (restart, rtol, maxit, x0, verbose, - // hiprec) - if (nrhs < 6) - mexErrMsgIdAndTxt("hifir4m:mexgateway:ksp_solve", - "KSP solver requires at least 6 inputs"); - const int restart = nrhs < 7 ? 30 : (int)mxGetScalar(prhs[6]); - const double rtol = nrhs < 8 ? 1e-6 : mxGetScalar(prhs[7]); - const int maxit = nrhs < 9 ? 500 : (int)mxGetScalar(prhs[8]); - if (nrhs > 9) { - // has initial guess - plhs[0] = mxDuplicateArray(prhs[9]); - } else { - plhs[0] = mxCreateDoubleMatrix(mxGetM(prhs[5]), 1, - is_real ? mxREAL : mxCOMPLEX); - } - const bool verbose = nrhs < 11 ? true : (bool)mxGetScalar(prhs[10]); - const bool hiprec = nrhs < 12 ? false : (bool)mxGetScalar(prhs[11]); - int flag, iters; - double res, tt; - if (is_real) { - if (is_mixed) { - if (hiprec) - std::tie(flag, iters, res, tt) = hifir4m::KSP_null_solve( - id, restart, maxit, rtol, verbose, prhs[2], prhs[3], prhs[4], - prhs[5], plhs[0]); - else - std::tie(flag, iters, res, tt) = hifir4m::KSP_null_solve( - id, restart, maxit, rtol, verbose, prhs[2], prhs[3], prhs[4], - prhs[5], plhs[0]); - } else { - if (hiprec) - std::tie(flag, iters, res, tt) = hifir4m::KSP_null_solve( - id, restart, maxit, rtol, verbose, prhs[2], prhs[3], prhs[4], - prhs[5], plhs[0]); - else - std::tie(flag, iters, res, tt) = - hifir4m::KSP_null_solve(id, restart, maxit, rtol, - verbose, prhs[2], prhs[3], - prhs[4], prhs[5], plhs[0]); - } - } else { - if (is_mixed) { - if (hiprec) - std::tie(flag, iters, res, tt) = - hifir4m::KSP_null_solve( - id, restart, maxit, rtol, verbose, prhs[2], prhs[3], prhs[4], - prhs[5], plhs[0]); - else - std::tie(flag, iters, res, tt) = - hifir4m::KSP_null_solve( - id, restart, maxit, rtol, verbose, prhs[2], prhs[3], prhs[4], - prhs[5], plhs[0]); - } else { - if (hiprec) - std::tie(flag, iters, res, tt) = - hifir4m::KSP_null_solve( - id, restart, maxit, rtol, verbose, prhs[2], prhs[3], prhs[4], - prhs[5], plhs[0]); - else - std::tie(flag, iters, res, tt) = - hifir4m::KSP_null_solve( - id, restart, maxit, rtol, verbose, prhs[2], prhs[3], prhs[4], - prhs[5], plhs[0]); - } - } - if (nlhs < 2) { - // handle flag - if (flag != hif::ksp::STAGNATED) { - const char* msg = hif::ksp::flag_repr("GMRES_Null", flag).c_str(); - mexErrMsgIdAndTxt("hifir4m:mexgateway:ksp_solve", - "GMRES_Null failed with flag %d and message:\n%s", - flag, msg); - } - } else { - plhs[1] = mxCreateDoubleScalar((double)flag); - if (nlhs > 2) plhs[2] = mxCreateDoubleScalar((double)iters); - if (nlhs > 3) plhs[3] = mxCreateDoubleScalar(res); - if (nlhs > 4) plhs[4] = mxCreateDoubleScalar(tt); - } -#endif - return; - } - if (action != hifir4m::HIFIR4M_EXPORT_DATA) mexErrMsgIdAndTxt("hifir4m:mexgateway", "invalid action flag %d", action); diff --git a/mex/hifir4m_mmultiply.m b/mex/hifir4m_mmultiply.m deleted file mode 100644 index f504aca..0000000 --- a/mex/hifir4m_mmultiply.m +++ /dev/null @@ -1,46 +0,0 @@ -function varargout = hifir4m_mmultiply(dbase, varargin) -%HIFIR4M_MMULTIPLY - Accessing inv(M) -% -% Syntax: -% x = hifir4m_mmultiply(dbase, b) -% x = hifir4m_mmultiply(dbase, b, trans) -% x = hifir4m_mmultiply(dbase, b, trans, r) -% [x, t] = hifir4m_mmultiply(___) -% -% Description: -% HIFIR4M_MMULTIPLY computes the multilevel matrix-vector multiplication. -% -% x = hifir4m_mmultiply(dbase, b) computes M*b given a factorized -% database. -% -% x = hifir4m_mmultiply(dbase, b, trans) computes M'*b given a factorized -% database if trans==true. -% -% x = hifir4m_mmultiply(dbase, b, trans, r) computes a multilevel matrix- -% vector multiplication with operation trans with a customized final Schur -% complement dimension r. -% -% [x, t] = hifir4m_mmultiply(___) can potentially report the -% overhead-free wall-clock time. -% -% See Also: -% HIFIR4M_SOLVE - -% Author: Qiao Chen -% Email: qiao.chen@stonybrook.edu -% License: GLPv3+ - -%------------------------- BEGIN MAIN CODE ------------------------------% - -assert(length(varargin) >= 1, 'input(s) must be at least RHS b.'); -% transpose/Hermitian flag -trans = false; -if length(varargin) > 1; trans = logical(varargin{2}); trans = trans(1); end -% final Schur complement dimension -r = 0; -if length(varargin) > 2; r = varargin{3}; r = r(1); end -[varargout{1:nargout}] = hifir4m_mex(HIFIR4M_M_MULTIPLY, dbase, varargin{1}, ... - trans, r); - -%-------------------------- END MAIN CODE -------------------------------% -end \ No newline at end of file diff --git a/mex/hifir4m_query_stats.m b/mex/hifir4m_query_stats.m deleted file mode 100644 index 9692157..0000000 --- a/mex/hifir4m_query_stats.m +++ /dev/null @@ -1,24 +0,0 @@ -function info = hifir4m_query_stats(dbase) -%HIFIR4M_QUERY_STATS - Query factorization statistics -% -% Syntax: -% info = hifir4m_query_stats(dbase) -% -% Description: -% Query factorization stats, such as nnz, levels, rank, etc. The output -% info is a structure containing information fields. -% -% See Also: -% HIFIR4M_FACTORIZE - -% Author: Qiao Chen -% Email: qiao.chen@stonybrook.edu -% License: GLPv3+ - -%------------------------- BEGIN MAIN CODE ------------------------------% - -info = hifir4m_mex(HIFIR4M_QUERY, dbase); - -%-------------------------- END MAIN CODE -------------------------------% - -end \ No newline at end of file diff --git a/mex/hifir4m_solve.m b/mex/hifir4m_solve.m deleted file mode 100644 index c787736..0000000 --- a/mex/hifir4m_solve.m +++ /dev/null @@ -1,48 +0,0 @@ -function varargout = hifir4m_solve(dbase, varargin) -%HIFIR4M_SOLVER - Accessing inv(M) -% -% Syntax: -% x = hifir4m_solve(dbase, b) -% x = hifir4m_solve(dbase, b, trans) -% x = hifir4m_solve(dbase, b, trans, r) -% [x, t] = hifir4m_solve(___) -% -% Description: -% HIFIR4M_SOLVER allows one to directly access the preconditioner, -% which is useful for developers who want to integrate HIFIR into -% existing (linear) solver frameworks. -% -% x = hifir4m_solve(dbase, b) computes M\b given a factorized -% database. -% -% x = hifir4m_solve(dbase, b, trans) computes M'\b given a factorized -% database if trans==true. -% -% x = hifir4m_solve(dbase, b, trans, r) computes a multilevel triangular -% solve with operation trans with a customized final Schur complement -% dimension r. -% -% [x, t] = hifir4m_solve(___) can potentially report the -% overhead-free wall-clock time. -% -% See Also: -% HIFIR4M_FACTORIZE - -% Author: Qiao Chen -% Email: qiao.chen@stonybrook.edu -% License: GLPv3+ - -%------------------------- BEGIN MAIN CODE ------------------------------% - -assert(length(varargin) >= 1, 'input(s) must be at least RHS b.'); -% transpose/Hermitian flag -trans = false; -if length(varargin) > 1; trans = logical(varargin{2}); trans = trans(1); end -% final Schur complement dimension -r = 0; -if length(varargin) > 2; r = varargin{3}; r = r(1); end -[varargout{1:nargout}] = hifir4m_mex(HIFIR4M_M_SOLVE, dbase, varargin{1}, ... - trans, r); - -%-------------------------- END MAIN CODE -------------------------------% -end \ No newline at end of file diff --git a/mex/hifir4m_sp2crs.m b/mex/hifir4m_sp2crs.m index eb6f42c..9a8ad1e 100644 --- a/mex/hifir4m_sp2crs.m +++ b/mex/hifir4m_sp2crs.m @@ -22,4 +22,11 @@ end [A_struct.row_ptr, A_struct.col_ind, A_struct.val] = hifir4m_ijv2crs(m, rs, ... cs, vs); +if ~isint64 + A_struct.nrows = int32(m); + A_struct.ncols = int32(n); +else + A_struct.nrows = int64(m); + A_struct.ncols = int64(n); +end end diff --git a/mex/hifir4m_version.cpp b/mex/hifir4m_version.cpp new file mode 100644 index 0000000..6f794f6 --- /dev/null +++ b/mex/hifir4m_version.cpp @@ -0,0 +1,33 @@ +/* + This file is part of HILUCSI4M project + + Copyright (C) 2019--2021 NumGeom Group at Stony Brook University + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published + by the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see . +*/ + +#include + +#include "mex.h" + +#include "hifir.hpp" + +// convert IJV format from MATLAB built-in find to CRS +void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) { + (void)nrhs; + (void)prhs; + plhs[0] = mxCreateDoubleScalar(HIF_GLOBAL_VERSION); + if (nlhs > 1) plhs[1] = mxCreateDoubleScalar(HIF_MAJOR_VERSION); + if (nlhs > 2) plhs[2] = mxCreateDoubleScalar(HIF_MINOR_VERSION); +} \ No newline at end of file diff --git a/mex/private/HIFIR4M_CHECK.m b/mex/private/HIFIR4M_CHECK.m deleted file mode 100644 index a2fa154..0000000 --- a/mex/private/HIFIR4M_CHECK.m +++ /dev/null @@ -1,5 +0,0 @@ -function flag = HIFIR4M_CHECK -%HIFIR4M_CHECK - Check emptyness - -flag = int32(3); -end \ No newline at end of file diff --git a/mex/private/HIFIR4M_CLEAR.m b/mex/private/HIFIR4M_CLEAR.m deleted file mode 100644 index fa395f0..0000000 --- a/mex/private/HIFIR4M_CLEAR.m +++ /dev/null @@ -1,5 +0,0 @@ -function flag = HIFIR4M_CLEAR -%HIFIR4M_CLEAR - Clear the database (not deallocate) - -flag = int32(2); -end \ No newline at end of file diff --git a/mex/private/HIFIR4M_CREATE.m b/mex/private/HIFIR4M_CREATE.m deleted file mode 100644 index c5e0ac6..0000000 --- a/mex/private/HIFIR4M_CREATE.m +++ /dev/null @@ -1,5 +0,0 @@ -function flag = HIFIR4M_CREATE -%HIFIR4M_CREATE - Get the flag for creating a database - -flag = int32(0); -end \ No newline at end of file diff --git a/mex/private/HIFIR4M_DESTROY.m b/mex/private/HIFIR4M_DESTROY.m deleted file mode 100644 index 09aa9cc..0000000 --- a/mex/private/HIFIR4M_DESTROY.m +++ /dev/null @@ -1,5 +0,0 @@ -function flag = HIFIR4M_DESTROY -%HIFIR4M_DESTROY - Get the destroying database flag - -flag = int32(4); -end \ No newline at end of file diff --git a/mex/private/HIFIR4M_EXPORT_DATA.m b/mex/private/HIFIR4M_EXPORT_DATA.m deleted file mode 100644 index 32c2a65..0000000 --- a/mex/private/HIFIR4M_EXPORT_DATA.m +++ /dev/null @@ -1,5 +0,0 @@ -function flag = HIFIR4M_EXPORT_DATA -%HIFIR4M_EXPORT_DATA - Flag to export internal data attributes - -flag = int32(HIFIR4M_KSP_NULL + 1); -end \ No newline at end of file diff --git a/mex/private/HIFIR4M_FACTORIZE.m b/mex/private/HIFIR4M_FACTORIZE.m deleted file mode 100644 index 5bc1bb5..0000000 --- a/mex/private/HIFIR4M_FACTORIZE.m +++ /dev/null @@ -1,5 +0,0 @@ -function flag = HIFIR4M_FACTORIZE -%HIFIR4M_FACTORIZE - Get the factorization flag - -flag = int32(HIFIR4M_DESTROY + 1); -end \ No newline at end of file diff --git a/mex/private/HIFIR4M_KSP_NULL.m b/mex/private/HIFIR4M_KSP_NULL.m deleted file mode 100644 index 3732e95..0000000 --- a/mex/private/HIFIR4M_KSP_NULL.m +++ /dev/null @@ -1,6 +0,0 @@ -function flag = HIFIR4M_KSP_NULL -%HIFIR4M_KSP_NULL - Flag for accessing GMRES left null space solver -% Note: Placeholder - -flag = int32(HIFIR4M_KSP_SOLVE + 1); -end \ No newline at end of file diff --git a/mex/private/HIFIR4M_KSP_SOLVE.m b/mex/private/HIFIR4M_KSP_SOLVE.m deleted file mode 100644 index 1be95c2..0000000 --- a/mex/private/HIFIR4M_KSP_SOLVE.m +++ /dev/null @@ -1,5 +0,0 @@ -function flag = HIFIR4M_KSP_SOLVE -%HIFIR4M_KSP_SOLVE - Flag for accessing KSP (FGMRES) solver - -flag = int32(HIFIR4M_M_SOLVE + 1); -end \ No newline at end of file diff --git a/mex/private/HIFIR4M_M_MULTIPLY.m b/mex/private/HIFIR4M_M_MULTIPLY.m deleted file mode 100644 index bd78fd0..0000000 --- a/mex/private/HIFIR4M_M_MULTIPLY.m +++ /dev/null @@ -1,5 +0,0 @@ -function flag = HIFIR4M_M_MULTIPLY -%HIFIR4M_M_MULTIPLY - Get the flag for computing M*x - -flag = int32(HIFIR4M_M_SOLVE2 + 1); -end \ No newline at end of file diff --git a/mex/private/HIFIR4M_M_SOLVE.m b/mex/private/HIFIR4M_M_SOLVE.m deleted file mode 100644 index 5c4e1f6..0000000 --- a/mex/private/HIFIR4M_M_SOLVE.m +++ /dev/null @@ -1,5 +0,0 @@ -function flag = HIFIR4M_M_SOLVE -%HIFIR4M_M_SOLVE - Get the flag for accessing inv(M) - -flag = int32(HIFIR4M_FACTORIZE + 1); -end \ No newline at end of file diff --git a/mex/private/HIFIR4M_M_SOLVE2.m b/mex/private/HIFIR4M_M_SOLVE2.m deleted file mode 100644 index c6e416d..0000000 --- a/mex/private/HIFIR4M_M_SOLVE2.m +++ /dev/null @@ -1,5 +0,0 @@ -function flag = HIFIR4M_M_SOLVE2 -%HIFIR4M_M_SOLVE2 - Get the flag for accessing inv(M) with two RHS - -flag = int32(HIFIR4M_EXPORT_DATA + 1); -end \ No newline at end of file diff --git a/mex/private/HIFIR4M_QUERY.m b/mex/private/HIFIR4M_QUERY.m deleted file mode 100644 index d9b54af..0000000 --- a/mex/private/HIFIR4M_QUERY.m +++ /dev/null @@ -1,6 +0,0 @@ -function flag = HIFIR4M_QUERY -%HIFIR4M_QUERY Query stats - -flag = int32(HIFIR4M_M_MULTIPLY + 1); -end - diff --git a/mex/hifir4m_ijv2crs.cpp b/mex/private/hifir4m_ijv2crs.cpp similarity index 94% rename from mex/hifir4m_ijv2crs.cpp rename to mex/private/hifir4m_ijv2crs.cpp index 0cfc81e..4b34579 100644 --- a/mex/hifir4m_ijv2crs.cpp +++ b/mex/private/hifir4m_ijv2crs.cpp @@ -26,7 +26,7 @@ typedef int integer_type; #define INT_TYPE mxINT32_CLASS #else -typedef mwSignedIndex integer_type; +typedef INT64_T integer_type; #define INT_TYPE mxINT64_CLASS #endif @@ -45,12 +45,13 @@ static inline typename std::enable_if::type convert_ijv2crs( for (mwSize i(0); i < nnz; ++i) { const auto idx = rowptr[rows[i]]; vals_ptr[idx] = vs_ptr[i]; - colind[idx] = cols[i] - 1; + colind[idx] = cols[i]; ++rowptr[rows[i]]; } // revert rowptr (notice that the address has been shiftted leftward by 1) - for (mwSize i(n); i > 1u; --i) rowptr[i] = rowptr[i - 1]; - rowptr[1] = 0; + for (mwSize i(n); i > 1u; --i) rowptr[i] = rowptr[i - 1] + 1; + rowptr[1] = 1; + ++rowptr[n + 1]; } template @@ -81,12 +82,13 @@ static inline typename std::enable_if::type convert_ijv2crs( vals_r_ptr[idx] = vs_r_ptr[i]; vals_i_ptr[idx] = vs_i_ptr[i]; #endif - colind[idx] = cols[i] - 1; + colind[idx] = cols[i]; ++rowptr[rows[i]]; } // revert rowptr (notice that the address has been shiftted leftward by 1) - for (mwSize i(n); i > 1u; --i) rowptr[i] = rowptr[i - 1]; - rowptr[1] = 0; + for (mwSize i(n); i > 1u; --i) rowptr[i] = rowptr[i - 1] + 1; + rowptr[1] = 1; + ++rowptr[n + 1]; } // convert IJV format from MATLAB built-in find to CRS diff --git a/pipitHifir.m b/pipitHifir.m new file mode 100644 index 0000000..04b3cdc --- /dev/null +++ b/pipitHifir.m @@ -0,0 +1,223 @@ +function [x, us, vs, flag, relres, iter, resids, times] = ... + pipitHifir(A, b, nNull, varargin) +%pipitHifir - Compute pseudoinverse solution of an inconsistent system. +% +% x = pipitHifir(A, b, nNull, [, restart, rtols, maxit, x0, vs]) +% solves an inconsistent sparse linear system using PIPIT with +% HIFIR as right preconditioner. Matrix A can be in MATLAB's built-in +% sparse format or in CRS struct with row_ptr, col_ind, and vals fields. +% +% nNull specifies the dimension of the null-space. (This argument is +% presently required but will be optional in the future.) +% +% The optional arguments include: +% M: user-provided HIF preconditioner. The default value is empty indicating +% that this function will compute a preconditioner. +% +% restart: the number of iterations before GMRES restarts. If empty or +% missing, the default value is 30. +% +% rtols: a two-vector specifying the relative tolerances for the +% solution and nulll-space vectors. If empty or missing, the default +% values are [1.e-6, 1.e-12]. +% +% maxit: the maximum number of iterations. If empty or missing, +% the default value is 500. (Note that unlike MATLAB's built-in gmres, +% maxit here refers to the total number of iterations.) +% +% x0: the initial guess. +% vs: the right null space, if known. If vs is specified but is a scalar +% zero, PIPIT will compute the least-squares solution (i.e., it will +% skip projecting off the right-null-space component in the solution). +% +% x = pipitHifir(A, b, nNull, ..., 'name', value, ...) +% allows omitting some of the optional arguments followed by name-value +% pairs for the parameters. The parameters include: +% verbose: verbose level. Default is 1. +% Unmatched parameters are passed to HifParams. +% +% [x, us, vs, flag, relres, iter, resids, times] = pipitHifir(A, b, nNull, ...) +% returns additional output in addition to the solution vector. +% +% us: the (computed) left null-space basis vectors +% vs: the (computed) right null-space basis vectors +% flag: 0 - converged to the desired tolerance TOL within MAXIT iterations. +% 1 - iterated maxit times but did not converge. +% 3 - stagnated (two consecutive iterates were the same). +% relres: the relative residual. +% iter: the number of iterations. +% resids: contains the residial history of all iterations. +% times: returns the setup time and solve time in seconds in a 4-vector. +% +% See also gmresHif, fgmresNull, orthNulls + +if nargin == 0 + help pipitHifir + return; +end + +p = inputParser; + +% Initialize default arguments +addRequired(p, 'nNull', @(x) isscalar(x) && (x>0)); +addParameter(p, 'M', [], @(x) isempty(x) || isa(x, 'Hifir')); +addParameter(p, 'restart', int32(30), @(x) isempty(x) || isscalar(x)); +addParameter(p, 'rtols', [1.e-6,1.e-12], @(x) numel(x)<=2); +addParameter(p, 'maxit', int32(500), @(x) isempty(x) || isscalar(x)); +addParameter(p, 'x0', cast([], class(b)), ... + @(x) isempty(x) || isequal(size(x), size(b))); +addParameter(p, 'vs', cast([], class(b)), ... + @(x) isempty(x) || isscalar(x) && x==0 || isequal(size(x), size(b))); +addParameter(p, 'verbose', int32(1), @isscalar); + +p.KeepUnmatched = true; +parse(p, nNull, varargin{:}); + +opts = p.Results; +% Process positional arguments +if isempty(opts.restart) || opts.restart <= 0 + opts.restart = int32(30); +end +if isempty(opts.maxit) || opts.maxit <= 0 + opts.maxit = int32(500); +end +if isempty(opts.rtols) || opts.rtols(1) <= 0 + rtol = 1.e-6; +else + rtol = opts.rtols(1); +end +if isempty(opts.rtols) || numel(opts.rtols) < 2 || opts.rtols(2) <= 0 + rtol_null = 1.e-12; +else + rtol_null = opts.rtols(2); +end + +% Create Hifir object +computedHif = isempty(opts.M); +times = zeros(4, 1); + +if computedHif + if opts.verbose + fprintf(1, 'Computing hybrid incomplete factorization...\n'); + end + args = namedargs2cell(p.Unmatched); + [hif, info, times(1)] = hifCreate(A, [], 'verbose', opts.verbose>1, args{:}); +else + if opts.verbose + fprintf(1, 'Using user-provided hybrid incomplete factorization...\n'); + end + hif = opts.M; + % get info + info = hifir4m_mex(HifEnum.QUERY, hif.hdl); +end +if opts.verbose + if computedHif + fprintf(1, 'Finished in %.4g seconds \n', times(1)); + disp(info); + end + fprintf(1, 'Computing left null space ...\n'); +end + +% Determine the dimension of nullspace +if isempty(nNull) || nNull <= 0 + nNull = int32(1); +end + +% Compute the left null space using the given nullspace dimension +tic; +[us, iters_left, iters_ir_left] = orthNulls(A, hif, nNull, true, ... + info.schur_rank ~= info.schur_size, opts.restart, rtol_null, opts.maxit); +b = mgs_null_proj(us, b); % project off left nullspace +times(2) = toc; + +% Compute the least-square solution +if opts.verbose + fprintf(1, 'Finished after %d FGMRES iterations with %d iterative refinements in %.4g seconds.\n', ... + sum(iters_left), sum(iters_ir_left), times(2)); + fprintf(1, 'Computing least-squares solution...\n'); +end + +tic; +[x, flag, relres, iter, resids] = fgmres_MGS(A, b, opts.restart, ... + rtol, opts.maxit, hif, [], opts.x0); +times(3) = toc; + +if opts.verbose + if flag == 0 + fprintf(1, 'Finished after %d GMRES iterations in %.4g seconds.\n', iter, times(3)); + elseif flag == 3 + fprintf(1, 'GMRES stagnated after %d iterations and %.4g seconds.\n', iter, times(3)); + else + fprintf(1, 'GMRES failed to converge after %d iterations and %.4g seconds.\n', iter, times(3)); + end +end + +% Return least-squares solution if opts.vs is 0 +if isscalar(opts.vs) + vs = []; + return; +end + +% Compute pseudo-inverse solution +iters_right = []; +if isempty(opts.vs) + if issparse(A) + if norm(A-A',1) <= rtol_null * norm(A,1) + if opts.verbose + if isreal(A) + fprintf(1, 'Matrix is symmetric.'); + else + fprintf(1, 'Matrix is Hermitian.'); + end + fprintf(1, ' Reusing left null-space for right null space.\n'); + end + tic; + vs = us; + else + % compute the right null space + if opts.verbose + fprintf(1, 'Converting least-squares solution to pseudo-inverse solution...\n'); + end + tic; + [vs, iters_right, iters_ir_right] = orthNulls(A, hif, size(us, 2), false, ... + info.schur_rank ~= info.schur_size, opts.restart, ... + rtol_null, opts.maxit); + end + else + % TODO: need to implement this for CRS + assert(false, 'CRS is not yet implemented'); + end +else + vs = opts.vs; +end + +x = mgs_null_proj(vs, x); % project off right nullspace +times(4) = toc; +if opts.verbose + if ~isempty(iters_right) + fprintf(1, 'Finished after %d FGMRES iterations with %d iterative refinements in %.4g seconds.\n', ... + sum(iters_right), sum(iters_ir_right), times(4)); + else + fprintf(1, 'Finished in %g seconds.\n', times(4)); + end +end + +end + +function x = mgs_null_proj(vs, x) +% Orthogonalize via MGS +if isempty(vs); return; end +for i = 1:size(vs,2) + u = vs(:,i); + x = x - (u'*x)*u; +end +end + +function test %#ok +%!test +%! A = gallery('neumann', 256); +%! b = rand(size(A, 1), 1); +%! x = pipitHifir(A, b, 1); +%! assert(norm(A'*(b-A*x))/norm(A'*b) <= 1e8); + +end diff --git a/util/build_hifir4m.m b/util/build_hifir4m.m index 328a096..b097e61 100644 --- a/util/build_hifir4m.m +++ b/util/build_hifir4m.m @@ -1,41 +1,72 @@ -function build_hifir4m(varargin) +function build_hifir4m(force, isInt64) % script for building hifir4m for MATLAB and GNU Octave -if nargin < 1 - force = false; -end +if nargin < 1; force = false; end +if nargin < 2; isInt64 = true; end + +int32Flag = ''; +if ~isInt64; int32Flag = '-DHIFIR4M_USE_32INT'; end mods = {'mex/hifir4m_mex', ... - 'mex/hifir4m_ijv2crs', ... - 'mex/hifir4m_isint64'}; + 'mex/private/hifir4m_ijv2crs', ... + 'mex/hifir4m_isint64', ... + 'mex/hifir4m_version'}; if isoctave - mexCmd = 'mmex'; - sysLibs = ' -llapack -lblas'; + mexCmd = 'mmex -O'; + sysFlags = ' -llapack -lblas'; else - mexCmd = 'mex'; + mexCmd = 'mex -O'; if ispc - sysLibs = ' -DHIF_FC=1 -R2018a LINKLIBS=''-llibmwmathutil $LINKLIBS'''; + sysFlags = ' -DHIF_FC=1 -R2018a LINKLIBS=''-llibmwmathutil $LINKLIBS'''; else - sysLibs = ' -R2018a -lmwlapack -lmwblas -lmwservices'; + sysFlags = ' -R2018a -lmwlapack -lmwblas -lmwservices'; end end +% download hifir +if ~exist(fullfile(hifir4m_root, 'VERSION'), 'file') + hifirVersion = '0.1.0'; +else + fid = fopen(fullfile(hifir4m_root, 'VERSION'), 'r'); + hifirVersion = sprintf('%d.%d.%d', fscanf(fid, '%d.%d.%d')); + fclose(fid); +end +downloaded = false; + +% check HIFIR C++ root +if exist(fullfile(hifir4m_root, ['hifir-' hifirVersion]), 'dir') + pathToHifir = fullfile(hifir4m_root, ['hifir-' hifirVersion]); +else + downloaded = true; + pathToHifir = fullfile(tempdir, ['hifir-' hifirVersion]); +end + for m = 1:length(mods) md = mods{m}; - src = relativepath(fullfile(hifir4m_root, [md '.cpp'])); - mx = relativepath(fullfile(hifir4m_root, [md '.' mexext])); + src = fullfile(hifir4m_root, [md '.cpp']); + mx = fullfile(hifir4m_root, [md '.' mexext]); if ~force && exist(mx, 'file') && ~isnewer(src, mx); continue; end + if downloaded && ~exist(pathToHifir, 'dir') + pathToHifir = download_hifir(hifirVersion); + end % assume GCC openmp cmd = [mexCmd ' ' ... 'LDFLAGS="$LDFLAGS -fopenmp" ' ... % OpenMP linker flag 'CXXFLAGS="$CXXFLAGS -m64 -march=native -O3 -std=c++11 ' ... + int32Flag ' ' ... '-ffast-math -fcx-limited-range -fopenmp" ' ... % C++11/OpenMP compiler - '-I' relativepath(fullfile(hifir4m_root, 'hifir', 'src')) ' ' ... % include - '-O -output ' mx ' ' src sysLibs]; % link to system libraries + '-I''' fullfile(pathToHifir, 'src') ''' ' ... % include + '-output ''' mx ''' ''' src '''' sysFlags]; % link to system libraries disp(cmd); eval(cmd); end + +% Delete downloaded folder if neccessary +if downloaded && exist(pathToHifir, 'dir') + rmdir(pathToHifir, 's'); +end + end function flag = isnewer(f1, f2) diff --git a/util/download_hifir.m b/util/download_hifir.m new file mode 100644 index 0000000..eb7f4ed --- /dev/null +++ b/util/download_hifir.m @@ -0,0 +1,12 @@ +function path_to_hifir = download_hifir(version) +% Download hifir + +zip_file = fullfile(tempdir, 'hifir.zip'); +zip_url = ['https://github.com/hifirworks/hifir/archive/refs/tags/v' ... + version ... + '.zip']; +urlwrite(zip_url, zip_file); +unzip(zip_file, tempdir); +delete(zip_file); +path_to_hifir = fullfile(tempdir, ['hifir-' version]); +end \ No newline at end of file diff --git a/util/octave/mmex.m b/util/octave/mmex.m index 3610207..c382ec8 100644 --- a/util/octave/mmex.m +++ b/util/octave/mmex.m @@ -89,7 +89,7 @@ function mmex(varargin) %% Look for mkoctfile if exist('__octave_config_info__', 'builtin') % octave_config_info is depreciated in 4.2.1 - octave_config_info = @__octave_config_info__; + octave_config_info = @__octave_config_info__; %#ok end bindir = octave_config_info('bindir'); @@ -132,8 +132,8 @@ function mmex(varargin) error('Argument -output must follow a file name.'); end elseif isequal(varargin{i}, '-O') - defs.COPTIMFLAGS = [defs.COPTIMFLAGS ' -O2']; %#ok<*AGROW> - defs.CXXOPTIMFLAGS = [defs.CXXOPTIMFLAGS ' -O2']; %#ok<*AGROW> + defs.COPTIMFLAGS = [defs.COPTIMFLAGS ' -O2 -DNDEBUG']; + defs.CXXOPTIMFLAGS = [defs.CXXOPTIMFLAGS ' -O2 -DNDEBUG']; elseif varargin{i}(1)~='-' && ~isempty(strfind(varargin{i}, '=')) index = strfind(varargin{i}, '='); var = varargin{i}(1:index(1)-1); @@ -206,12 +206,14 @@ function mmex(varargin) macros = [macros 'export CPPFLAGS=''' strtrim(defs.CPPFLAGS) '''; ']; end if ~isempty(defs.CFLAGS) || ~isempty(defs.COPTIMFLAGS) || ~isempty(defs.CDEBUGFLAGS) - macros = [macros 'export CFLAGS=''' strtrim(strrep(defs.CFLAGS, '$CFLAGS', '')) ' ' ... - strtrim(defs.COPTIMFLAGS) ' ' strtrim(defs.CDEBUGFLAGS) '''; ']; + macros = [macros 'export CFLAGS=''' strtrim(defs.COPTIMFLAGS) ' ' ... + strtrim(strrep(defs.CFLAGS, '$CFLAGS', '')) ' ' ... + strtrim(defs.CDEBUGFLAGS) '''; ']; end if ~isempty(defs.CXXFLAGS) || ~isempty(defs.CXXOPTIMFLAGS) || ~isempty(defs.CXXDEBUGFLAGS) - macros = [macros 'export CXXFLAGS=''' strtrim(strrep(defs.CXXFLAGS, '$CXXFLAGS', '')) ' ' ... - strtrim(defs.CXXOPTIMFLAGS) ' ' strtrim(defs.CXXDEBUGFLAGS) '''; ']; + macros = [macros 'export CXXFLAGS=''' strtrim(defs.CXXOPTIMFLAGS) ' ' ... + strtrim(strrep(defs.CXXFLAGS, '$CXXFLAGS', '')) ' ' ... + strtrim(defs.CXXDEBUGFLAGS) '''; ']; elseif ~isempty(defs.CFLAGS) macros = [macros 'export CXXFLAGS=''' strtrim(defs.CFLAGS) '''; ']; end @@ -239,7 +241,7 @@ function mmex(varargin) if verbose disp(cmd); end - [status, text] = unix(cmd, '-echo'); + [status, ~] = unix(cmd, '-echo'); if status || target && ~exist(target, 'file') error('mkoctfile failed'); end diff --git a/util/relativepath.m b/util/relativepath.m deleted file mode 100644 index ecf41f2..0000000 --- a/util/relativepath.m +++ /dev/null @@ -1,107 +0,0 @@ -function rel_path = relativepath( tgt_path, act_path ) -%RELATIVEPATH returns the relative path from an actual path to the target path. -% Both arguments must be strings with absolute paths. -% The actual path is optional, if omitted the current dir is used instead. -% In case the volume drive letters don't match, an absolute path will be returned. -% If a relative path is returned, it always starts with '.\' or '..\' -% -% Syntax: -% rel_path = RELATIVEPATH( target_path, actual_path ) -% -% Parameters: -% target_path - Path which is targetted -% actual_path - Start for relative path (optional, default = current dir) -% -% Examples: -% relativepath( 'C:\local\data\matlab' , 'C:\local' ) = '.\data\matlab\' -% relativepath( 'A:\MyProject\' , 'C:\local' ) = 'a:\myproject\' -% -% relativepath( 'C:\local\data\matlab' , pwd ) is the same as -% relativepath( 'C:\local\data\matlab' ) -% -% See also: ABSOLUTEPATH PATH - -% Jochen Lenz - - -% 2nd parameter is optional: -if nargin < 2 - act_path = pwd; -end - -% Predefine return string: -rel_path = ''; - -% Make sure strings end by a filesep character: -if isempty(act_path) || ~isequal(act_path(end),filesep) - act_path = [act_path filesep]; -end -if isempty(tgt_path) || ~isequal(tgt_path(end),filesep) - tgt_path = [tgt_path filesep]; -end - -% Convert to all lowercase: -[act_path] = fileparts( lower(act_path) ); -[tgt_path] = fileparts( lower(tgt_path) ); - -% Create a cell-array containing the directory levels: -act_path_cell = pathparts(act_path); -tgt_path_cell = pathparts(tgt_path); - -% If volumes are different, return absolute path: -if isempty(act_path_cell) || isempty(tgt_path_cell) - return % rel_path = '' -else - if ~isequal( act_path_cell{1} , tgt_path_cell{1} ) - rel_path = tgt_path; - return - end -end - -% Remove level by level, as long as both are equal: -while ~isempty(act_path_cell) && ~isempty(tgt_path_cell) - if isequal( act_path_cell{1}, tgt_path_cell{1} ) - act_path_cell(1) = []; - tgt_path_cell(1) = []; - else - break - end -end - -% As much levels down ('..\') as levels are remaining in "act_path": -for i = 1 : length(act_path_cell) - rel_path = ['..' filesep rel_path]; %#ok<*AGROW> -end - -% Relative directory levels to target directory: -for i = 1 : length(tgt_path_cell) - rel_path = [rel_path tgt_path_cell{i} filesep]; -end - -% Start with '.' or '..' : -if isempty(rel_path) - rel_path = ['.' filesep]; -elseif ~isequal(rel_path(1),'.') - rel_path = ['.' filesep rel_path]; -end - -% Remove filesep at the end it if was not in the input -if tgt_path(end) ~= filesep - rel_path(end) = []; -end - -return - -% ------------------------------------------------- - -function path_cell = pathparts(path_str) - -path_str = [filesep path_str filesep]; -path_cell = {}; - -sep_pos = strfind( path_str, filesep ); -for i = 1 : length(sep_pos)-1 - path_cell{i} = path_str( sep_pos(i)+1 : sep_pos(i+1)-1 ); -end - -return \ No newline at end of file