From 48a20d6b88089d6878626826631b2d6648c66a3f Mon Sep 17 00:00:00 2001 From: "Documenter.jl" Date: Fri, 22 Nov 2024 15:45:29 +0000 Subject: [PATCH] build based on b0969b5 --- dev/.documenter-siteinfo.json | 2 +- dev/assets/documenter.js | 302 +++++++++++++++------------- dev/index.html | 4 +- dev/krylov_operators/index.html | 49 +++++ dev/objects.inv | Bin 936 -> 561 bytes dev/reference/index.html | 7 +- dev/search_index.js | 2 +- dev/triangular_operators/index.html | 49 +++++ 8 files changed, 263 insertions(+), 152 deletions(-) create mode 100644 dev/krylov_operators/index.html create mode 100644 dev/triangular_operators/index.html diff --git a/dev/.documenter-siteinfo.json b/dev/.documenter-siteinfo.json index 04c9ef4..5309f27 100644 --- a/dev/.documenter-siteinfo.json +++ b/dev/.documenter-siteinfo.json @@ -1 +1 @@ -{"documenter":{"julia_version":"1.11.1","generation_timestamp":"2024-11-08T19:43:29","documenter_version":"1.7.0"}} \ No newline at end of file +{"documenter":{"julia_version":"1.11.1","generation_timestamp":"2024-11-22T15:45:24","documenter_version":"1.8.0"}} \ No newline at end of file diff --git a/dev/assets/documenter.js b/dev/assets/documenter.js index 82252a1..7d68cd8 100644 --- a/dev/assets/documenter.js +++ b/dev/assets/documenter.js @@ -612,176 +612,194 @@ function worker_function(documenterSearchIndex, documenterBaseURL, filters) { }; } -// `worker = Threads.@spawn worker_function(documenterSearchIndex)`, but in JavaScript! -const filters = [ - ...new Set(documenterSearchIndex["docs"].map((x) => x.category)), -]; -const worker_str = - "(" + - worker_function.toString() + - ")(" + - JSON.stringify(documenterSearchIndex["docs"]) + - "," + - JSON.stringify(documenterBaseURL) + - "," + - JSON.stringify(filters) + - ")"; -const worker_blob = new Blob([worker_str], { type: "text/javascript" }); -const worker = new Worker(URL.createObjectURL(worker_blob)); - /////// SEARCH MAIN /////// -// Whether the worker is currently handling a search. 
This is a boolean -// as the worker only ever handles 1 or 0 searches at a time. -var worker_is_running = false; - -// The last search text that was sent to the worker. This is used to determine -// if the worker should be launched again when it reports back results. -var last_search_text = ""; - -// The results of the last search. This, in combination with the state of the filters -// in the DOM, is used compute the results to display on calls to update_search. -var unfiltered_results = []; - -// Which filter is currently selected -var selected_filter = ""; - -$(document).on("input", ".documenter-search-input", function (event) { - if (!worker_is_running) { - launch_search(); - } -}); - -function launch_search() { - worker_is_running = true; - last_search_text = $(".documenter-search-input").val(); - worker.postMessage(last_search_text); -} - -worker.onmessage = function (e) { - if (last_search_text !== $(".documenter-search-input").val()) { - launch_search(); - } else { - worker_is_running = false; - } - - unfiltered_results = e.data; - update_search(); -}; +function runSearchMainCode() { + // `worker = Threads.@spawn worker_function(documenterSearchIndex)`, but in JavaScript! + const filters = [ + ...new Set(documenterSearchIndex["docs"].map((x) => x.category)), + ]; + const worker_str = + "(" + + worker_function.toString() + + ")(" + + JSON.stringify(documenterSearchIndex["docs"]) + + "," + + JSON.stringify(documenterBaseURL) + + "," + + JSON.stringify(filters) + + ")"; + const worker_blob = new Blob([worker_str], { type: "text/javascript" }); + const worker = new Worker(URL.createObjectURL(worker_blob)); + + // Whether the worker is currently handling a search. This is a boolean + // as the worker only ever handles 1 or 0 searches at a time. + var worker_is_running = false; + + // The last search text that was sent to the worker. This is used to determine + // if the worker should be launched again when it reports back results. 
+ var last_search_text = ""; + + // The results of the last search. This, in combination with the state of the filters + // in the DOM, is used compute the results to display on calls to update_search. + var unfiltered_results = []; + + // Which filter is currently selected + var selected_filter = ""; + + $(document).on("input", ".documenter-search-input", function (event) { + if (!worker_is_running) { + launch_search(); + } + }); -$(document).on("click", ".search-filter", function () { - if ($(this).hasClass("search-filter-selected")) { - selected_filter = ""; - } else { - selected_filter = $(this).text().toLowerCase(); + function launch_search() { + worker_is_running = true; + last_search_text = $(".documenter-search-input").val(); + worker.postMessage(last_search_text); } - // This updates search results and toggles classes for UI: - update_search(); -}); + worker.onmessage = function (e) { + if (last_search_text !== $(".documenter-search-input").val()) { + launch_search(); + } else { + worker_is_running = false; + } -/** - * Make/Update the search component - */ -function update_search() { - let querystring = $(".documenter-search-input").val(); + unfiltered_results = e.data; + update_search(); + }; - if (querystring.trim()) { - if (selected_filter == "") { - results = unfiltered_results; + $(document).on("click", ".search-filter", function () { + if ($(this).hasClass("search-filter-selected")) { + selected_filter = ""; } else { - results = unfiltered_results.filter((result) => { - return selected_filter == result.category.toLowerCase(); - }); + selected_filter = $(this).text().toLowerCase(); } - let search_result_container = ``; - let modal_filters = make_modal_body_filters(); - let search_divider = `
`; + // This updates search results and toggles classes for UI: + update_search(); + }); - if (results.length) { - let links = []; - let count = 0; - let search_results = ""; - - for (var i = 0, n = results.length; i < n && count < 200; ++i) { - let result = results[i]; - if (result.location && !links.includes(result.location)) { - search_results += result.div; - count++; - links.push(result.location); - } - } + /** + * Make/Update the search component + */ + function update_search() { + let querystring = $(".documenter-search-input").val(); - if (count == 1) { - count_str = "1 result"; - } else if (count == 200) { - count_str = "200+ results"; + if (querystring.trim()) { + if (selected_filter == "") { + results = unfiltered_results; } else { - count_str = count + " results"; + results = unfiltered_results.filter((result) => { + return selected_filter == result.category.toLowerCase(); + }); } - let result_count = `
${count_str}
`; - search_result_container = ` + let search_result_container = ``; + let modal_filters = make_modal_body_filters(); + let search_divider = `
`; + + if (results.length) { + let links = []; + let count = 0; + let search_results = ""; + + for (var i = 0, n = results.length; i < n && count < 200; ++i) { + let result = results[i]; + if (result.location && !links.includes(result.location)) { + search_results += result.div; + count++; + links.push(result.location); + } + } + + if (count == 1) { + count_str = "1 result"; + } else if (count == 200) { + count_str = "200+ results"; + } else { + count_str = count + " results"; + } + let result_count = `
${count_str}
`; + + search_result_container = ` +
+ ${modal_filters} + ${search_divider} + ${result_count} +
+ ${search_results} +
+
+ `; + } else { + search_result_container = `
${modal_filters} ${search_divider} - ${result_count} -
- ${search_results} -
-
+
0 result(s)
+ +
No result found!
`; - } else { - search_result_container = ` -
- ${modal_filters} - ${search_divider} -
0 result(s)
-
-
No result found!
- `; - } + } - if ($(".search-modal-card-body").hasClass("is-justify-content-center")) { - $(".search-modal-card-body").removeClass("is-justify-content-center"); - } + if ($(".search-modal-card-body").hasClass("is-justify-content-center")) { + $(".search-modal-card-body").removeClass("is-justify-content-center"); + } - $(".search-modal-card-body").html(search_result_container); - } else { - if (!$(".search-modal-card-body").hasClass("is-justify-content-center")) { - $(".search-modal-card-body").addClass("is-justify-content-center"); + $(".search-modal-card-body").html(search_result_container); + } else { + if (!$(".search-modal-card-body").hasClass("is-justify-content-center")) { + $(".search-modal-card-body").addClass("is-justify-content-center"); + } + + $(".search-modal-card-body").html(` +
Type something to get started!
+ `); } + } - $(".search-modal-card-body").html(` -
Type something to get started!
- `); + /** + * Make the modal filter html + * + * @returns string + */ + function make_modal_body_filters() { + let str = filters + .map((val) => { + if (selected_filter == val.toLowerCase()) { + return `${val}`; + } else { + return `${val}`; + } + }) + .join(""); + + return ` +
+ Filters: + ${str} +
`; } } -/** - * Make the modal filter html - * - * @returns string - */ -function make_modal_body_filters() { - let str = filters - .map((val) => { - if (selected_filter == val.toLowerCase()) { - return `${val}`; - } else { - return `${val}`; - } - }) - .join(""); - - return ` -
- Filters: - ${str} -
`; +function waitUntilSearchIndexAvailable() { + // It is possible that the documenter.js script runs before the page + // has finished loading and documenterSearchIndex gets defined. + // So we need to wait until the search index actually loads before setting + // up all the search-related stuff. + if (typeof documenterSearchIndex !== "undefined") { + runSearchMainCode(); + } else { + console.warn("Search Index not available, waiting"); + setTimeout(waitUntilSearchIndexAvailable, 1000); + } } +// The actual entry point to the search code +waitUntilSearchIndexAvailable(); + }) //////////////////////////////////////////////////////////////////////////////// require(['jquery'], function($) { diff --git a/dev/index.html b/dev/index.html index 1213bb0..88b0c08 100644 --- a/dev/index.html +++ b/dev/index.html @@ -1,4 +1,4 @@ -Home · KrylovPreconditioners.jl

KrylovPreconditioners.jl documentation

This package provides a collection of preconditioners.

How to Cite

If you use KrylovPreconditioners.jl in your work, please cite using the format given in CITATION.cff.

How to Install

KrylovPreconditioners.jl can be installed and tested through the Julia package manager:

julia> ]
+Home · KrylovPreconditioners.jl

KrylovPreconditioners.jl documentation

This package provides a collection of preconditioners.

How to Cite

If you use KrylovPreconditioners.jl in your work, please cite using the format given in CITATION.cff.

How to Install

KrylovPreconditioners.jl can be installed and tested through the Julia package manager:

julia> ]
 pkg> add KrylovPreconditioners
-pkg> test KrylovPreconditioners

Bug reports and discussions

If you think you found a bug, feel free to open an issue. Focused suggestions and requests can also be opened as issues. Before opening a pull request, start an issue or a discussion on the topic, please.

If you want to ask a question not suited for a bug report, feel free to start a discussion here. This forum is for general discussion about this repository and the JuliaSmoothOptimizers organization, so questions about any of our packages are welcome.

+pkg> test KrylovPreconditioners

Bug reports and discussions

If you think you found a bug, feel free to open an issue. Focused suggestions and requests can also be opened as issues. Before opening a pull request, start an issue or a discussion on the topic, please.

If you want to ask a question not suited for a bug report, feel free to start a discussion here. This forum is for general discussion about this repository and the JuliaSmoothOptimizers organization, so questions about any of our packages are welcome.

diff --git a/dev/krylov_operators/index.html b/dev/krylov_operators/index.html new file mode 100644 index 0000000..cd438d3 --- /dev/null +++ b/dev/krylov_operators/index.html @@ -0,0 +1,49 @@ + +Krylov operators · KrylovPreconditioners.jl

Krylov operators

KrylovPreconditioners.KrylovOperatorFunction
KrylovOperator(A; nrhs::Int=1, transa::Char='N')

Create a Krylov operator to accelerate sparse matrix-vector or matrix-matrix products on GPU architectures. The operator is compatible with sparse matrices stored on NVIDIA, AMD, and Intel GPUs.

Input arguments

  • A: The sparse matrix on the GPU that serves as the operator for matrix-vector or matrix-matrix products;
  • nrhs: Specifies the number of columns for the right-hand sides. Defaults to 1 for standard matrix-vector products;
  • transa: Determines how the matrix A is applied during the products; 'N' for no transposition, 'T' for transpose, and 'C' for conjugate transpose.

Output argument

  • op: An instance of AbstractKrylovOperator representing the Krylov operator for the specified sparse matrix and parameters.
source
KrylovPreconditioners.update!Method
update!(op::AbstractKrylovOperator, A)

Update the sparse matrix A associated with the given AbstractKrylovOperator without the need to reallocate buffers or repeat the structural analysis phase for detecting parallelism for sparse matrix-vector or matrix-matrix products. A and the operator op must have the same sparsity pattern, enabling efficient reuse of existing resources.

Input arguments

  • op: The Krylov operator to update;
  • A: The new sparse matrix to associate with the operator.
source

Nvidia GPUs

Sparse matrices have a specific storage on Nvidia GPUs (CuSparseMatrixCSC, CuSparseMatrixCSR or CuSparseMatrixCOO):

using CUDA, CUDA.CUSPARSE
+using SparseArrays
+using KrylovPreconditioners
+
+if CUDA.functional()
+  # CPU Arrays
+  A_cpu = sprand(200, 100, 0.3)
+
+  # GPU Arrays
+  A_csc_gpu = CuSparseMatrixCSC(A_cpu)
+  A_csr_gpu = CuSparseMatrixCSR(A_cpu)
+  A_coo_gpu = CuSparseMatrixCOO(A_cpu)
+
+  # Krylov operators
+  op_csc = KrylovOperator(A_csc_gpu; nrhs=1, transa='N')
+  op_csr = KrylovOperator(A_csr_gpu; nrhs=1, transa='T')
+  op_coo = KrylovOperator(A_coo_gpu; nrhs=5, transa='N')
+end

AMD GPUs

Sparse matrices have a specific storage on AMD GPUs (ROCSparseMatrixCSC, ROCSparseMatrixCSR or ROCSparseMatrixCOO):

using AMDGPU, AMDGPU.rocSPARSE
+using SparseArrays
+using KrylovPreconditioners
+
+if AMDGPU.functional()
+  # CPU Arrays
+  A_cpu = sprand(200, 100, 0.3)
+
+  # GPU Arrays
+  A_csc_gpu = ROCSparseMatrixCSC(A_cpu)
+  A_csr_gpu = ROCSparseMatrixCSR(A_cpu)
+  A_coo_gpu = ROCSparseMatrixCOO(A_cpu)
+
+  # Krylov operators
+  op_csc = KrylovOperator(A_csc_gpu; nrhs=1, transa='N')
+  op_csr = KrylovOperator(A_csr_gpu; nrhs=1, transa='T')
+  op_coo = KrylovOperator(A_coo_gpu; nrhs=5, transa='N')
+end

Intel GPUs

Sparse matrices have a specific storage on Intel GPUs (oneSparseMatrixCSR):

using oneAPI, oneAPI.oneMKL
+using SparseArrays
+using KrylovPreconditioners
+
+if oneAPI.functional()
+  # CPU Arrays
+  A_cpu = sprand(Float32, 20, 10, 0.3)
+
+  # GPU Arrays
+  A_csr_gpu = oneSparseMatrixCSR(A_cpu)
+
+  # Krylov operator
+  op_csr = KrylovOperator(A_csr_gpu; nrhs=1, transa='N')
+end
diff --git a/dev/objects.inv b/dev/objects.inv index 1ff83c1f0e3405ff93337a9bc05aea7e64bcd0e4..bca505dadbc3d72c13db4bb15ed9c37e7c74a362 100644 GIT binary patch delta 432 zcmV;h0Z;y@2eAZ@i+`n5O;f@!6ukFWJV$#oB3zj926zx1M+dw+A$edSB%ORHjN^Yd z6)Y`HK*n49K6W?TO*6@H1;+9lt{9Luz@1uRve0Xx-d|OYnp&kzbz;BQf`^|9l$FHl zjLMl4REHr#{T95G)e4lxDQc7qtNUfK1*6}`IH{l$6j9pqi+?b2;X{4hPi{{7tdS|n zGn*-i+FZ>Acwugd($b=q3`jwKQ#<>ia!Nm as`US_O$g-Bt*PtWV+;KRNb(IblwU9mThUPf delta 810 zcmV+_1J(So1gHm)i+{~m-*1~R5Ps*caJ1#EWNkIAQs0tHQ?qr_)L*ZW!I#vnVaO4qzJc*WqCOn;bI|B(<1PN=4WgH)pj zh77KF1zh8F&YQ$bUqU3LRzBv$Csc*8Qf5h2TpC-nu|>uf6>Pm(E=dcU8CM!J)u+C8fPWMvi}Z$`?u}$-m`b?< zeJ@CFA+1Qmvq^?JeXfUR--oBu;py>q%0;ETqYtI>&Wze7?EiqEfFw0I&ZN$!8dHxd z5r%O@f%rBB>dlG>#%9ARX4nj~(}g9-w|=L&8`Y5N6@ND;pT;&^8|ei&e0;)^%vBsH zO?8|u44hPFm$yG~8jrqYm(>v(o2}AJSr|R3)*?uA*iC#Fg=JhuVu??%f{~qYAECKH zfn0-bBvA3xd@NgEF6|dWeysF$01l~qpR!Gn>m&kH=}`4vE-8Nma!HlxdOf#dLRw_5 zMQFt4Nq=6@`&PCI-a+E}o6QQ%lX7);Ep@R5$#Aq`o^dfhJ>i#}82mY!bl?b{7sSC2 ztMTUDlVPU=a$dVtO|pvvR*_EjNj&#`l)2qIrChBcri{!nd01sl`8{v?Uhs8(HmTzX zKVc$#{E0&m(DKrUl>3KQd9OU`*vdMl&Q3Gy41c1&Mh*k1q5d5C6h{PW*g|g1t|R1n zE$x{GHfE7dV_bXM?pNYm{U}!VZt)B4u}90wowuJrA;5m^XuhC~1*shKWfTwgzFx|H ziona)Z@XrBp6B0W+2asuy_eALSBV{FFpZ>u<(MrYka&cjqhwHQ)~?)m?{-aJpna>c o3L|D$+V+I4c0 -Reference · KrylovPreconditioners.jl

Reference

Index

Base.push!Method

For the L-factor: insert in row head column value For the U-factor: insert in column head row value

source
KrylovPreconditioners.add!Method

Sets v[idx] += a when idx is occupied, or sets v[idx] = a. Complexity is O(nnz). The prev_idx can be used to start the linear search at prev_idx, useful when multiple already sorted values are added.

source
KrylovPreconditioners.append_col!Function

Basically A[:, j] = scale * drop(y), where drop removes values less than drop. Note: sorts the nzind's of y, so that the column can be appended to a SparseMatrixCSC.

Resets the SparseVectorAccumulator.

Note: does not update A.colptr for columns > j + 1, as that is done during the steps.

source
KrylovPreconditioners.append_col!Method

Basically A[:, j] = scale * drop(y), where drop removes values less than drop.

Resets the InsertableSparseVector.

Note: does not update A.colptr for columns > j + 1, as that is done during the steps.

source
KrylovPreconditioners.backward_substitution!Method

Applies in-place backward substitution with the U factor of F, under the assumptions:

  1. U is stored transposed / row-wise
  2. U has no lower-triangular elements stored
  3. U has (nonzero) diagonal elements stored.
source
KrylovPreconditioners.overlapMethod
overlap(Graph, subset, level)

Given subset embedded within Graph, compute subset2 such that subset2 contains subset and all of its adjacent vertices.

source
KrylovPreconditioners.update!Method
function update!(p, J::SparseMatrixCSC)

Update the preconditioner p from the sparse Jacobian J in CSC format for the CPU

Note that this implements the same algorithm as for the GPU and becomes very slow on CPU with growing number of blocks.

source
LinearAlgebra.axpy!Method

Add a part of a SparseMatrixCSC column to a SparseVectorAccumulator, starting at a given index until the end.

source
SparseArrays.nnzMethod

Returns the number of nonzeros of the L and U factor combined.

Excludes the unit diagonal of the L factor, which is not stored.

source
KrylovPreconditioners.BlockJacobiPreconditionerType
BlockJacobiPreconditioner

Overlapping-Schwarz preconditioner.

Attributes

  • nblocks::Int64: Number of partitions or blocks.
  • blocksize::Int64: Size of each block.
  • partitions::Vector{Vector{Int64}}:npart` partitions stored as lists
  • cupartitions: partitions transfered to the GPU
  • lpartitions::Vector{Int64}`: Length of each partitions.
  • culpartitions::Vector{Int64}`: Length of each partitions, on the GPU.
  • blocks: Dense blocks of the block-Jacobi
  • cublocks: Js transfered to the GPU
  • map: The partitions as a mapping to construct views
  • cumap: cumap transferred to the GPU`
  • part: Partitioning as output by Metis
  • cupart: part transferred to the GPU
source
KrylovPreconditioners.InsertableSparseVectorType

InsertableSparseVector accumulates the sparse vector result from SpMV. Initialization requires O(N) work, therefore the data structure is reused. Insertion requires O(nnz) at worst, as insertion sort is used.

source
KrylovPreconditioners.LinkedListsType

The factor L is stored column-wise, but we need all nonzeros in row row. We already keep track of the first nonzero in each column (at most n indices). Take l = LinkedLists(n). Let l.head[row] be the column of some nonzero in row row. Then we can store the column of the next nonzero of row row in l.next[l.head[row]], etc. That "spot" is empty and there will never be a conflict because as long as we only store the first nonzero per column: the column is then a unique identifier.

source
KrylovPreconditioners.SortedSetType

SortedSet keeps track of a sorted set of integers ≤ N using insertion sort with a linked list structure in a pre-allocated vector. Requires O(N + 1) memory. Insertion goes via a linear scan in O(n) where n is the number of stored elements, but can be accelerated by passing along a known value in the set (which is useful when pushing in an already sorted list). The insertion itself requires O(1) operations due to the linked list structure. Provides iterators:

ints = SortedSet(10)
-push!(ints, 5)
-push!(ints, 3)
-for value in ints
-    println(value)
-end
source
+Reference · KrylovPreconditioners.jl

Reference

Index

KrylovPreconditioners.update!Method
update!(p, J::SparseMatrixCSC)

Update the preconditioner p from the sparse Jacobian J in CSC format on the CPU.

Note that this implements the same algorithm as for the GPU and becomes very slow on the CPU as the number of blocks grows.

source
KrylovPreconditioners.BlockJacobiPreconditionerType
BlockJacobiPreconditioner

Overlapping-Schwarz preconditioner.

Attributes

  • nblocks::Int64: Number of partitions or blocks.
  • blocksize::Int64: Size of each block.
  • partitions::Vector{Vector{Int64}}: npart partitions stored as lists
  • cupartitions: partitions transferred to the GPU
  • lpartitions::Vector{Int64}: Length of each partition.
  • culpartitions::Vector{Int64}: Length of each partition, on the GPU.
  • blocks: Dense blocks of the block-Jacobi
  • cublocks: blocks transferred to the GPU
  • map: The partitions as a mapping to construct views
  • cumap: map transferred to the GPU
  • part: Partitioning as output by Metis
  • cupart: part transferred to the GPU
source
KrylovPreconditioners.backward_substitution!Function

Applies in-place backward substitution with the U factor of F, under the assumptions:

  1. U is stored transposed / row-wise
  2. U has no lower-triangular elements stored
  3. U has (nonzero) diagonal elements stored.
source
diff --git a/dev/search_index.js b/dev/search_index.js index 4c1ba31..c7fbb83 100644 --- a/dev/search_index.js +++ b/dev/search_index.js @@ -1,3 +1,3 @@ var documenterSearchIndex = {"docs": -[{"location":"reference/#Reference","page":"Reference","title":"Reference","text":"","category":"section"},{"location":"reference/#Index","page":"Reference","title":"Index","text":"","category":"section"},{"location":"reference/","page":"Reference","title":"Reference","text":"","category":"page"},{"location":"reference/","page":"Reference","title":"Reference","text":"Modules = [KrylovPreconditioners]\nOrder = [:function, :type]","category":"page"},{"location":"reference/#Base.empty!-Tuple{KrylovPreconditioners.InsertableSparseVector}","page":"Reference","title":"Base.empty!","text":"Empties the InsterableSparseVector in O(1) operations.\n\n\n\n\n\n","category":"method"},{"location":"reference/#Base.empty!-Tuple{KrylovPreconditioners.SortedSet}","page":"Reference","title":"Base.empty!","text":"Make the head pointer do a self-loop.\n\n\n\n\n\n","category":"method"},{"location":"reference/#Base.empty!-Tuple{KrylovPreconditioners.SparseVectorAccumulator}","page":"Reference","title":"Base.empty!","text":"Empty the SparseVectorAccumulator in O(1) operations.\n\n\n\n\n\n","category":"method"},{"location":"reference/#Base.push!-Tuple{KrylovPreconditioners.LinkedLists, Integer, Integer}","page":"Reference","title":"Base.push!","text":"For the L-factor: insert in row head column value For the U-factor: insert in column head row value\n\n\n\n\n\n","category":"method"},{"location":"reference/#Base.push!-Tuple{KrylovPreconditioners.SortedSet, Int64, Int64}","page":"Reference","title":"Base.push!","text":"Insert index after a known value after\n\n\n\n\n\n","category":"method"},{"location":"reference/#KrylovPreconditioners._fillblock_gpu!-Tuple{Any}","page":"Reference","title":"KrylovPreconditioners._fillblock_gpu!","text":"_fillblock_gpu\n\nFill the dense blocks of the preconditioner from 
the sparse CSR matrix arrays\n\n\n\n\n\n","category":"method"},{"location":"reference/#KrylovPreconditioners.add!-Tuple{KrylovPreconditioners.InsertableSparseVector, Any, Integer, Integer}","page":"Reference","title":"KrylovPreconditioners.add!","text":"Sets v[idx] += a when idx is occupied, or sets v[idx] = a. Complexity is O(nnz). The prev_idx can be used to start the linear search at prev_idx, useful when multiple already sorted values are added.\n\n\n\n\n\n","category":"method"},{"location":"reference/#KrylovPreconditioners.add!-Tuple{KrylovPreconditioners.InsertableSparseVector, Any, Integer}","page":"Reference","title":"KrylovPreconditioners.add!","text":"Add without providing a previous index.\n\n\n\n\n\n","category":"method"},{"location":"reference/#KrylovPreconditioners.add!-Tuple{KrylovPreconditioners.SparseVectorAccumulator, Any, Any}","page":"Reference","title":"KrylovPreconditioners.add!","text":"Sets v[idx] += a when idx is occupied, or sets v[idx] = a. Complexity is O(1).\n\n\n\n\n\n","category":"method"},{"location":"reference/#KrylovPreconditioners.append_col!","page":"Reference","title":"KrylovPreconditioners.append_col!","text":"Basically A[:, j] = scale * drop(y), where drop removes values less than drop. 
Note: sorts the nzind's of y, so that the column can be appended to a SparseMatrixCSC.\n\nResets the SparseVectorAccumulator.\n\nNote: does not update A.colptr for columns > j + 1, as that is done during the steps.\n\n\n\n\n\n","category":"function"},{"location":"reference/#KrylovPreconditioners.append_col!-Union{Tuple{Tv}, Tuple{SparseArrays.SparseMatrixCSC{Tv}, KrylovPreconditioners.InsertableSparseVector{Tv}, Int64, Tv}, Tuple{SparseArrays.SparseMatrixCSC{Tv}, KrylovPreconditioners.InsertableSparseVector{Tv}, Int64, Tv, Tv}} where Tv","page":"Reference","title":"KrylovPreconditioners.append_col!","text":"Basically A[:, j] = scale * drop(y), where drop removes values less than drop.\n\nResets the InsertableSparseVector.\n\nNote: does not update A.colptr for columns > j + 1, as that is done during the steps.\n\n\n\n\n\n","category":"method"},{"location":"reference/#KrylovPreconditioners.backward_substitution!-Tuple{KrylovPreconditioners.ILUFactorization, AbstractVector}","page":"Reference","title":"KrylovPreconditioners.backward_substitution!","text":"Applies in-place backward substitution with the U factor of F, under the assumptions:\n\nU is stored transposed / row-wise\nU has no lower-triangular elements stored\nU has (nonzero) diagonal elements stored.\n\n\n\n\n\n","category":"method"},{"location":"reference/#KrylovPreconditioners.build_adjmatrix-Tuple{Any}","page":"Reference","title":"KrylovPreconditioners.build_adjmatrix","text":"build_adjmatrix\n\nBuild the adjacency matrix of a matrix A corresponding to the undirected graph\n\n\n\n\n\n","category":"method"},{"location":"reference/#KrylovPreconditioners.forward_substitution!-Tuple{KrylovPreconditioners.ILUFactorization, AbstractVector}","page":"Reference","title":"KrylovPreconditioners.forward_substitution!","text":"Applies in-place forward substitution with the L factor of F, under the assumptions:\n\nL is stored column-wise (unlike U)\nL has no upper triangular elements\nL has no diagonal 
elements\n\n\n\n\n\n","category":"method"},{"location":"reference/#KrylovPreconditioners.isoccupied-Tuple{KrylovPreconditioners.SparseVectorAccumulator, Integer}","page":"Reference","title":"KrylovPreconditioners.isoccupied","text":"Check whether idx is nonzero.\n\n\n\n\n\n","category":"method"},{"location":"reference/#KrylovPreconditioners.overlap-Tuple{Any, Any}","page":"Reference","title":"KrylovPreconditioners.overlap","text":"overlap(Graph, subset, level)\n\nGiven subset embedded within Graph, compute subset2 such that subset2 contains subset and all of its adjacent vertices.\n\n\n\n\n\n","category":"method"},{"location":"reference/#KrylovPreconditioners.update!-Tuple{BlockJacobiPreconditioner, SparseArrays.SparseMatrixCSC}","page":"Reference","title":"KrylovPreconditioners.update!","text":"function update!(p, J::SparseMatrixCSC)\n\nUpdate the preconditioner p from the sparse Jacobian J in CSC format for the CPU\n\nNote that this implements the same algorithm as for the GPU and becomes very slow on CPU with growing number of blocks.\n\n\n\n\n\n","category":"method"},{"location":"reference/#LinearAlgebra.axpy!-Tuple{Any, SparseArrays.SparseMatrixCSC, Any, Any, KrylovPreconditioners.SparseVectorAccumulator}","page":"Reference","title":"LinearAlgebra.axpy!","text":"Add a part of a SparseMatrixCSC column to a SparseVectorAccumulator, starting at a given index until the end.\n\n\n\n\n\n","category":"method"},{"location":"reference/#SparseArrays.nnz-Tuple{KrylovPreconditioners.ILUFactorization}","page":"Reference","title":"SparseArrays.nnz","text":"Returns the number of nonzeros of the L and U factor combined.\n\nExcludes the unit diagonal of the L factor, which is not stored.\n\n\n\n\n\n","category":"method"},{"location":"reference/#Base.Vector-Tuple{KrylovPreconditioners.SortedSet}","page":"Reference","title":"Base.Vector","text":"For debugging and 
testing\n\n\n\n\n\n","category":"method"},{"location":"reference/#KrylovPreconditioners.BlockJacobiPreconditioner","page":"Reference","title":"KrylovPreconditioners.BlockJacobiPreconditioner","text":"BlockJacobiPreconditioner\n\nOverlapping-Schwarz preconditioner.\n\nAttributes\n\nnblocks::Int64: Number of partitions or blocks.\nblocksize::Int64: Size of each block.\npartitions::Vector{Vector{Int64}}:npart` partitions stored as lists\ncupartitions: partitions transfered to the GPU\nlpartitions::Vector{Int64}`: Length of each partitions.\nculpartitions::Vector{Int64}`: Length of each partitions, on the GPU.\nblocks: Dense blocks of the block-Jacobi\ncublocks: Js transfered to the GPU\nmap: The partitions as a mapping to construct views\ncumap: cumap transferred to the GPU`\npart: Partitioning as output by Metis\ncupart: part transferred to the GPU\n\n\n\n\n\n","category":"type"},{"location":"reference/#KrylovPreconditioners.InsertableSparseVector","page":"Reference","title":"KrylovPreconditioners.InsertableSparseVector","text":"InsertableSparseVector accumulates the sparse vector result from SpMV. Initialization requires O(N) work, therefore the data structure is reused. Insertion requires O(nnz) at worst, as insertion sort is used.\n\n\n\n\n\n","category":"type"},{"location":"reference/#KrylovPreconditioners.LinkedLists","page":"Reference","title":"KrylovPreconditioners.LinkedLists","text":"The factor L is stored column-wise, but we need all nonzeros in row row. We already keep track of the first nonzero in each column (at most n indices). Take l = LinkedLists(n). Let l.head[row] be the column of some nonzero in row row. Then we can store the column of the next nonzero of row row in l.next[l.head[row]], etc. 
That \"spot\" is empty and there will never be a conflict because as long as we only store the first nonzero per column: the column is then a unique identifier.\n\n\n\n\n\n","category":"type"},{"location":"reference/#KrylovPreconditioners.SortedSet","page":"Reference","title":"KrylovPreconditioners.SortedSet","text":"SortedSet keeps track of a sorted set of integers ≤ N using insertion sort with a linked list structure in a pre-allocated vector. Requires O(N + 1) memory. Insertion goes via a linear scan in O(n) where n is the number of stored elements, but can be accelerated by passing along a known value in the set (which is useful when pushing in an already sorted list). The insertion itself requires O(1) operations due to the linked list structure. Provides iterators:\n\nints = SortedSet(10)\npush!(ints, 5)\npush!(ints, 3)\nfor value in ints\n println(value)\nend\n\n\n\n\n\n","category":"type"},{"location":"#Home","page":"Home","title":"KrylovPreconditioners.jl documentation","text":"","category":"section"},{"location":"","page":"Home","title":"Home","text":"This package provides a collection of preconditioners.","category":"page"},{"location":"#How-to-Cite","page":"Home","title":"How to Cite","text":"","category":"section"},{"location":"","page":"Home","title":"Home","text":"If you use KrylovPreconditioners.jl in your work, please cite using the format given in CITATION.cff.","category":"page"},{"location":"#How-to-Install","page":"Home","title":"How to Install","text":"","category":"section"},{"location":"","page":"Home","title":"Home","text":"KrylovPreconditioners.jl can be installed and tested through the Julia package manager:","category":"page"},{"location":"","page":"Home","title":"Home","text":"julia> ]\npkg> add KrylovPreconditioners\npkg> test KrylovPreconditioners","category":"page"},{"location":"#Bug-reports-and-discussions","page":"Home","title":"Bug reports and 
discussions","text":"","category":"section"},{"location":"","page":"Home","title":"Home","text":"If you think you found a bug, feel free to open an issue. Focused suggestions and requests can also be opened as issues. Before opening a pull request, start an issue or a discussion on the topic, please.","category":"page"},{"location":"","page":"Home","title":"Home","text":"If you want to ask a question not suited for a bug report, feel free to start a discussion here. This forum is for general discussion about this repository and the JuliaSmoothOptimizers organization, so questions about any of our packages are welcome.","category":"page"}] +[{"location":"triangular_operators/#triangular_operators","page":"Triangular operators","title":"Triangular operators","text":"","category":"section"},{"location":"triangular_operators/","page":"Triangular operators","title":"Triangular operators","text":"TriangularOperator\nupdate!(::AbstractTriangularOperator, ::Any)","category":"page"},{"location":"triangular_operators/#KrylovPreconditioners.TriangularOperator","page":"Triangular operators","title":"KrylovPreconditioners.TriangularOperator","text":"TriangularOperator(A, uplo::Char, diag::Char; nrhs::Int=1, transa::Char='N')\n\nCreate a triangular operator for efficient solution of sparse triangular systems on GPU architectures. Supports sparse matrices stored on NVIDIA, AMD, and Intel GPUs.\n\nInput arguments\n\nA: A sparse matrix on the GPU representing the triangular system to be solved;\nuplo: Specifies whether the triangular matrix A is upper triangular ('U') or lower triangular ('L');\ndiag: Indicates whether the diagonal is unit ('U') or non-unit ('N');\nnrhs: Specifies the number of columns for the right-hand side(s). 
Defaults to 1, corresponding to solving triangular systems with a single vector as the right-hand side;\ntransa: Determines how the matrix A is applied during the triangle solves; 'N' for no transposition, 'T' for transpose, and 'C' for conjugate transpose.\n\nOutput argument\n\nop: An instance of AbstractTriangularOperator representing the triangular operator for the specified sparse matrix and parameters.\n\n\n\n\n\n","category":"function"},{"location":"triangular_operators/#KrylovPreconditioners.update!-Tuple{AbstractTriangularOperator, Any}","page":"Triangular operators","title":"KrylovPreconditioners.update!","text":"update!(op::AbstractTriangularOperator, A)\n\nUpdate the sparse matrix A associated with the given AbstractTriangularOperator without the need to reallocate buffers or repeat the structural analysis phase for detecting parallelism for sparse triangular solves. A and the operator op must have the same sparsity pattern, enabling efficient reuse of existing resources.\n\nInput arguments\n\nop: The triangular operator to update;\nA: The new sparse matrix to associate with the operator.\n\n\n\n\n\n","category":"method"},{"location":"triangular_operators/#Nvidia-GPUs","page":"Triangular operators","title":"Nvidia GPUs","text":"","category":"section"},{"location":"triangular_operators/","page":"Triangular operators","title":"Triangular operators","text":"Sparse matrices have a specific storage on Nvidia GPUs (CuSparseMatrixCSC, CuSparseMatrixCSR or CuSparseMatrixCOO):","category":"page"},{"location":"triangular_operators/","page":"Triangular operators","title":"Triangular operators","text":"using CUDA, CUDA.CUSPARSE\nusing SparseArrays\nusing KrylovPreconditioners\n\nif CUDA.functional()\n # CPU Arrays\n A_cpu = sprand(100, 100, 0.3)\n\n # GPU Arrays\n A_csc_gpu = CuSparseMatrixCSC(A_cpu)\n A_csr_gpu = CuSparseMatrixCSR(A_cpu)\n A_coo_gpu = CuSparseMatrixCOO(A_cpu)\n\n # Triangular operators\n op_csc = TriangularOperator(A_csc_gpu; uplo='L', diag='U', 
nrhs=1, transa='N')\n op_csr = TriangularOperator(A_csr_gpu; uplo='U', diag='N', nrhs=1, transa='T')\n op_coo = TriangularOperator(A_coo_gpu; uplo='L', diag='N', nrhs=5, transa='N')\nend","category":"page"},{"location":"triangular_operators/#AMD-GPUs","page":"Triangular operators","title":"AMD GPUs","text":"","category":"section"},{"location":"triangular_operators/","page":"Triangular operators","title":"Triangular operators","text":"Sparse matrices have a specific storage on AMD GPUs (ROCSparseMatrixCSC, ROCSparseMatrixCSR or ROCSparseMatrixCOO):","category":"page"},{"location":"triangular_operators/","page":"Triangular operators","title":"Triangular operators","text":"using AMDGPU, AMDGPU.rocSPARSE\nusing SparseArrays\nusing KrylovPreconditioners\n\nif AMDGPU.functional()\n # CPU Arrays\n A_cpu = sprand(200, 100, 0.3)\n\n # GPU Arrays\n A_csc_gpu = ROCSparseMatrixCSC(A_cpu)\n A_csr_gpu = ROCSparseMatrixCSR(A_cpu)\n A_coo_gpu = ROCSparseMatrixCOO(A_cpu)\n\n # Triangular operators\n op_csc = TriangularOperator(A_csc_gpu; uplo='L', diag='U', nrhs=1, transa='N')\n op_csr = TriangularOperator(A_csr_gpu; uplo='L', diag='U', nrhs=1, transa='T')\n op_coo = TriangularOperator(A_coo_gpu; uplo='L', diag='U', nrhs=5, transa='N')\nend","category":"page"},{"location":"triangular_operators/#Intel-GPUs","page":"Triangular operators","title":"Intel GPUs","text":"","category":"section"},{"location":"triangular_operators/","page":"Triangular operators","title":"Triangular operators","text":"Sparse matrices have a specific storage on Intel GPUs (oneSparseMatrixCSR):","category":"page"},{"location":"triangular_operators/","page":"Triangular operators","title":"Triangular operators","text":"using oneAPI, oneAPI.oneMKL\nusing SparseArrays\nusing KrylovPreconditioners\n\nif oneAPI.functional()\n # CPU Arrays\n A_cpu = sprand(T, 20, 10, 0.3)\n\n # GPU Arrays\n A_csr_gpu = oneSparseMatrixCSR(A_cpu)\n\n # Triangular operator\n op_csr = TriangularOperator(A_csr_gpu; uplo='L', diag='U', 
nrhs=1, transa='N')\nend","category":"page"},{"location":"krylov_operators/#krylov_operators","page":"Krylov operators","title":"Krylov operators","text":"","category":"section"},{"location":"krylov_operators/","page":"Krylov operators","title":"Krylov operators","text":"KrylovOperator\nupdate!(::AbstractKrylovOperator, ::Any)","category":"page"},{"location":"krylov_operators/#KrylovPreconditioners.KrylovOperator","page":"Krylov operators","title":"KrylovPreconditioners.KrylovOperator","text":"KrylovOperator(A; nrhs::Int=1, transa::Char='N')\n\nCreate a Krylov operator to accelerate sparse matrix-vector or matrix-matrix products on GPU architectures. The operator is compatible with sparse matrices stored on NVIDIA, AMD, and Intel GPUs.\n\nInput arguments\n\nA: The sparse matrix on the GPU that serves as the operator for matrix-vector or matrix-matrix products;\nnrhs: Specifies the number of columns for the right-hand sides. Defaults to 1 for standard matrix-vector products;\ntransa: Determines how the matrix A is applied during the products; 'N' for no transposition, 'T' for transpose, and 'C' for conjugate transpose.\n\nOutput argument\n\nop: An instance of AbstractKrylovOperator representing the Krylov operator for the specified sparse matrix and parameters.\n\n\n\n\n\n","category":"function"},{"location":"krylov_operators/#KrylovPreconditioners.update!-Tuple{AbstractKrylovOperator, Any}","page":"Krylov operators","title":"KrylovPreconditioners.update!","text":"update!(op::AbstractKrylovOperator, A)\n\nUpdate the sparse matrix A associated with the given AbstractKrylovOperator without the need to reallocate buffers or repeat the structural analysis phase for detecting parallelism for sparse matrix-vector or matrix-matrix products. 
A and the operator op must have the same sparsity pattern, enabling efficient reuse of existing resources.\n\nInput arguments\n\nop: The Krylov operator to update;\nA: The new sparse matrix to associate with the operator.\n\n\n\n\n\n","category":"method"},{"location":"krylov_operators/#Nvidia-GPUs","page":"Krylov operators","title":"Nvidia GPUs","text":"","category":"section"},{"location":"krylov_operators/","page":"Krylov operators","title":"Krylov operators","text":"Sparse matrices have a specific storage on Nvidia GPUs (CuSparseMatrixCSC, CuSparseMatrixCSR or CuSparseMatrixCOO):","category":"page"},{"location":"krylov_operators/","page":"Krylov operators","title":"Krylov operators","text":"using CUDA, CUDA.CUSPARSE\nusing SparseArrays\nusing KrylovPreconditioners\n\nif CUDA.functional()\n # CPU Arrays\n A_cpu = sprand(200, 100, 0.3)\n\n # GPU Arrays\n A_csc_gpu = CuSparseMatrixCSC(A_cpu)\n A_csr_gpu = CuSparseMatrixCSR(A_cpu)\n A_coo_gpu = CuSparseMatrixCOO(A_cpu)\n\n # Krylov operators\n op_csc = KrylovOperator(A_csc_gpu; nrhs=1, transa='N')\n op_csr = KrylovOperator(A_csr_gpu; nrhs=1, transa='T')\n op_coo = KrylovOperator(A_coo_gpu; nrhs=5, transa='N')\nend","category":"page"},{"location":"krylov_operators/#AMD-GPUs","page":"Krylov operators","title":"AMD GPUs","text":"","category":"section"},{"location":"krylov_operators/","page":"Krylov operators","title":"Krylov operators","text":"Sparse matrices have a specific storage on AMD GPUs (ROCSparseMatrixCSC, ROCSparseMatrixCSR or ROCSparseMatrixCOO):","category":"page"},{"location":"krylov_operators/","page":"Krylov operators","title":"Krylov operators","text":"using AMDGPU, AMDGPU.rocSPARSE\nusing SparseArrays\nusing KrylovPreconditioners\n\nif AMDGPU.functional()\n # CPU Arrays\n A_cpu = sprand(200, 100, 0.3)\n\n # GPU Arrays\n A_csc_gpu = ROCSparseMatrixCSC(A_cpu)\n A_csr_gpu = ROCSparseMatrixCSR(A_cpu)\n A_coo_gpu = ROCSparseMatrixCOO(A_cpu)\n\n # Krylov operators\n op_csc = KrylovOperator(A_csc_gpu; nrhs=1, 
transa='N')\n op_csr = KrylovOperator(A_csr_gpu; nrhs=1, transa='T')\n op_coo = KrylovOperator(A_coo_gpu; nrhs=5, transa='N')\nend","category":"page"},{"location":"krylov_operators/#Intel-GPUs","page":"Krylov operators","title":"Intel GPUs","text":"","category":"section"},{"location":"krylov_operators/","page":"Krylov operators","title":"Krylov operators","text":"Sparse matrices have a specific storage on Intel GPUs (oneSparseMatrixCSR):","category":"page"},{"location":"krylov_operators/","page":"Krylov operators","title":"Krylov operators","text":"using oneAPI, oneAPI.oneMKL\nusing SparseArrays\nusing KrylovPreconditioners\n\nif oneAPI.functional()\n # CPU Arrays\n A_cpu = sprand(Float32, 20, 10, 0.3)\n\n # GPU Arrays\n A_csr_gpu = oneSparseMatrixCSR(A_cpu)\n\n # Krylov operator\n op_csr = KrylovOperator(A_csr_gpu; nrhs=1, transa='N')\nend","category":"page"},{"location":"reference/#Reference","page":"Reference","title":"Reference","text":"","category":"section"},{"location":"reference/#Index","page":"Reference","title":"Index","text":"","category":"section"},{"location":"reference/","page":"Reference","title":"Reference","text":"","category":"page"},{"location":"reference/","page":"Reference","title":"Reference","text":"KrylovPreconditioners.update!(::BlockJacobiPreconditioner, ::SparseMatrixCSC)\nKrylovPreconditioners.BlockJacobiPreconditioner\nKrylovPreconditioners.backward_substitution!\nKrylovPreconditioners.forward_substitution!","category":"page"},{"location":"reference/#KrylovPreconditioners.update!-Tuple{BlockJacobiPreconditioner, SparseMatrixCSC}","page":"Reference","title":"KrylovPreconditioners.update!","text":"function update!(p, J::SparseMatrixCSC)\n\nUpdate the preconditioner p from the sparse Jacobian J in CSC format for the CPU\n\nNote that this implements the same algorithm as for the GPU and becomes very slow on CPU with growing number of 
blocks.\n\n\n\n\n\n","category":"method"},{"location":"reference/#KrylovPreconditioners.BlockJacobiPreconditioner","page":"Reference","title":"KrylovPreconditioners.BlockJacobiPreconditioner","text":"BlockJacobiPreconditioner\n\nOverlapping-Schwarz preconditioner.\n\nAttributes\n\nnblocks::Int64: Number of partitions or blocks.\nblocksize::Int64: Size of each block.\npartitions::Vector{Vector{Int64}}:npart` partitions stored as lists\ncupartitions: partitions transfered to the GPU\nlpartitions::Vector{Int64}`: Length of each partitions.\nculpartitions::Vector{Int64}`: Length of each partitions, on the GPU.\nblocks: Dense blocks of the block-Jacobi\ncublocks: Js transfered to the GPU\nmap: The partitions as a mapping to construct views\ncumap: cumap transferred to the GPU`\npart: Partitioning as output by Metis\ncupart: part transferred to the GPU\n\n\n\n\n\n","category":"type"},{"location":"reference/#KrylovPreconditioners.backward_substitution!","page":"Reference","title":"KrylovPreconditioners.backward_substitution!","text":"Applies in-place backward substitution with the U factor of F, under the assumptions:\n\nU is stored transposed / row-wise\nU has no lower-triangular elements stored\nU has (nonzero) diagonal elements stored.\n\n\n\n\n\n","category":"function"},{"location":"reference/#KrylovPreconditioners.forward_substitution!","page":"Reference","title":"KrylovPreconditioners.forward_substitution!","text":"Applies in-place forward substitution with the L factor of F, under the assumptions:\n\nL is stored column-wise (unlike U)\nL has no upper triangular elements\nL has no diagonal elements\n\n\n\n\n\n","category":"function"},{"location":"#Home","page":"Home","title":"KrylovPreconditioners.jl documentation","text":"","category":"section"},{"location":"","page":"Home","title":"Home","text":"This package provides a collection of preconditioners.","category":"page"},{"location":"#How-to-Cite","page":"Home","title":"How to 
Cite","text":"","category":"section"},{"location":"","page":"Home","title":"Home","text":"If you use KrylovPreconditioners.jl in your work, please cite using the format given in CITATION.cff.","category":"page"},{"location":"#How-to-Install","page":"Home","title":"How to Install","text":"","category":"section"},{"location":"","page":"Home","title":"Home","text":"KrylovPreconditioners.jl can be installed and tested through the Julia package manager:","category":"page"},{"location":"","page":"Home","title":"Home","text":"julia> ]\npkg> add KrylovPreconditioners\npkg> test KrylovPreconditioners","category":"page"},{"location":"#Bug-reports-and-discussions","page":"Home","title":"Bug reports and discussions","text":"","category":"section"},{"location":"","page":"Home","title":"Home","text":"If you think you found a bug, feel free to open an issue. Focused suggestions and requests can also be opened as issues. Before opening a pull request, start an issue or a discussion on the topic, please.","category":"page"},{"location":"","page":"Home","title":"Home","text":"If you want to ask a question not suited for a bug report, feel free to start a discussion here. This forum is for general discussion about this repository and the JuliaSmoothOptimizers organization, so questions about any of our packages are welcome.","category":"page"}] } diff --git a/dev/triangular_operators/index.html b/dev/triangular_operators/index.html new file mode 100644 index 0000000..abbc724 --- /dev/null +++ b/dev/triangular_operators/index.html @@ -0,0 +1,49 @@ + +Triangular operators · KrylovPreconditioners.jl

Triangular operators

KrylovPreconditioners.TriangularOperatorFunction
TriangularOperator(A, uplo::Char, diag::Char; nrhs::Int=1, transa::Char='N')

Create a triangular operator for efficient solution of sparse triangular systems on GPU architectures. Supports sparse matrices stored on NVIDIA, AMD, and Intel GPUs.

Input arguments

  • A: A sparse matrix on the GPU representing the triangular system to be solved;
  • uplo: Specifies whether the triangular matrix A is upper triangular ('U') or lower triangular ('L');
  • diag: Indicates whether the diagonal is unit ('U') or non-unit ('N');
  • nrhs: Specifies the number of columns for the right-hand side(s). Defaults to 1, corresponding to solving triangular systems with a single vector as the right-hand side;
  • transa: Determines how the matrix A is applied during the triangular solves; 'N' for no transposition, 'T' for transpose, and 'C' for conjugate transpose.

Output argument

  • op: An instance of AbstractTriangularOperator representing the triangular operator for the specified sparse matrix and parameters.
source
KrylovPreconditioners.update!Method
update!(op::AbstractTriangularOperator, A)

Update the sparse matrix A associated with the given AbstractTriangularOperator without the need to reallocate buffers or repeat the structural analysis phase that detects parallelism for sparse triangular solves. The new matrix A must have the same sparsity pattern as the matrix currently associated with op, enabling efficient reuse of existing resources.

Input arguments

  • op: The triangular operator to update;
  • A: The new sparse matrix to associate with the operator.
source

Nvidia GPUs

Sparse matrices have a specific storage on Nvidia GPUs (CuSparseMatrixCSC, CuSparseMatrixCSR or CuSparseMatrixCOO):

using CUDA, CUDA.CUSPARSE
+using SparseArrays
+using KrylovPreconditioners
+
+if CUDA.functional()
+  # CPU Arrays
+  A_cpu = sprand(100, 100, 0.3)
+
+  # GPU Arrays
+  A_csc_gpu = CuSparseMatrixCSC(A_cpu)
+  A_csr_gpu = CuSparseMatrixCSR(A_cpu)
+  A_coo_gpu = CuSparseMatrixCOO(A_cpu)
+
+  # Triangular operators
+  op_csc = TriangularOperator(A_csc_gpu, 'L', 'U'; nrhs=1, transa='N')
+  op_csr = TriangularOperator(A_csr_gpu, 'U', 'N'; nrhs=1, transa='T')
+  op_coo = TriangularOperator(A_coo_gpu, 'L', 'N'; nrhs=5, transa='N')
+end

AMD GPUs

Sparse matrices have a specific storage on AMD GPUs (ROCSparseMatrixCSC, ROCSparseMatrixCSR or ROCSparseMatrixCOO):

using AMDGPU, AMDGPU.rocSPARSE
+using SparseArrays
+using KrylovPreconditioners
+
+if AMDGPU.functional()
+  # CPU Arrays
+  A_cpu = sprand(100, 100, 0.3)
+
+  # GPU Arrays
+  A_csc_gpu = ROCSparseMatrixCSC(A_cpu)
+  A_csr_gpu = ROCSparseMatrixCSR(A_cpu)
+  A_coo_gpu = ROCSparseMatrixCOO(A_cpu)
+
+  # Triangular operators
+  op_csc = TriangularOperator(A_csc_gpu, 'L', 'U'; nrhs=1, transa='N')
+  op_csr = TriangularOperator(A_csr_gpu, 'L', 'U'; nrhs=1, transa='T')
+  op_coo = TriangularOperator(A_coo_gpu, 'L', 'U'; nrhs=5, transa='N')
+end

Intel GPUs

Sparse matrices have a specific storage on Intel GPUs (oneSparseMatrixCSR):

using oneAPI, oneAPI.oneMKL
+using SparseArrays
+using KrylovPreconditioners
+
+if oneAPI.functional()
+  # CPU Arrays
+  A_cpu = sprand(Float32, 20, 20, 0.3)
+
+  # GPU Arrays
+  A_csr_gpu = oneSparseMatrixCSR(A_cpu)
+
+  # Triangular operator
+  op_csr = TriangularOperator(A_csr_gpu, 'L', 'U'; nrhs=1, transa='N')
+end