Skip to content

Commit

Permalink
gh-221: add mxm for matrix
Browse files Browse the repository at this point in the history
  • Loading branch information
EgorOrachyov committed Sep 1, 2023
1 parent 1a0d6a1 commit 6369d1c
Show file tree
Hide file tree
Showing 18 changed files with 484 additions and 38 deletions.
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -323,6 +323,7 @@ add_library(spla SHARED
src/cpu/cpu_m_reduce.hpp
src/cpu/cpu_m_reduce_by_column.hpp
src/cpu/cpu_m_reduce_by_row.hpp
src/cpu/cpu_mxm.hpp
src/cpu/cpu_mxmT_masked.hpp
src/cpu/cpu_mxv.hpp
src/cpu/cpu_vxm.hpp
Expand Down
1 change: 1 addition & 0 deletions include/spla.h
Original file line number Diff line number Diff line change
Expand Up @@ -366,6 +366,7 @@ SPLA_API spla_Status spla_Algorithm_tc(int* ntrins, spla_Matrix A, spla_Matrix B

/* Scheduling and operations execution */

SPLA_API spla_Status spla_Exec_mxm(spla_Matrix R, spla_Matrix A, spla_Matrix B, spla_OpBinary op_multiply, spla_OpBinary op_add, spla_Scalar init, spla_Descriptor desc, spla_ScheduleTask* task);
SPLA_API spla_Status spla_Exec_mxmT_masked(spla_Matrix R, spla_Matrix mask, spla_Matrix A, spla_Matrix B, spla_OpBinary op_multiply, spla_OpBinary op_add, spla_OpSelect op_select, spla_Scalar init, spla_Descriptor desc, spla_ScheduleTask* task);
SPLA_API spla_Status spla_Exec_mxv_masked(spla_Vector r, spla_Vector mask, spla_Matrix M, spla_Vector v, spla_OpBinary op_multiply, spla_OpBinary op_add, spla_OpSelect op_select, spla_Scalar init, spla_Descriptor desc, spla_ScheduleTask* task);
SPLA_API spla_Status spla_Exec_vxm_masked(spla_Vector r, spla_Vector mask, spla_Vector v, spla_Matrix M, spla_OpBinary op_multiply, spla_OpBinary op_add, spla_OpSelect op_select, spla_Scalar init, spla_Descriptor desc, spla_ScheduleTask* task);
Expand Down
27 changes: 27 additions & 0 deletions include/spla/exec.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,33 @@ namespace spla {
ref_ptr<Descriptor> desc = ref_ptr<Descriptor>(),
ref_ptr<ScheduleTask>* task_hnd = nullptr);

/**
* @brief Execute (schedule) sparse-matrix sparse-matrix product
*
* @note Operation equivalent semantic is `R = AB`
* @note Pass valid `task_hnd` to store as a task, rather than execute immediately.
*
* @param R Matrix to store result of the operation
* @param A Left matrix for product
* @param B Right matrix for product
* @param op_multiply Element-wise binary operator for matrices elements product
* @param op_add Element-wise binary operator for matrices elements products sum
* @param init Init value for the matrix row and column products reduction
* @param desc Scheduled task descriptor; default is null
* @param task_hnd Optional task hnd; pass not-null pointer to store task
*
* @return Status on task execution or status on hnd creation
*/
SPLA_API Status exec_mxm(
ref_ptr<Matrix> R,
ref_ptr<Matrix> A,
ref_ptr<Matrix> B,
ref_ptr<OpBinary> op_multiply,
ref_ptr<OpBinary> op_add,
ref_ptr<Scalar> init,
ref_ptr<Descriptor> desc = ref_ptr<Descriptor>(),
ref_ptr<ScheduleTask>* task_hnd = nullptr);

/**
* @brief Execute (schedule) sparse masked matrix matrix-transposed product
*
Expand Down
37 changes: 6 additions & 31 deletions python/example.py
Original file line number Diff line number Diff line change
@@ -1,35 +1,10 @@
from pyspla import *

M = Matrix.from_lists([0, 1, 2, 2], [1, 2, 0, 4], [1, 2, 3, 4], (3, 5), INT)
print(M)

def bfs(s: int, A: Matrix):
v = Vector(A.n_rows, INT) # to store depths

front = Vector.from_lists([s], [1], A.n_rows, INT) # front of new vertices to study
front_size = 1 # current front size
depth = Scalar(INT, 0) # depth of search
count = 0 # num of reached vertices

while front_size > 0: # while have something to study
depth += 1
count += front_size
v.assign(front, depth, op_assign=INT.SECOND, op_select=INT.NQZERO) # assign depths
front = front.vxm(v, A, op_mult=INT.LAND, op_add=INT.LOR, op_select=INT.EQZERO) # do traversal
front_size = front.reduce(op_reduce=INT.PLUS).get() # update front count to end algorithm

return v, count, depth.get()


I = [0, 1, 2, 2, 3]
J = [1, 2, 0, 3, 2]
V = [1, 1, 1, 1, 1]
A = Matrix.from_lists(I, J, V, shape=(4, 4), dtype=INT)
print(A)

v, c, d = bfs(0, A)
print(v)
print(c)
print(d)
N = Matrix.from_lists([0, 1, 2, 3], [2, 0, 1, 3], [2, 3, 4, 5], (5, 4), INT)
print(N)

M = Matrix.from_lists([0, 1, 2, 3], [0, 3, 3, 2], [1, 2, 3, 4], (4, 4), INT)
print(M)
print(M.reduce_by_column(INT.PLUS))
R = M.mxm(N, INT.MULT, INT.PLUS)
print(R)
9 changes: 5 additions & 4 deletions python/pyspla/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@
automatically select GPU-based acceleration backed for computations.
>>> v, c, d = bfs(0, A)
Outhput the result vector with distances of reached vertices.
Output the result vector with distances of reached vertices.
>>> print(v)
'
0| 1
Expand Down Expand Up @@ -205,9 +205,10 @@
Library provides a set of high-level linear algebra operations over matrices and vectors with
parametrization by binary, unary and select `ops`. There is available implementation for
masked `mxmT` matrix-matrix, `mxv` matrix-vector, `vxm` vector-matrix products, matrix and
vector reductions, assignment, and so on. Most operations have both CPU and GPU implementation.
Thus, you will have GPU performance in computations out of the box.
general `mxm` matrix-matrix, masked `mxmT` matrix-matrix, masked `mxv` matrix-vector,
masked `vxm` vector-matrix products, matrix and vector reductions, assignment, and so on.
Most operations have both CPU and GPU implementation. Thus, you will have GPU performance
in computations out of the box.
Details
-------
Expand Down
3 changes: 3 additions & 0 deletions python/pyspla/bridge.py
Original file line number Diff line number Diff line change
Expand Up @@ -547,6 +547,7 @@ def load_library(lib_path):
_spla.spla_Algorithm_pr.argtypes = [_p_object_t, _object_t, _float, _float, _object_t]
_spla.spla_Algorithm_tc.argtypes = [_p_int, _object_t, _object_t, _object_t]

_spla.spla_Exec_mxm.restype = _status_t
_spla.spla_Exec_mxmT_masked.restype = _status_t
_spla.spla_Exec_mxv_masked.restype = _status_t
_spla.spla_Exec_vxm_masked.restype = _status_t
Expand All @@ -560,6 +561,8 @@ def load_library(lib_path):
_spla.spla_Exec_v_reduce.restype = _status_t
_spla.spla_Exec_v_count_mf.restype = _status_t

_spla.spla_Exec_mxm.argtypes = \
[_object_t, _object_t, _object_t, _object_t, _object_t, _object_t, _object_t, _p_object_t]
_spla.spla_Exec_mxmT_masked.argtypes = \
[_object_t, _object_t, _object_t, _object_t, _object_t, _object_t, _object_t, _object_t, _object_t, _p_object_t]
_spla.spla_Exec_mxv_masked.argtypes = \
Expand Down
85 changes: 83 additions & 2 deletions python/pyspla/matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -535,6 +535,87 @@ def dense(cls, shape, dtype=INT, fill_value=0):

return M

def mxm(self, M, op_mult, op_add, out=None, init=None, desc=None):
    """
    General sparse-matrix by sparse-matrix product.

    Generate left operand matrix of shape 3x5 for product.
    >>> M = Matrix.from_lists([0, 1, 2, 2], [1, 2, 0, 4], [1, 2, 3, 4], (3, 5), INT)
    >>> print(M)
    '
     0 1 2 3 4
    0| . 1 . . .| 0
    1| . . 2 . .| 1
    2| 3 . . . 4| 2
     0 1 2 3 4
    '

    Generate right operand matrix of shape 5x4 for product, num of rows must match.
    >>> N = Matrix.from_lists([0, 1, 2, 3], [2, 0, 1, 3], [2, 3, 4, 5], (5, 4), INT)
    >>> print(N)
    '
     0 1 2 3
    0| . . 2 .| 0
    1| 3 . . .| 1
    2| . 4 . .| 2
    3| . . . 5| 3
    4| . . . .| 4
     0 1 2 3
    '

    Evaluate product using respective element-wise operations.
    >>> R = M.mxm(N, INT.MULT, INT.PLUS)
    >>> print(R)
    '
     0 1 2 3
    0| 3 . . .| 0
    1| . 8 . .| 1
    2| . . 6 .| 2
     0 1 2 3
    '

    :param M: Matrix.
        Right matrix for a product.

    :param op_mult: OpBinary.
        Element-wise binary operator for matrices elements product.

    :param op_add: OpBinary.
        Element-wise binary operator for matrices elements products sum.

    :param out: optional: Matrix: default: None.
        Optional matrix to store result of product.

    :param init: optional: Scalar: default: 0.
        Optional neutral init value for reduction.

    :param desc: optional: Descriptor. default: None.
        Optional descriptor object to configure the execution.

    :return: Matrix with result.
    """

    # Allocate result and neutral init value lazily when the caller did not provide them
    if out is None:
        out = Matrix(shape=(self.n_rows, M.n_cols), dtype=self.dtype)
    if init is None:
        init = Scalar(dtype=self.dtype, value=0)

    assert M
    assert out
    assert init
    # All operands must share a single data type
    assert out.dtype == self.dtype
    assert M.dtype == self.dtype
    assert init.dtype == self.dtype
    # Shape compatibility: R(n x k) = A(n x m) * B(m x k)
    assert out.n_rows == self.n_rows
    assert out.n_cols == M.n_cols
    assert self.n_cols == M.n_rows

    check(backend().spla_Exec_mxm(out.hnd, self.hnd, M.hnd,
                                  op_mult.hnd, op_add.hnd,
                                  init.hnd, self._get_desc(desc), self._get_task(None)))

    return out

def mxmT(self, mask, M, op_mult, op_add, op_select, out=None, init=None, desc=None):
"""
Masked sparse-matrix by sparse-matrix^T (transposed) product with sparse-mask.
Expand Down Expand Up @@ -610,8 +691,8 @@ def mxmT(self, mask, M, op_mult, op_add, op_select, out=None, init=None, desc=No
:param op_select: OpSelect.
Selection op to filter mask.
:param out: optional: Vector: default: None.
Optional vector to store result of product.
:param out: optional: Matrix: default: None.
Optional matrix to store result of product.
:param init: optional: Scalar: default: 0.
Optional neutral init value for reduction.
Expand Down
3 changes: 3 additions & 0 deletions src/binding/c_exec.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,9 @@
#define AS_OU(x) as_ref<spla::OpUnary>(x)
#define AS_OS(x) as_ref<spla::OpSelect>(x)

// Thin C-API binding for general sparse-matrix sparse-matrix product R = AB.
// Unwraps the opaque C handles into typed spla references and forwards to exec_mxm.
// NOTE(review): `desc` and `task` are not passed explicitly, so they are presumably
// referenced by name inside SPLA_WRAP_EXEC (schedule vs immediate execution) --
// confirm against the macro definition.
spla_Status spla_Exec_mxm(spla_Matrix R, spla_Matrix A, spla_Matrix B, spla_OpBinary op_multiply, spla_OpBinary op_add, spla_Scalar init, spla_Descriptor desc, spla_ScheduleTask* task) {
SPLA_WRAP_EXEC(exec_mxm, AS_M(R), AS_M(A), AS_M(B), AS_OB(op_multiply), AS_OB(op_add), AS_S(init));
}
spla_Status spla_Exec_mxmT_masked(spla_Matrix R, spla_Matrix mask, spla_Matrix A, spla_Matrix B, spla_OpBinary op_multiply, spla_OpBinary op_add, spla_OpSelect op_select, spla_Scalar init, spla_Descriptor desc, spla_ScheduleTask* task) {
SPLA_WRAP_EXEC(exec_mxmT_masked, AS_M(R), AS_M(mask), AS_M(A), AS_M(B), AS_OB(op_multiply), AS_OB(op_add), AS_OS(op_select), AS_S(init));
}
Expand Down
6 changes: 6 additions & 0 deletions src/cpu/cpu_algo_registry.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
#include <cpu/cpu_m_reduce.hpp>
#include <cpu/cpu_m_reduce_by_column.hpp>
#include <cpu/cpu_m_reduce_by_row.hpp>
#include <cpu/cpu_mxm.hpp>
#include <cpu/cpu_mxmT_masked.hpp>
#include <cpu/cpu_mxv.hpp>
#include <cpu/cpu_v_assign.hpp>
Expand Down Expand Up @@ -109,6 +110,11 @@ namespace spla {
g_registry->add(MAKE_KEY_CPU_0("mxmT_masked", INT), std::make_shared<Algo_mxmT_masked_cpu<T_INT>>());
g_registry->add(MAKE_KEY_CPU_0("mxmT_masked", UINT), std::make_shared<Algo_mxmT_masked_cpu<T_UINT>>());
g_registry->add(MAKE_KEY_CPU_0("mxmT_masked", FLOAT), std::make_shared<Algo_mxmT_masked_cpu<T_FLOAT>>());

// algorithm mxm
g_registry->add(MAKE_KEY_CPU_0("mxm", INT), std::make_shared<Algo_mxm_cpu<T_INT>>());
g_registry->add(MAKE_KEY_CPU_0("mxm", UINT), std::make_shared<Algo_mxm_cpu<T_UINT>>());
g_registry->add(MAKE_KEY_CPU_0("mxm", FLOAT), std::make_shared<Algo_mxm_cpu<T_FLOAT>>());
}

}// namespace spla
21 changes: 21 additions & 0 deletions src/cpu/cpu_format_coo.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,27 @@ namespace spla {
in.values = 0;
}

/**
 * Convert a matrix from COO (triplet) storage into LIL (list-of-rows) storage.
 *
 * Scatters every (i, j, x) triplet of `in` into the row list `out.Ar[i]`.
 * The output row array must already be sized to `n_rows`; row lists keep the
 * triplets in the order they appear in the COO arrays.
 */
template<typename T>
void cpu_coo_to_lil(uint n_rows,
                    const CpuCoo<T>& in,
                    CpuLil<T>& out) {
    auto& rows = out.Ar;

    assert(rows.size() == n_rows);

    for (uint t = 0; t < in.values; ++t) {
        // Append (column, value) to the list of row in.Ai[t]
        rows[in.Ai[t]].emplace_back(in.Aj[t], in.Ax[t]);
    }
}

template<typename T>
void cpu_coo_to_dok(const CpuCoo<T>& in,
CpuDok<T>& out) {
Expand Down
Loading

0 comments on commit 6369d1c

Please sign in to comment.