gh-224: add matrix transpose
EgorOrachyov committed Sep 2, 2023
1 parent 2e2d856 commit 649f2bc
Showing 17 changed files with 454 additions and 35 deletions.
14 changes: 0 additions & 14 deletions CMakeLists.txt
@@ -320,20 +320,6 @@ add_library(spla SHARED
src/cpu/cpu_format_dok_vec.hpp
src/cpu/cpu_format_lil.hpp
src/cpu/cpu_formats.hpp
src/cpu/cpu_m_reduce.hpp
src/cpu/cpu_m_reduce_by_column.hpp
src/cpu/cpu_m_reduce_by_row.hpp
src/cpu/cpu_mxm.hpp
src/cpu/cpu_mxmT_masked.hpp
src/cpu/cpu_mxv.hpp
src/cpu/cpu_vxm.hpp
src/cpu/cpu_v_assign.hpp
src/cpu/cpu_v_count_mf.hpp
src/cpu/cpu_v_eadd.hpp
src/cpu/cpu_v_eadd_fdb.hpp
src/cpu/cpu_v_emult.hpp
src/cpu/cpu_v_map.hpp
src/cpu/cpu_v_reduce.hpp
src/util/pair_hash.hpp
src/profiling/time_profiler.cpp
src/profiling/time_profiler.hpp
1 change: 1 addition & 0 deletions include/spla.h
@@ -373,6 +373,7 @@ SPLA_API spla_Status spla_Exec_vxm_masked(spla_Vector r, spla_Vector mask, spla_
SPLA_API spla_Status spla_Exec_m_reduce_by_row(spla_Vector r, spla_Matrix M, spla_OpBinary op_reduce, spla_Scalar init, spla_Descriptor desc, spla_ScheduleTask* task);
SPLA_API spla_Status spla_Exec_m_reduce_by_column(spla_Vector r, spla_Matrix M, spla_OpBinary op_reduce, spla_Scalar init, spla_Descriptor desc, spla_ScheduleTask* task);
SPLA_API spla_Status spla_Exec_m_reduce(spla_Scalar r, spla_Scalar s, spla_Matrix M, spla_OpBinary op_reduce, spla_Descriptor desc, spla_ScheduleTask* task);
SPLA_API spla_Status spla_Exec_m_transpose(spla_Matrix R, spla_Matrix M, spla_OpUnary op_apply, spla_Descriptor desc, spla_ScheduleTask* task);
SPLA_API spla_Status spla_Exec_v_eadd(spla_Vector r, spla_Vector u, spla_Vector v, spla_OpBinary op, spla_Descriptor desc, spla_ScheduleTask* task);
SPLA_API spla_Status spla_Exec_v_emult(spla_Vector r, spla_Vector u, spla_Vector v, spla_OpBinary op, spla_Descriptor desc, spla_ScheduleTask* task);
SPLA_API spla_Status spla_Exec_v_eadd_fdb(spla_Vector r, spla_Vector v, spla_Vector fdb, spla_OpBinary op, spla_Descriptor desc, spla_ScheduleTask* task);
20 changes: 20 additions & 0 deletions include/spla/exec.hpp
@@ -239,6 +239,26 @@ namespace spla {
ref_ptr<Descriptor> desc = ref_ptr<Descriptor>(),
ref_ptr<ScheduleTask>* task_hnd = nullptr);

/**
* @brief Execute (schedule) matrix transpose operation
*
* @note Pass a valid `task_hnd` to store as a task, rather than execute immediately.
*
* @param R Matrix to store result
* @param M Matrix to transpose
* @param op_apply Unary op to transform values
* @param desc Scheduled task descriptor; default is null
* @param task_hnd Optional task handle; pass a non-null pointer to store the task
*
* @return Status of task execution, or status of handle creation
*/
SPLA_API Status exec_m_transpose(
ref_ptr<Matrix> R,
ref_ptr<Matrix> M,
ref_ptr<OpUnary> op_apply,
ref_ptr<Descriptor> desc = ref_ptr<Descriptor>(),
ref_ptr<ScheduleTask>* task_hnd = nullptr);

/**
* @brief Execute (schedule) element-wise addition by structure of two vectors
*
2 changes: 1 addition & 1 deletion include/spla/type.hpp
@@ -54,10 +54,10 @@ namespace spla {
SPLA_API virtual int get_id() = 0;
};

-using T_BOOL = bool;
 using T_INT = std::int32_t;
 using T_UINT = std::uint32_t;
 using T_FLOAT = float;
+using T_BOOL = bool;

SPLA_API extern ref_ptr<Type> BOOL;
SPLA_API extern ref_ptr<Type> INT;
11 changes: 8 additions & 3 deletions python/example.py
@@ -1,5 +1,10 @@
from pyspla import *

u = Vector.from_lists([0, 1], [10, 20], 4, INT)
v = Vector.from_lists([1, 3], [-5, 12], 4, INT)
print(u.emult(INT.PLUS, v))
M = Matrix.from_lists([0, 1, 2], [3, 2, 0], [-5, 3, 9], (3, 4), INT)
print(M)

print(M.transpose())

print(M.transpose(op_apply=INT.UONE))

print(M.transpose(op_apply=INT.AINV))
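
For reference, a minimal sketch of what these three calls yield; `n_rows`/`n_cols` follow the attributes
used by `Matrix.transpose` in matrix.py below, and the printed grids appear verbatim in its docstring:

    from pyspla import *

    M = Matrix.from_lists([0, 1, 2], [3, 2, 0], [-5, 3, 9], (3, 4), INT)

    T = M.transpose()                    # shape flips from (3, 4) to (4, 3)
    assert T.n_rows == 4 and T.n_cols == 3

    T1 = M.transpose(op_apply=INT.UONE)  # every stored value becomes 1
    Tn = M.transpose(op_apply=INT.AINV)  # every stored value changes sign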
28 changes: 17 additions & 11 deletions python/pyspla/__init__.py
@@ -129,6 +129,14 @@
Performance
-----------
Spla shows great performance compared to the Nvidia CUDA-based optimized GraphBLAST library, processing large graphs,
in extreme cases counting 1 BILLION edges, with speed and without memory issues. Spla also shows outstanding performance
in PageRank algorithms, outperforming the low-level, highly-optimized Nvidia CUDA Gunrock library. Spla scales
across Intel, Nvidia and AMD GPUs with acceptable performance, and can be run even on integrated GPUs. There you can
still get a good speedup, much faster than `scipy` or `networkx`.
More details of the performance study are given below.
**Comparison on an Nvidia GPU**
![stats](../../docs/stats/rq1_rel_compact.png)
@@ -174,23 +182,21 @@
Containers
----------
Library provides fundamental generalized linear algebra containers for data storage and mathematical computations.
These containers are generalized, so any of the built-in types may be used to parametrize the type of data. Containers
have sparse formats by default, so it is possible to create large-dimension but low-data containers. Containers
are storage-invariant, so the best format for the storage is automatically managed by the container internally.
All required format conversions are done in the context of particular primitive usage.
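
For example, a minimal sketch of a container whose dimension far exceeds its stored data
(`Vector.from_lists` as in example.py above; the dimension here is an arbitrary illustration):

    from pyspla import *

    # A 1,000,000-dimensional vector with only two stored entries; the sparse
    # default format keeps memory proportional to the entries, not the dimension.
    v = Vector.from_lists([0, 999999], [10, 20], 1000000, INT)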
Types
-----
Library provides a set of standard and common built-in data types. Library value types
differ a bit from a classic type definition. In the `spla` library a type is essentially a
storage characteristic, which defines the count and layout of bytes per element. The user
can interpret the stored data as he/she wants. The spla type set is limited due to the nature
of GPU acceleration, where an arbitrary layout of data causes significant performance penalties.
Types such as `int`, `float`, `uint` and `bool` are supported; more types can be added on demand.
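
For instance, a sketch contrasting two of these types (`INT` is used throughout this commit; `FLOAT` is
assumed to be exported alongside it, matching `T_FLOAT` in type.hpp above):

    from pyspla import *

    # Same sparsity structure, different storage characteristic per element.
    vi = Vector.from_lists([0, 1], [10, 20], 4, INT)        # 4-byte signed integers
    vf = Vector.from_lists([0, 1], [10.5, 20.5], 4, FLOAT)  # 4-byte floats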
Ops
---
@@ -203,8 +209,8 @@
Math operations
---------------
Library provides a set of high-level linear algebra operations over matrices and vectors with
parametrization by binary, unary and select `ops`. There are implementations available for
general `mxm` matrix-matrix, masked `mxmT` matrix-matrix, masked `mxv` matrix-vector,
masked `vxm` vector-matrix products, matrix and vector reductions, assignment, and so on.
Most operations have both CPU and GPU implementations. Thus, you will have GPU performance
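
A short sketch of one such operation follows; `mxm` matches the signature added to matrix.py in this commit,
while `INT.MULT` is an assumed op name alongside the `INT.PLUS` used in example.py:

    from pyspla import *

    A  = Matrix.from_lists([0, 1], [1, 0], [2, 3], (2, 2), INT)
    Id = Matrix.diag((2, 2), INT, 1)  # identity, via the diag() added in this commit

    # General matrix-matrix product parametrized by a (mult, add) op pair;
    # multiplying by the identity leaves the values of A unchanged.
    print(A.mxm(Id, INT.MULT, INT.PLUS))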
3 changes: 3 additions & 0 deletions python/pyspla/bridge.py
@@ -554,6 +554,7 @@ def load_library(lib_path):
_spla.spla_Exec_m_reduce_by_row.restype = _status_t
_spla.spla_Exec_m_reduce_by_column.restype = _status_t
_spla.spla_Exec_m_reduce.restype = _status_t
_spla.spla_Exec_m_transpose.restype = _status_t
_spla.spla_Exec_v_eadd.restype = _status_t
_spla.spla_Exec_v_emult.restype = _status_t
_spla.spla_Exec_v_eadd_fdb.restype = _status_t
@@ -576,6 +577,8 @@ def load_library(lib_path):
[_object_t, _object_t, _object_t, _object_t, _object_t, _p_object_t]
_spla.spla_Exec_m_reduce.argtypes = \
[_object_t, _object_t, _object_t, _object_t, _object_t, _p_object_t]
_spla.spla_Exec_m_transpose.argtypes = \
[_object_t, _object_t, _object_t, _object_t, _p_object_t]
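# With the restype and argtypes above registered, the new entry point is callable
# following the convention used by Matrix.transpose in matrix.py (a sketch):
#
#   status = _spla.spla_Exec_m_transpose(R.hnd, M.hnd, op.hnd, desc, task)
#
# i.e. four object handles (result, source, unary op, descriptor) plus a
# pointer to an optional schedule task handle.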
_spla.spla_Exec_v_eadd.argtypes = \
[_object_t, _object_t, _object_t, _object_t, _object_t, _p_object_t]
_spla.spla_Exec_v_emult.argtypes = \
112 changes: 112 additions & 0 deletions python/pyspla/matrix.py
@@ -535,6 +535,42 @@ def dense(cls, shape, dtype=INT, fill_value=0):

return M

@classmethod
def diag(cls, shape, dtype=INT, diag_value=1):
"""
Diagonal matrix of a desired shape, with a desired fill value on the diagonal.
>>> M = Matrix.diag((5, 5), INT, -1)
>>> print(M)
'
0 1 2 3 4
0|-1 . . . .| 0
1| .-1 . . .| 1
2| . .-1 . .| 2
3| . . .-1 .| 3
4| . . . .-1| 4
0 1 2 3 4
'
:param shape: 2-tuple.
Size of the matrix.
:param dtype: optional: Type. default: INT.
Type of values matrix will have.
:param diag_value: optional: any. default: 1.
Optional value to fill the diagonal with.
:return: Matrix with main diagonal filled with value.
"""

M = Matrix(shape, dtype)

for i in range(min(shape[0], shape[1])):
M.set(i, i, diag_value)

return M
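
# A usage sketch pairing diag() with the transpose() added below: a diagonal
# matrix is symmetric, so both sides print identically (str() routes through
# to_string(), see __str__ below).
#
#   D = Matrix.diag((3, 3), INT, 7)
#   assert str(D) == str(D.transpose())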

def mxm(self, M, op_mult, op_add, out=None, init=None, desc=None):
"""
General sparse-matrix by sparse-matrix product.
@@ -935,6 +971,82 @@ def reduce(self, op_reduce, out=None, init=None, desc=None):

return out

def transpose(self, out=None, op_apply=None, desc=None):
"""
Transpose matrix.
Generate a 3x4 matrix with int source data.
>>> M = Matrix.from_lists([0, 1, 2], [3, 2, 0], [-5, 3, 9], (3, 4), INT)
>>> print(M)
'
0 1 2 3
0| . . .-5| 0
1| . . 3 .| 1
2| 9 . . .| 2
0 1 2 3
'
Transpose matrix `M` as usual and print the result.
>>> print(M.transpose())
'
0 1 2
0| . . 9| 0
1| . . .| 1
2| . 3 .| 2
3|-5 . .| 3
0 1 2
'
Transpose while mapping each value to `1`, discarding the previous values.
>>> print(M.transpose(op_apply=INT.UONE))
'
0 1 2
0| . . 1| 0
1| . . .| 1
2| . 1 .| 2
3| 1 . .| 3
0 1 2
'
Transpose and apply the additive inverse to each value, effectively changing the sign of the values.
>>> print(M.transpose(op_apply=INT.AINV))
'
0 1 2
0| . .-9| 0
1| . . .| 1
2| .-3 .| 2
3| 5 . .| 3
0 1 2
'
:param out: optional: Matrix. default: None.
Optional matrix to store result.
:param op_apply: optional: OpUnary. default: None.
Optional unary function to apply on transposition.
:param desc: optional: Descriptor. default: None.
Optional descriptor object to configure the execution.
:return: Transposed matrix.
"""

if out is None:
out = Matrix(shape=(self.n_cols, self.n_rows), dtype=self.dtype)
if op_apply is None:
op_apply = self.dtype.IDENTITY

assert out
assert op_apply
assert out.n_rows == self.n_cols
assert out.n_cols == self.n_rows
assert out.dtype == self.dtype

check(backend().spla_Exec_m_transpose(out.hnd, self.hnd, op_apply.hnd,
self._get_desc(desc), self._get_task(None)))

return out
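
# A sketch of reusing a preallocated `out` container (the shape must be swapped
# and the dtype must match, per the asserts above):
#
#   M = Matrix.from_lists([0, 1, 2], [3, 2, 0], [-5, 3, 9], (3, 4), INT)
#   R = Matrix(shape=(4, 3), dtype=INT)
#   M.transpose(out=R)   # fills R and also returns it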

def __str__(self):
return self.to_string()

3 changes: 3 additions & 0 deletions src/binding/c_exec.cpp
@@ -64,6 +64,9 @@ spla_Status spla_Exec_m_reduce_by_column(spla_Vector r, spla_Matrix M, spla_OpBi
spla_Status spla_Exec_m_reduce(spla_Scalar r, spla_Scalar s, spla_Matrix M, spla_OpBinary op_reduce, spla_Descriptor desc, spla_ScheduleTask* task) {
SPLA_WRAP_EXEC(exec_m_reduce, AS_S(r), AS_S(s), AS_M(M), AS_OB(op_reduce));
}
spla_Status spla_Exec_m_transpose(spla_Matrix R, spla_Matrix M, spla_OpUnary op_apply, spla_Descriptor desc, spla_ScheduleTask* task) {
SPLA_WRAP_EXEC(exec_m_transpose, AS_M(R), AS_M(M), AS_OU(op_apply));
}
spla_Status spla_Exec_v_eadd(spla_Vector r, spla_Vector u, spla_Vector v, spla_OpBinary op, spla_Descriptor desc, spla_ScheduleTask* task) {
SPLA_WRAP_EXEC(exec_v_eadd, AS_V(r), AS_V(u), AS_V(v), AS_OB(op));
}
6 changes: 6 additions & 0 deletions src/cpu/cpu_algo_registry.cpp
@@ -34,6 +34,7 @@
#include <cpu/cpu_m_reduce.hpp>
#include <cpu/cpu_m_reduce_by_column.hpp>
#include <cpu/cpu_m_reduce_by_row.hpp>
#include <cpu/cpu_m_transpose.hpp>
#include <cpu/cpu_mxm.hpp>
#include <cpu/cpu_mxmT_masked.hpp>
#include <cpu/cpu_mxv.hpp>
@@ -102,6 +103,11 @@ namespace spla {
g_registry->add(MAKE_KEY_CPU_0("m_reduce", UINT), std::make_shared<Algo_m_reduce_cpu<T_UINT>>());
g_registry->add(MAKE_KEY_CPU_0("m_reduce", FLOAT), std::make_shared<Algo_m_reduce_cpu<T_FLOAT>>());

// algorithm m_transpose
g_registry->add(MAKE_KEY_CPU_0("m_transpose", INT), std::make_shared<Algo_m_transpose_cpu<T_INT>>());
g_registry->add(MAKE_KEY_CPU_0("m_transpose", UINT), std::make_shared<Algo_m_transpose_cpu<T_UINT>>());
g_registry->add(MAKE_KEY_CPU_0("m_transpose", FLOAT), std::make_shared<Algo_m_transpose_cpu<T_FLOAT>>());

// algorithm mxv_masked
g_registry->add(MAKE_KEY_CPU_0("mxv_masked", INT), std::make_shared<Algo_mxv_masked_cpu<T_INT>>());
g_registry->add(MAKE_KEY_CPU_0("mxv_masked", UINT), std::make_shared<Algo_mxv_masked_cpu<T_UINT>>());
10 changes: 5 additions & 5 deletions src/cpu/cpu_m_reduce.hpp
@@ -59,17 +59,17 @@ namespace spla {
auto t = ctx.task.template cast_safe<ScheduleTask_m_reduce>();
auto M = t->M.template cast_safe<TMatrix<T>>();

-if (M->is_valid(FormatMatrix::CpuDok)) {
-    return execute_dok(ctx);
+if (M->is_valid(FormatMatrix::CpuCsr)) {
+    return execute_csr(ctx);
 }
 if (M->is_valid(FormatMatrix::CpuLil)) {
     return execute_lil(ctx);
 }
-if (M->is_valid(FormatMatrix::CpuCsr)) {
-    return execute_csr(ctx);
+if (M->is_valid(FormatMatrix::CpuDok)) {
+    return execute_dok(ctx);
 }

-return execute_dok(ctx);
+return execute_csr(ctx);
}

private: