Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Vector backend #2505

Draft
wants to merge 21 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,7 @@ MANIFEST
## Editor Files
.vscode/
.vs/
.idea/

## Build Files
*/bin/lpython
Expand Down
22 changes: 22 additions & 0 deletions ISSUES/Issue2496.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
from numpy import array, empty, int16
from lpython import (i16, i32, c_p_pointer, Pointer, CPtr, TypeVar)


# Module-level type variables used as symbolic array dimensions in the
# annotations below. Note that both functions in this file also declare
# parameters with the same names (Tn, Tm), which shadow these inside the
# function bodies. Tl is declared but not referenced in this file.
Tn = TypeVar("Tn")
Tm = TypeVar("Tm")
Tl = TypeVar("Tl")


# Working half of the Issue 2496 reproducer (per the file path).
#
# Allocates an uninitialized (Tn, Tm) int16 array with numpy.empty and
# returns it. The dimension names Tn and Tm appear only in the return
# annotation and in the body, i.e. after the `Tn`/`Tm` parameters have been
# declared in the signature. The `Anm_l4` and `l` parameters are accepted
# but never read.
def THIS_WORKS(Anm_l4: CPtr, Tn: i32, Tm: i32, l: i32) -> i16[Tn, Tm]:
    A_nm: i16[Tn, Tm] = empty((Tn, Tm), dtype=int16)
    return A_nm


# Failing half of the Issue 2496 reproducer (per the file path and the
# function name).
#
# Views the C pointer `b` as a 1-D i16 array of length Tm * Tn and copies
# it element-by-element into the 2-D array `d`, treating `b` as row-major
# (flat index = i * Tn + j).
#
# NOTE(review): unlike THIS_WORKS, the dimension names Tm/Tn are used in
# the annotation of the first parameter `d` BEFORE the `Tm`/`Tn` parameters
# appear in the signature. Whether that ordering is what triggers the
# issue is not shown here -- confirm against the issue report.
def THIS_DOESNT_WORK(d: i16[Tm, Tn], b: CPtr, Tm: i32, Tn: i32) -> None:
    # Wrap the raw pointer as a flat i16 array with Tm * Tn elements.
    B: Pointer[i16[:]] = c_p_pointer(b, i16[:], array([Tm * Tn]))
    # Loop indices are declared up front with explicit types.
    i: i32
    j: i32
    for i in range(Tm):
        for j in range(Tn):
            d[i, j] = B[(i * Tn) + j]

18 changes: 18 additions & 0 deletions ISSUES/Issue2499.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
from lpython import i32, i16, Const
# Reproducer script for Issue 2499 (per the file path): a module-level nest
# of range() loops whose bounds and steps are Const[i32] values (except
# VR_SIZE, which is a plain i32). Every loop body is `pass`, so the script
# computes nothing; only the declarations and the loop structure matter.
VR_SIZE: i32 = 32_768
# `l` is a Const initialised from the non-Const variable VR_SIZE.
l: Const[i32] = VR_SIZE
n: Const[i32] = 15
m: Const[i32] = 3
k: i32
M2: Const[i32] = 5
# A_ik is declared but never used in this script.
A_ik: i16
jj: i32
ii: i32
i: i32
# With l == VR_SIZE the outer range yields only jj = 0; the `ii` range
# yields 0, 5, 10 (n = 15, step M2 = 5).
for jj in range(0, l, VR_SIZE): # each VR-col chunk in B and C
    for ii in range(0, n, M2): # each M2 block in A cols and B rows
        for i in range(0, M2): # zero-out rows of C
            pass
        for k in range(0, m): # rows of B
            for i in range(0, M2):
                pass
25 changes: 25 additions & 0 deletions ISSUES/Issue2503.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
from lpython import (i16, i32, Const)
from numpy import empty, int16
# Compile-time constant array length used by foo(); bar() takes the length
# as a run-time parameter instead.
dim: Const[i32] = 10


# Issue 2503 reproducer, constant-size variant: the array length comes from
# the module-level Const `dim`.
def foo():
    """Negative indices produce random results each run."""
    A: i16[dim] = empty((dim,), dtype=int16)
    ww: i32
    # Fill A[0..dim-1] with 1..dim so every element has a known value.
    for ww in range(dim):
        A[ww] = i16(ww + 1)
    # Print the first three elements by non-negative index and the last
    # three by negative index; the negative-index reads are the ones the
    # docstring reports as misbehaving.
    print(A[0], A[1], A[2], "...", A[-3], A[-2], A[-1])


# Issue 2503 reproducer, run-time-size variant: identical to foo() except
# that the array length is the parameter `dim_` rather than a Const.
def bar(dim_: i32):
    """Negative indices always produce zero when 'dim' is a parameter."""
    A: i16[dim_] = empty((dim_,), dtype=int16)
    ww: i32
    # Fill A[0..dim_-1] with 1..dim_ so every element has a known value.
    for ww in range(dim_):
        A[ww] = i16(ww + 1)
    # Same print as foo(): three non-negative-index reads followed by three
    # negative-index reads.
    print(A[0], A[1], A[2], "...", A[-3], A[-2], A[-1])


# Run both variants with the same length (10) so their printed output can
# be compared directly.
foo()
bar(10)
5 changes: 5 additions & 0 deletions ISSUES/Issue2509.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@

# Issue 2509 reproducer (per the file path): a function with a defaulted
# bool parameter that is called without arguments.
def main(option: bool = False):
    # Prints the label followed by the value of `option`.
    print("option: ", option)

# Call with no arguments so that the default value (False) must be used.
main()
20 changes: 20 additions & 0 deletions ISSUES/SIGSEGV/Issue2498.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
from numpy import array, empty, int16
from lpython import (i16, i32, ccallback, c_p_pointer, Pointer, u64, CPtr, i64,
ccall, sizeof, Array, Allocatable, TypeVar, Const)


# Module-level type variables used as symbolic dimensions in the return
# annotation of spot_print_lpython_array, which also declares parameters
# with the same names.
rows = TypeVar("rows")
cols = TypeVar("cols")


# Issue 2498 reproducer (filed under ISSUES/SIGSEGV per the file path).
#
# The function is never called in this file and its body is only `pass`,
# so it returns nothing even though the return annotation is
# i16[rows, cols]. Only the signature is of interest: a 1-D i16 array
# argument plus `rows`/`cols` i32 parameters that share names with the
# module-level TypeVars and are used as the dimensions of the return type.
def spot_print_lpython_array(a: i16[:], rows: i32, cols: i32) -> i16[rows, cols]:
    pass


# Trivial entry point: prints a greeting and returns 0. It does not call
# spot_print_lpython_array.
def main() -> i32:
    print ("hello, world!")
    return 0


# Script entry point; the i32 returned by main() is discarded.
if __name__ == "__main__":
    main()
117 changes: 117 additions & 0 deletions ISSUES/UNHANDLED-EXCEPTIONS/Issue2479.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
import numpy
from lpython import (i16, i32, ccallback, c_p_pointer, Pointer, u64, CPtr, i64)

######## ALL THE LINES WITH EIGHT COMMENT MARKS ARE THE ONES WE NEED TO
######## BRING UP! AS IT STANDS, THIS CODE WORKS IN LPYTHON MAIN AS OF 4
######## FEBRUARY 2024.

# https://numpy.org/devdocs/reference/typing.html
######## from numpy.typing import NDArray


# plan for 30 Jan 2024 --
# step 0: comment out this code and ./build_baryon.sh to run on APU
# emulator; or ./run_full_emulation.sh to run in CPython.
# step 1: side-by-side numpy implementation in full-emulation
# - get there line-by-line
# = focus on gvml_add_u16 first


# Issue 2479 reproducer (filed under ISSUES/UNHANDLED-EXCEPTIONS per the
# file path).
#
# What the active (non-commented) code does:
#   * wraps the C pointers A, B and C as flat i16 arrays of length n*m,
#     m*l and n*l respectively;
#   * declares a 2-D array A_nm and loops over (row, col) assigning into it
#     from pA_nm;
#   * runs a blocked loop nest over jj / ii / i / k whose bodies are `pass`.
# Everything prefixed with ######## is the NumPy code the authors want to
# enable eventually; it is not executed. The function returns None.
#
# Parameters:
#   n, m, l  -- matrix dimensions (A is n x m, B is m x l, C is n x l
#               according to the pointer lengths below).
#   M1       -- accepted but never read in this function.
#   M2       -- block size used as the step of the `ii` loop and the bound
#               of the `i` loops.
#   A, B, C  -- raw C pointers to the source (A, B) and destination (C)
#               buffers.
#
# NOTE(review): `array` and `Array` are used below but are not among the
# names imported at the top of this file as shown in the diff (only
# `import numpy` and a list of lpython names) -- confirm.
def numpy_side_by_side(n: i32, m: i32, l: i32, M1: i32, M2: i32,
                       A: CPtr, B: CPtr, C: CPtr) -> \
        None: ######## NDArray[numpy.int16]:
    VR_SIZE: i32 = 32_768

    # In the primary example, n = 15, m = 3, l = 32_768,
    # M1 = 1, M2 = 5

    # source GSI L4 arrays
    pA_nm: Pointer[i16[:]] = c_p_pointer(A, i16[:], array([n * m]))
    # pB_ml is created but not read by any active statement below.
    pB_ml: Pointer[i16[:]] = c_p_pointer(B, i16[:], array([m * l]))

    # source numpy arrays
    ######## A_nm: NDArray[numpy.int16] = numpy.zeros((n, m), dtype=numpy.int16)
    ######## for row in range(n):
    ######## A_nm[row,:] = pA_nm[(row * m):((row + 1) * m)]
    A_nm: Array[i16, n, m]
    row : i32
    for row in range(n):
        col : i32
        for col in range(m):
            # NOTE(review): the left-hand side is a single element
            # (A_nm[row, col]) while the right-hand side is a slice of
            # pA_nm. The accompanying Issue2479.txt traceback ends in
            # handle_array_section_association_to_pointer with
            # "AssertFailed: target_rank > 0"; presumably this statement is
            # the trigger -- confirm against the issue report.
            A_nm[row, col] = pA_nm[(row * m):((row * m) + col)]

    ######## B_ml: NDArray[numpy.int16] = numpy.zeros((m, l), dtype=numpy.int16)
    ######## for row in range(m):
    ######## B_ml[row,:] = pB_ml[(row * l):((row + 1) * l)]

    # # destination numpy array
    ######## C_nl: NDArray[numpy.int16] = numpy.zeros((n, l), dtype=numpy.int16)

    # destination GSI L4 array
    # pC_nl is created but not read by any active statement below.
    pC_nl: Pointer[i16[:]] = c_p_pointer(C, i16[:], array([n * l]))

    # First, accumulate outer product without blocking. This is
    # the code we would -ultimately- like to compile. Notice that
    # all GSI-specific L1, L4, MMB are hidden.

    k: i32
    ######## for k in range(0, m):
    ######## C_nl += numpy.outer(A_nm[:,k], B_ml[k,:])
    ######## pass

    # expect
    # [[ 5 8 11 ... 20 23 26],
    # [ 8 14 20 ... 38 44 50],
    # [11 20 29 ... 56 65 74], ...
    #
    # [ 8 14 20 ... 38 44 50],
    # [11 20 29 ... 56 65 74],
    # [14 26 38 ... 74 86 98]]
    # Dummy assignment that exists only as a convenient debugger stop.
    set_breakpoint_here_and_inspect_C_nl : i32 = 0

    # Second, with explicit blocking. This is a stepping-stone
    # for our back-end. Notice that L1 and MMB are hidden.

    # T_1l: NDArray[numpy.int16] = numpy.zeros((1, l), dtype=numpy.int16)
    # B_1l: NDArray[numpy.int16] = numpy.zeros((1, l), dtype=numpy.int16)
    # A_ik is declared for the commented-out statements in the inner loop;
    # no active statement assigns or reads it.
    A_ik: i16
    jj: i32
    ii: i32
    i: i32
    # NOTE(review): in the commented-out statements below, `i + ii` indexes
    # the first axis of both A_nm and C_nl, so the "A cols and B rows"
    # wording on the `ii` loop looks inaccurate -- confirm.
    for jj in range(0, l, VR_SIZE): # each VR-col chunk in B and C
        for ii in range(0, n, M2): # each M2 block in A cols and B rows
            for i in range(0, M2): # zero-out rows of C
                ######## C_nl[i + ii, :] = 0
                pass
            for k in range(0, m): # rows of B
                # B_1l[0, :] = B_ml[k, :]
                for i in range(0, M2):
                    ######## A_ik = A_nm[i + ii, k]
                    # broadcast a single element of A
                    # T_1l[0, :] = A_ik
                    # pointwise (Hadamard) product:
                    # T_1l[0, :] = np.multiply(B_1l[0, :], T_1l[0, :])
                    # C_nl[i + ii, :] += T_1l[0, :]
                    # optimization without the temporaries
                    ######## C_nl[i + ii, :] += B_ml[k, :] * A_ik
                    pass

    set_breakpoint_here_and_inspect_C_nl = 0

    ######## return C_nl

# Driver for the reproducer: sets up the dimensions n = 15, m = 3,
# l = 32_768, M1 = 1, M2 = 5, calls numpy_side_by_side, then prints a
# greeting.
def main():
    n : i32 = 15
    m : i32 = 3
    l : i32 = 32_768
    M1 : i32 = 1
    M2 : i32 = 5
    # The three CPtr variables are declared without being assigned a value
    # before they are passed on.
    A_l4 : CPtr
    B_l4 : CPtr
    C_l4 : CPtr
    numpy_side_by_side(n, m, l, M1, M2, A_l4, B_l4, C_l4)
    print ("hello, world!")


# Script entry point.
if __name__ == "__main__":
    main()
55 changes: 55 additions & 0 deletions ISSUES/UNHANDLED-EXCEPTIONS/Issue2479.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
└─(10:46:54 on vector-backend ✖ ✭)──> lpython ../ISSUES/UNHANDLED-EXCEPTIONS/Issue2479.py 1 ↵ ──(Mon,Feb05)─┘
Internal Compiler Error: Unhandled exception
Traceback (most recent call last):
File "/Users/brian/Dropbox/Mac/Documents/GitHub/lpython/src/bin/lpython.cpp", line 1872
err = compile_python_to_object_file(arg_file, tmp_o, runtime_library_dir,
File "/Users/brian/Dropbox/Mac/Documents/GitHub/lpython/src/bin/lpython.cpp", line 824
res = fe.get_llvm3(*asr, pass_manager, diagnostics, infile);
File "/Users/brian/Dropbox/Mac/Documents/GitHub/lpython/src/lpython/python_evaluator.cpp", line 71
run_fn, infile);
File "/Users/brian/Dropbox/Mac/Documents/GitHub/lpython/src/libasr/codegen/asr_to_llvm.cpp", line 9282
v.visit_asr((ASR::asr_t&)asr);
File "../libasr/asr.h", line 5057
File "../libasr/asr.h", line 5033
File "../libasr/asr.h", line 5058
File "../libasr/asr.h", line 4766
File "/Users/brian/Dropbox/Mac/Documents/GitHub/lpython/src/libasr/codegen/asr_to_llvm.cpp", line 932
ASR::symbol_t *mod = x.m_symtab->get_symbol(item);
File "../libasr/asr.h", line 5060
File "../libasr/asr.h", line 4774
File "/Users/brian/Dropbox/Mac/Documents/GitHub/lpython/src/libasr/codegen/asr_to_llvm.cpp", line 2976
finish_module_init_function_prototype(x);
File "/Users/brian/Dropbox/Mac/Documents/GitHub/lpython/src/libasr/codegen/asr_to_llvm.cpp", line 3927
ASR::Function_t *s = ASR::down_cast<ASR::Function_t>(item.second);
File "/Users/brian/Dropbox/Mac/Documents/GitHub/lpython/src/libasr/codegen/asr_to_llvm.cpp", line 3683
visit_procedures(x);
File "/Users/brian/Dropbox/Mac/Documents/GitHub/lpython/src/libasr/codegen/asr_to_llvm.cpp", line 3880
this->visit_stmt(*x.m_body[i]);
File "../libasr/asr.h", line 5077
File "../libasr/asr.h", line 4827
File "/Users/brian/Dropbox/Mac/Documents/GitHub/lpython/src/libasr/codegen/asr_to_llvm.cpp", line 5603
create_loop(x.m_name, [=]() {
File "/Users/brian/Dropbox/Mac/Documents/GitHub/lpython/src/libasr/codegen/asr_to_llvm.cpp", line 321
start_new_block(loopbody); {
File "/Users/brian/Dropbox/Mac/Documents/GitHub/lpython/src/libasr/codegen/asr_to_llvm.cpp", line 5609
this->visit_stmt(*x.m_body[i]);
File "../libasr/asr.h", line 5077
File "../libasr/asr.h", line 4834
File "/Users/brian/Dropbox/Mac/Documents/GitHub/lpython/src/libasr/codegen/asr_to_llvm.cpp", line 5063
this->visit_stmt(*(block->m_body[i]));
File "../libasr/asr.h", line 5077
File "../libasr/asr.h", line 4827
File "/Users/brian/Dropbox/Mac/Documents/GitHub/lpython/src/libasr/codegen/asr_to_llvm.cpp", line 5603
create_loop(x.m_name, [=]() {
File "/Users/brian/Dropbox/Mac/Documents/GitHub/lpython/src/libasr/codegen/asr_to_llvm.cpp", line 321
start_new_block(loopbody); {
File "/Users/brian/Dropbox/Mac/Documents/GitHub/lpython/src/libasr/codegen/asr_to_llvm.cpp", line 5609
this->visit_stmt(*x.m_body[i]);
File "../libasr/asr.h", line 5077
File "../libasr/asr.h", line 4800
File "/Users/brian/Dropbox/Mac/Documents/GitHub/lpython/src/libasr/codegen/asr_to_llvm.cpp", line 4283
handle_array_section_association_to_pointer(x);
File "/Users/brian/Dropbox/Mac/Documents/GitHub/lpython/src/libasr/codegen/asr_to_llvm.cpp", line 4247
LCOMPILERS_ASSERT(target_rank > 0);
AssertFailed: target_rank > 0
(lp) ┌─(~/Documents/GitHub/lpython/integration_tests)───────────────────────────────────────(brian@MacBook-Pro:s001)─┐
116 changes: 116 additions & 0 deletions ISSUES/UNHANDLED-EXCEPTIONS/Issue2480.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
import numpy
from lpython import (i16, i32, ccallback, c_p_pointer, Pointer, u64, CPtr, i64)

######## ALL THE LINES WITH EIGHT COMMENT MARKS ARE THE ONES WE NEED TO
######## BRING UP! AS IT STANDS, THIS CODE WORKS IN LPYTHON MAIN AS OF 4
######## FEBRUARY 2024.

# https://numpy.org/devdocs/reference/typing.html
######## from numpy.typing import NDArray


# plan for 30 Jan 2024 --
# step 0: comment out this code and ./build_baryon.sh to run on APU
# emulator; or ./run_full_emulation.sh to run in CPython.
# step 1: side-by-side numpy implementation in full-emulation
# - get there line-by-line
# = focus on gvml_add_u16 first


# Issue 2480 reproducer (filed under ISSUES/UNHANDLED-EXCEPTIONS per the
# file path).
#
# What the active (non-commented) code does:
#   * wraps the C pointers A, B and C as flat i16 arrays of length n*m,
#     m*l and n*l respectively;
#   * declares a 2-D array A_nm and, for each row, assigns a length-m slice
#     of pA_nm to the row section A_nm[row, :];
#   * runs a blocked loop nest over jj / ii / i / k whose bodies are `pass`.
# Everything prefixed with ######## is the NumPy code the authors want to
# enable eventually; it is not executed. The function returns None.
#
# Parameters:
#   n, m, l  -- matrix dimensions (A is n x m, B is m x l, C is n x l
#               according to the pointer lengths below).
#   M1       -- accepted but never read in this function.
#   M2       -- block size used as the step of the `ii` loop and the bound
#               of the `i` loops.
#   A, B, C  -- raw C pointers to the source (A, B) and destination (C)
#               buffers.
#
# NOTE(review): `array` and `Array` are used below but are not among the
# names imported at the top of this file as shown in the diff (only
# `import numpy` and a list of lpython names) -- confirm.
def numpy_side_by_side(n: i32, m: i32, l: i32, M1: i32, M2: i32,
                       A: CPtr, B: CPtr, C: CPtr) -> \
        None: ######## NDArray[numpy.int16]:
    VR_SIZE: i32 = 32_768

    # In the primary example, n = 15, m = 3, l = 32_768,
    # M1 = 1, M2 = 5

    # source GSI L4 arrays
    pA_nm: Pointer[i16[:]] = c_p_pointer(A, i16[:], array([n * m]))
    # pB_ml is created but not read by any active statement below.
    pB_ml: Pointer[i16[:]] = c_p_pointer(B, i16[:], array([m * l]))

    # source numpy arrays
    ######## A_nm: NDArray[numpy.int16] = numpy.zeros((n, m), dtype=numpy.int16)
    ######## for row in range(n):
    ######## A_nm[row,:] = pA_nm[(row * m):((row + 1) * m)]
    A_nm: Array[i16, n, m]
    row : i32
    for row in range(n):
        # Row-section assignment from a slice of the flat pointer array:
        # elements [row*m, (row+1)*m) of pA_nm go into row `row` of A_nm.
        # NOTE(review): presumably this is the statement the issue is
        # about -- confirm against the issue report.
        A_nm[row,:] = pA_nm[(row * m):((row + 1) * m)]


    ######## B_ml: NDArray[numpy.int16] = numpy.zeros((m, l), dtype=numpy.int16)
    ######## for row in range(m):
    ######## B_ml[row,:] = pB_ml[(row * l):((row + 1) * l)]

    # # destination numpy array
    ######## C_nl: NDArray[numpy.int16] = numpy.zeros((n, l), dtype=numpy.int16)

    # destination GSI L4 array
    # pC_nl is created but not read by any active statement below.
    pC_nl: Pointer[i16[:]] = c_p_pointer(C, i16[:], array([n * l]))

    # First, accumulate outer product without blocking. This is
    # the code we would -ultimately- like to compile. Notice that
    # all GSI-specific L1, L4, MMB are hidden.

    k: i32
    ######## for k in range(0, m):
    ######## C_nl += numpy.outer(A_nm[:,k], B_ml[k,:])
    ######## pass

    # expect
    # [[ 5 8 11 ... 20 23 26],
    # [ 8 14 20 ... 38 44 50],
    # [11 20 29 ... 56 65 74], ...
    #
    # [ 8 14 20 ... 38 44 50],
    # [11 20 29 ... 56 65 74],
    # [14 26 38 ... 74 86 98]]
    # Dummy assignment that exists only as a convenient debugger stop.
    set_breakpoint_here_and_inspect_C_nl : i32 = 0

    # Second, with explicit blocking. This is a stepping-stone
    # for our back-end. Notice that L1 and MMB are hidden.

    # T_1l: NDArray[numpy.int16] = numpy.zeros((1, l), dtype=numpy.int16)
    # B_1l: NDArray[numpy.int16] = numpy.zeros((1, l), dtype=numpy.int16)
    # A_ik is declared for the commented-out statements in the inner loop;
    # no active statement assigns or reads it.
    A_ik: i16
    jj: i32
    ii: i32
    i: i32
    # NOTE(review): in the commented-out statements below, `i + ii` indexes
    # the first axis of both A_nm and C_nl, so the "A cols and B rows"
    # wording on the `ii` loop looks inaccurate -- confirm.
    for jj in range(0, l, VR_SIZE): # each VR-col chunk in B and C
        for ii in range(0, n, M2): # each M2 block in A cols and B rows
            for i in range(0, M2): # zero-out rows of C
                ######## C_nl[i + ii, :] = 0
                pass
            for k in range(0, m): # rows of B
                # B_1l[0, :] = B_ml[k, :]
                for i in range(0, M2):
                    ######## A_ik = A_nm[i + ii, k]
                    # broadcast a single element of A
                    # T_1l[0, :] = A_ik
                    # pointwise (Hadamard) product:
                    # T_1l[0, :] = np.multiply(B_1l[0, :], T_1l[0, :])
                    # C_nl[i + ii, :] += T_1l[0, :]
                    # optimization without the temporaries
                    ######## C_nl[i + ii, :] += B_ml[k, :] * A_ik
                    pass

    set_breakpoint_here_and_inspect_C_nl = 0

    ######## return C_nl

# Driver for the reproducer: sets up the dimensions n = 15, m = 3,
# l = 32_768, M1 = 1, M2 = 5, calls numpy_side_by_side, then prints a
# greeting.
def main():
    n : i32 = 15
    m : i32 = 3
    l : i32 = 32_768
    M1 : i32 = 1
    M2 : i32 = 5
    # The three CPtr variables are declared without being assigned a value
    # before they are passed on.
    A_l4 : CPtr
    B_l4 : CPtr
    C_l4 : CPtr
    numpy_side_by_side(n, m, l, M1, M2, A_l4, B_l4, C_l4)
    print ("hello, world!")


# Script entry point.
if __name__ == "__main__":
    main()
Loading
Loading