lcompilers · rebcabin · Feb 5, 2024 · Feb 5, 2024 · Feb 5, 2024 · Feb 5, 2024
diff --git a/.gitignore b/.gitignore
@@ -169,6 +169,7 @@ MANIFEST
 ## Editor Files
 .vscode/
 .vs/
+.idea/
 
 ## Build Files
 */bin/lpython

diff --git a/ISSUES/Issue2496.py b/ISSUES/Issue2496.py
@@ -0,0 +1,22 @@
+from numpy import array, empty, int16
+from lpython import (i16, i32, c_p_pointer, Pointer, CPtr, TypeVar)
+
+
+Tn = TypeVar("Tn")
+Tm = TypeVar("Tm")
+Tl = TypeVar("Tl")
+
+
+def THIS_WORKS(Anm_l4: CPtr, Tn: i32, Tm: i32, l: i32) -> i16[Tn, Tm]:  # Anm_l4 and l are never read; Tn/Tm shadow the module-level TypeVars
+    A_nm: i16[Tn, Tm] = empty((Tn, Tm), dtype=int16)  # (Tn, Tm) int16 array; contents are not initialized
+    return A_nm
+
+
+def THIS_DOESNT_WORK(d: i16[Tm, Tn], b: CPtr, Tm: i32, Tn: i32) -> None:  # NOTE(review): d's shape names Tm/Tn before they appear as parameters, unlike THIS_WORKS -- presumably the trigger; confirm against issue 2496
+    B: Pointer[i16[:]] = c_p_pointer(b, i16[:], array([Tm * Tn]))  # b accessed as a 1-D i16 buffer declared with Tm * Tn elements
+    i: i32
+    j: i32
+    for i in range(Tm):
+        for j in range(Tn):
+            d[i, j] = B[(i * Tn) + j]  # flat index i * Tn + j maps to element (i, j)
+
diff --git a/ISSUES/Issue2499.py b/ISSUES/Issue2499.py
@@ -0,0 +1,18 @@
+from lpython import i32, i16, Const
+VR_SIZE: i32 = 32_768
+l: Const[i32] = VR_SIZE  # a Const initialized from the non-Const global declared just above
+n: Const[i32] = 15
+m: Const[i32] = 3
+k: i32
+M2: Const[i32] = 5
+A_ik: i16  # declared but never used in this script
+jj: i32
+ii: i32
+i: i32
+for jj in range(0, l, VR_SIZE):  # each VR-col chunk in B and C (l == VR_SIZE, so jj only takes the value 0)
+    for ii in range(0, n, M2):  # each M2 block in A cols and B rows (ii = 0, 5, 10)
+        for i in range(0, M2):  # zero-out rows of C
+            pass
+        for k in range(0, m):  # rows of B
+            for i in range(0, M2):  # reuses loop variable i from the sibling loop above
+                pass
diff --git a/ISSUES/Issue2503.py b/ISSUES/Issue2503.py
@@ -0,0 +1,25 @@
+from lpython import (i16, i32, Const)
+from numpy import empty, int16
+dim: Const[i32] = 10
+
+
+def foo():
+    """Negative indices produce random results each run."""
+    A: i16[dim] = empty((dim,), dtype=int16)  # length comes from the module-level Const 'dim' (10)
+    ww: i32
+    for ww in range(dim):
+        A[ww] = i16(ww + 1)  # A[k] = k + 1, so every element is assigned before the print
+    print(A[0], A[1], A[2], "...", A[-3], A[-2], A[-1])  # under NumPy indexing the last three would be 8 9 10
+
+
+def bar(dim_: i32):
+    """Negative indices always produce zero when 'dim' is a parameter."""
+    A: i16[dim_] = empty((dim_,), dtype=int16)  # same as foo(), but the length is the runtime argument dim_
+    ww: i32
+    for ww in range(dim_):
+        A[ww] = i16(ww + 1)  # A[k] = k + 1, so every element is assigned before the print
+    print(A[0], A[1], A[2], "...", A[-3], A[-2], A[-1])  # for dim_ == 10 (the only call in this file) NumPy indexing would give 8 9 10
+
+
+foo()
+bar(10)
diff --git a/ISSUES/Issue2509.py b/ISSUES/Issue2509.py
@@ -0,0 +1,5 @@
+
+def main(option: bool = False):  # 'option' carries a default value
+    print("option: ", option)
+
+main()  # called with no arguments, so the default for 'option' has to be supplied
diff --git a/ISSUES/SIGSEGV/Issue2498.py b/ISSUES/SIGSEGV/Issue2498.py
@@ -0,0 +1,20 @@
+from numpy import array, empty, int16
+from lpython import (i16, i32, ccallback, c_p_pointer, Pointer, u64, CPtr, i64,
+                     ccall, sizeof, Array, Allocatable, TypeVar, Const)
+
+
+rows = TypeVar("rows")
+cols = TypeVar("cols")
+
+
+def spot_print_lpython_array(a: i16[:], rows: i32, cols: i32) -> i16[rows, cols]:  # parameters rows/cols shadow the module-level TypeVars of the same names
+    pass  # no return statement despite the i16[rows, cols] return annotation; this function is never called in this file
+
+
+def main() -> i32:
+    print ("hello, world!")
+    return 0  # the caller in the __main__ guard discards this value
+
+
+if __name__ == "__main__":
+    main()
diff --git a/ISSUES/UNHANDLED-EXCEPTIONS/Issue2479.py b/ISSUES/UNHANDLED-EXCEPTIONS/Issue2479.py
@@ -0,0 +1,117 @@
+import numpy
+from lpython import (i16, i32, ccallback, c_p_pointer, Pointer, u64, CPtr, i64)
+
+######## ALL THE LINES WITH EIGHT COMMENT MARKS ARE THE ONES WE NEED TO
+######## BRING UP!  AS IT STANDS, THIS CODE WORKS IN LPYTHON MAIN AS OF 4
+######## FEBRUARY 2024.
+
+# https://numpy.org/devdocs/reference/typing.html
+######## from numpy.typing import NDArray
+
+
+# plan for 30 Jan 2024 --
+# step 0: comment out this code and ./build_baryon.sh to run on APU
+#         emulator; or ./run_full_emulation.sh to run in CPython.
+# step 1: side-by-side numpy implementation in full-emulation
+#         - get there line-by-line
+#         - focus on gvml_add_u16 first
+
+
+def numpy_side_by_side(n: i32, m: i32, l: i32, M1: i32, M2: i32,
+                       A: CPtr, B: CPtr, C: CPtr) -> \
+        None: ######## NDArray[numpy.int16]:
+    VR_SIZE: i32 = 32_768  # step of the jj loop further down
+
+    # In the primary example, n = 15, m = 3, l = 32_768,
+    # M1 = 1, M2 = 5
+
+    # source GSI L4 arrays
+    pA_nm: Pointer[i16[:]] = c_p_pointer(A, i16[:], array([n * m]))  # NOTE(review): 'array' is not imported in this file (only 'import numpy')
+    pB_ml: Pointer[i16[:]] = c_p_pointer(B, i16[:], array([m * l]))  # pB_ml is only referenced in commented-out code
+
+    # source numpy arrays
+    ######## A_nm: NDArray[numpy.int16] = numpy.zeros((n, m), dtype=numpy.int16)
+    ######## for row in range(n):
+    ########     A_nm[row,:] = pA_nm[(row * m):((row + 1) * m)]
+    A_nm: Array[i16, n, m]  # NOTE(review): 'Array' is not in this file's lpython import list
+    row : i32
+    for row in range(n):
+        col : i32  # declared inside the loop body
+        for col in range(m):
+            A_nm[row, col] = pA_nm[(row * m):((row * m) + col)]  # assigns a slice to a single element; NOTE(review): presumably what trips 'target_rank > 0' in Issue2479.txt -- confirm
+
+    ######## B_ml: NDArray[numpy.int16] = numpy.zeros((m, l), dtype=numpy.int16)
+    ######## for row in range(m):
+    ########     B_ml[row,:] = pB_ml[(row * l):((row + 1) * l)]
+
+    # destination numpy array
+    ######## C_nl: NDArray[numpy.int16] = numpy.zeros((n, l), dtype=numpy.int16)
+
+    # destination GSI L4 array
+    pC_nl: Pointer[i16[:]] = c_p_pointer(C, i16[:], array([n * l]))  # not referenced again in this function
+
+    # First, accumulate outer product without blocking. This is
+    # the code we would -ultimately- like to compile. Notice that
+    # all GSI-specific L1, L4, MMB are hidden.
+
+    k: i32
+    ######## for k in range(0, m):
+    ########     C_nl += numpy.outer(A_nm[:,k], B_ml[k,:])
+    ########     pass
+
+    # expect
+    # [[ 5  8 11 ... 20 23 26],
+    #  [ 8 14 20 ... 38 44 50],
+    #  [11 20 29 ... 56 65 74], ...
+    #
+    #  [ 8 14 20 ... 38 44 50],
+    #  [11 20 29 ... 56 65 74],
+    #  [14 26 38 ... 74 86 98]]
+    set_breakpoint_here_and_inspect_C_nl : i32 = 0  # no-op assignment; per its name, a line to put a debugger breakpoint on
+
+    # Second, with explicit blocking. This is a stepping-stone
+    # for our back-end. Notice that L1 and MMB are hidden.
+
+    # T_1l: NDArray[numpy.int16] = numpy.zeros((1, l), dtype=numpy.int16)
+    # B_1l: NDArray[numpy.int16] = numpy.zeros((1, l), dtype=numpy.int16)
+    A_ik: i16  # only referenced in commented-out code
+    jj: i32
+    ii: i32
+    i: i32
+    for jj in range(0, l, VR_SIZE):  # each VR-col chunk in B and C
+        for ii in range(0, n, M2):  # each block of M2 rows of A and C (ii offsets the row index in the commented-out code below)
+            for i in range(0, M2):  # zero-out rows of C
+                ######## C_nl[i + ii, :] = 0
+                pass
+            for k in range(0, m):  # rows of B
+                # B_1l[0, :] = B_ml[k, :]
+                for i in range(0, M2):
+                    ######## A_ik = A_nm[i + ii, k]
+                    # broadcast a single element of A
+                    # T_1l[0, :] = A_ik
+                    # pointwise (Hadamard) product:
+                    # T_1l[0, :] = np.multiply(B_1l[0, :], T_1l[0, :])
+                    # C_nl[i + ii, :] += T_1l[0, :]
+                    # optimization without the temporaries
+                    ######## C_nl[i + ii, :] += B_ml[k, :] * A_ik
+                    pass
+
+    set_breakpoint_here_and_inspect_C_nl = 0
+
+    ######## return C_nl
+
+def main():
+    n  : i32 = 15
+    m  : i32 = 3
+    l  : i32 = 32_768
+    M1 : i32 = 1
+    M2 : i32 = 5
+    A_l4 : CPtr  # declared but never assigned in this function
+    B_l4 : CPtr  # declared but never assigned in this function
+    C_l4 : CPtr  # declared but never assigned in this function
+    numpy_side_by_side(n, m, l, M1, M2, A_l4, B_l4, C_l4)  # the three CPtr arguments have no value assigned at this point
+    print ("hello, world!")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/ISSUES/UNHANDLED-EXCEPTIONS/Issue2479.txt b/ISSUES/UNHANDLED-EXCEPTIONS/Issue2479.txt
@@ -0,0 +1,55 @@
+└─(10:46:54 on vector-backend ✖ ✭)──> lpython ../ISSUES/UNHANDLED-EXCEPTIONS/Issue2479.py     1 ↵ ──(Mon,Feb05)─┘
+Internal Compiler Error: Unhandled exception
+Traceback (most recent call last):
+  File "/Users/brian/Dropbox/Mac/Documents/GitHub/lpython/src/bin/lpython.cpp", line 1872
+    err = compile_python_to_object_file(arg_file, tmp_o, runtime_library_dir,
+  File "/Users/brian/Dropbox/Mac/Documents/GitHub/lpython/src/bin/lpython.cpp", line 824
+    res = fe.get_llvm3(*asr, pass_manager, diagnostics, infile);
+  File "/Users/brian/Dropbox/Mac/Documents/GitHub/lpython/src/lpython/python_evaluator.cpp", line 71
+    run_fn, infile);
+  File "/Users/brian/Dropbox/Mac/Documents/GitHub/lpython/src/libasr/codegen/asr_to_llvm.cpp", line 9282
+    v.visit_asr((ASR::asr_t&)asr);
+  File "../libasr/asr.h", line 5057
+  File "../libasr/asr.h", line 5033
+  File "../libasr/asr.h", line 5058
+  File "../libasr/asr.h", line 4766
+  File "/Users/brian/Dropbox/Mac/Documents/GitHub/lpython/src/libasr/codegen/asr_to_llvm.cpp", line 932
+    ASR::symbol_t *mod = x.m_symtab->get_symbol(item);
+  File "../libasr/asr.h", line 5060
+  File "../libasr/asr.h", line 4774
+  File "/Users/brian/Dropbox/Mac/Documents/GitHub/lpython/src/libasr/codegen/asr_to_llvm.cpp", line 2976
+    finish_module_init_function_prototype(x);
+  File "/Users/brian/Dropbox/Mac/Documents/GitHub/lpython/src/libasr/codegen/asr_to_llvm.cpp", line 3927
+    ASR::Function_t *s = ASR::down_cast<ASR::Function_t>(item.second);
+  File "/Users/brian/Dropbox/Mac/Documents/GitHub/lpython/src/libasr/codegen/asr_to_llvm.cpp", line 3683
+    visit_procedures(x);
+  File "/Users/brian/Dropbox/Mac/Documents/GitHub/lpython/src/libasr/codegen/asr_to_llvm.cpp", line 3880
+    this->visit_stmt(*x.m_body[i]);
+  File "../libasr/asr.h", line 5077
+  File "../libasr/asr.h", line 4827
+  File "/Users/brian/Dropbox/Mac/Documents/GitHub/lpython/src/libasr/codegen/asr_to_llvm.cpp", line 5603
+    create_loop(x.m_name, [=]() {
+  File "/Users/brian/Dropbox/Mac/Documents/GitHub/lpython/src/libasr/codegen/asr_to_llvm.cpp", line 321
+    start_new_block(loopbody); {
+  File "/Users/brian/Dropbox/Mac/Documents/GitHub/lpython/src/libasr/codegen/asr_to_llvm.cpp", line 5609
+    this->visit_stmt(*x.m_body[i]);
+  File "../libasr/asr.h", line 5077
+  File "../libasr/asr.h", line 4834
+  File "/Users/brian/Dropbox/Mac/Documents/GitHub/lpython/src/libasr/codegen/asr_to_llvm.cpp", line 5063
+    this->visit_stmt(*(block->m_body[i]));
+  File "../libasr/asr.h", line 5077
+  File "../libasr/asr.h", line 4827
+  File "/Users/brian/Dropbox/Mac/Documents/GitHub/lpython/src/libasr/codegen/asr_to_llvm.cpp", line 5603
+    create_loop(x.m_name, [=]() {
+  File "/Users/brian/Dropbox/Mac/Documents/GitHub/lpython/src/libasr/codegen/asr_to_llvm.cpp", line 321
+    start_new_block(loopbody); {
+  File "/Users/brian/Dropbox/Mac/Documents/GitHub/lpython/src/libasr/codegen/asr_to_llvm.cpp", line 5609
+    this->visit_stmt(*x.m_body[i]);
+  File "../libasr/asr.h", line 5077
+  File "../libasr/asr.h", line 4800
+  File "/Users/brian/Dropbox/Mac/Documents/GitHub/lpython/src/libasr/codegen/asr_to_llvm.cpp", line 4283
+    handle_array_section_association_to_pointer(x);
+  File "/Users/brian/Dropbox/Mac/Documents/GitHub/lpython/src/libasr/codegen/asr_to_llvm.cpp", line 4247
+    LCOMPILERS_ASSERT(target_rank > 0);
+AssertFailed: target_rank > 0
+(lp) ┌─(~/Documents/GitHub/lpython/integration_tests)───────────────────────────────────────(brian@MacBook-Pro:s001)─┐
diff --git a/ISSUES/UNHANDLED-EXCEPTIONS/Issue2480.py b/ISSUES/UNHANDLED-EXCEPTIONS/Issue2480.py
@@ -0,0 +1,116 @@
+import numpy
+from lpython import (i16, i32, ccallback, c_p_pointer, Pointer, u64, CPtr, i64)
+
+######## ALL THE LINES WITH EIGHT COMMENT MARKS ARE THE ONES WE NEED TO
+######## BRING UP!  AS IT STANDS, THIS CODE WORKS IN LPYTHON MAIN AS OF 4
+######## FEBRUARY 2024.
+
+# https://numpy.org/devdocs/reference/typing.html
+######## from numpy.typing import NDArray
+
+
+# plan for 30 Jan 2024 --
+# step 0: comment out this code and ./build_baryon.sh to run on APU
+#         emulator; or ./run_full_emulation.sh to run in CPython.
+# step 1: side-by-side numpy implementation in full-emulation
+#         - get there line-by-line
+#         - focus on gvml_add_u16 first
+
+
+def numpy_side_by_side(n: i32, m: i32, l: i32, M1: i32, M2: i32,
+                       A: CPtr, B: CPtr, C: CPtr) -> \
+        None: ######## NDArray[numpy.int16]:
+    VR_SIZE: i32 = 32_768  # step of the jj loop further down
+
+    # In the primary example, n = 15, m = 3, l = 32_768,
+    # M1 = 1, M2 = 5
+
+    # source GSI L4 arrays
+    pA_nm: Pointer[i16[:]] = c_p_pointer(A, i16[:], array([n * m]))  # NOTE(review): 'array' is not imported in this file (only 'import numpy')
+    pB_ml: Pointer[i16[:]] = c_p_pointer(B, i16[:], array([m * l]))  # pB_ml is only referenced in commented-out code
+
+    # source numpy arrays
+    ######## A_nm: NDArray[numpy.int16] = numpy.zeros((n, m), dtype=numpy.int16)
+    ######## for row in range(n):
+    ########     A_nm[row,:] = pA_nm[(row * m):((row + 1) * m)]
+    A_nm: Array[i16, n, m]  # NOTE(review): 'Array' is not in this file's lpython import list
+    row : i32
+    for row in range(n):
+        A_nm[row,:] = pA_nm[(row * m):((row + 1) * m)]  # row 'row' of A_nm <- m consecutive elements of the flat buffer; NOTE(review): presumably the statement this reproducer targets -- confirm against issue 2480
+
+
+    ######## B_ml: NDArray[numpy.int16] = numpy.zeros((m, l), dtype=numpy.int16)
+    ######## for row in range(m):
+    ########     B_ml[row,:] = pB_ml[(row * l):((row + 1) * l)]
+
+    # destination numpy array
+    ######## C_nl: NDArray[numpy.int16] = numpy.zeros((n, l), dtype=numpy.int16)
+
+    # destination GSI L4 array
+    pC_nl: Pointer[i16[:]] = c_p_pointer(C, i16[:], array([n * l]))  # not referenced again in this function
+
+    # First, accumulate outer product without blocking. This is
+    # the code we would -ultimately- like to compile. Notice that
+    # all GSI-specific L1, L4, MMB are hidden.
+
+    k: i32
+    ######## for k in range(0, m):
+    ########     C_nl += numpy.outer(A_nm[:,k], B_ml[k,:])
+    ########     pass
+
+    # expect
+    # [[ 5  8 11 ... 20 23 26],
+    #  [ 8 14 20 ... 38 44 50],
+    #  [11 20 29 ... 56 65 74], ...
+    #
+    #  [ 8 14 20 ... 38 44 50],
+    #  [11 20 29 ... 56 65 74],
+    #  [14 26 38 ... 74 86 98]]
+    set_breakpoint_here_and_inspect_C_nl : i32 = 0  # no-op assignment; per its name, a line to put a debugger breakpoint on
+
+    # Second, with explicit blocking. This is a stepping-stone
+    # for our back-end. Notice that L1 and MMB are hidden.
+
+    # T_1l: NDArray[numpy.int16] = numpy.zeros((1, l), dtype=numpy.int16)
+    # B_1l: NDArray[numpy.int16] = numpy.zeros((1, l), dtype=numpy.int16)
+    A_ik: i16  # only referenced in commented-out code
+    jj: i32
+    ii: i32
+    i: i32
+    for jj in range(0, l, VR_SIZE):  # each VR-col chunk in B and C
+        for ii in range(0, n, M2):  # each block of M2 rows of A and C (ii offsets the row index in the commented-out code below)
+            for i in range(0, M2):  # zero-out rows of C
+                ######## C_nl[i + ii, :] = 0
+                pass
+            for k in range(0, m):  # rows of B
+                # B_1l[0, :] = B_ml[k, :]
+                for i in range(0, M2):
+                    ######## A_ik = A_nm[i + ii, k]
+                    # broadcast a single element of A
+                    # T_1l[0, :] = A_ik
+                    # pointwise (Hadamard) product:
+                    # T_1l[0, :] = np.multiply(B_1l[0, :], T_1l[0, :])
+                    # C_nl[i + ii, :] += T_1l[0, :]
+                    # optimization without the temporaries
+                    ######## C_nl[i + ii, :] += B_ml[k, :] * A_ik
+                    pass
+
+    set_breakpoint_here_and_inspect_C_nl = 0
+
+    ######## return C_nl
+
+def main():
+    n  : i32 = 15
+    m  : i32 = 3
+    l  : i32 = 32_768
+    M1 : i32 = 1
+    M2 : i32 = 5
+    A_l4 : CPtr  # declared but never assigned in this function
+    B_l4 : CPtr  # declared but never assigned in this function
+    C_l4 : CPtr  # declared but never assigned in this function
+    numpy_side_by_side(n, m, l, M1, M2, A_l4, B_l4, C_l4)  # the three CPtr arguments have no value assigned at this point
+    print ("hello, world!")
+
+
+if __name__ == "__main__":
+    main()