From 853924f3e1073418e3ce9eeaefb083367c5cef7d Mon Sep 17 00:00:00 2001
From: DatanoiseTV <github@ext.no-route.org>
Date: Mon, 14 Oct 2024 19:53:20 +0200
Subject: [PATCH] Add WiP PSRAM.

---
 CMakeLists.txt                               |   28 +-
 default.ld.in                                |  316 +++++
 include/picoadk_hw.h                         |    5 -
 include/psram.h                              |   39 +
 include/{tusb_config.h => tusb_config.ha}    |    0
 lib/tlsf/.github/workflows/issue_comment.yml |   21 +
 lib/tlsf/.github/workflows/new_issues.yml    |   21 +
 lib/tlsf/.github/workflows/new_prs.yml       |   26 +
 lib/tlsf/README.md                           |   92 ++
 lib/tlsf/tlsf.c                              | 1290 ++++++++++++++++++
 lib/tlsf/tlsf.h                              |   95 ++
 lib/tlsf/tlsf_block_functions.h              |  123 ++
 lib/tlsf/tlsf_common.h                       |  127 ++
 src/main.cpp                                 |   96 +-
 src/picoadk_hw.cpp                           |    7 +-
 src/psram.cpp                                |  440 ++++++
 tools/simplesub.py                           |   29 +
 17 files changed, 2679 insertions(+), 76 deletions(-)
 create mode 100644 default.ld.in
 create mode 100644 include/psram.h
 rename include/{tusb_config.h => tusb_config.ha} (100%)
 create mode 100644 lib/tlsf/.github/workflows/issue_comment.yml
 create mode 100644 lib/tlsf/.github/workflows/new_issues.yml
 create mode 100644 lib/tlsf/.github/workflows/new_prs.yml
 create mode 100644 lib/tlsf/README.md
 create mode 100644 lib/tlsf/tlsf.c
 create mode 100644 lib/tlsf/tlsf.h
 create mode 100644 lib/tlsf/tlsf_block_functions.h
 create mode 100644 lib/tlsf/tlsf_common.h
 create mode 100644 src/psram.cpp
 create mode 100644 tools/simplesub.py

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 1e977b1..15b6a6a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -79,19 +79,20 @@ endif()
 
 add_executable(main
     # vultsrc/dsp.vult
-    src/usb_descriptors.c
+    #src/usb_descriptors.c
     src/main.cpp 
     src/picoadk_hw.cpp
-    src/midi_input_usb.cpp
+    #src/midi_input_usb.cpp
     src/get_serial.c
+    src/psram.cpp
     lib/FreeRTOS-Kernel/portable/MemMang/heap_3.c
 )
 
 pico_set_program_name(main "main")
 pico_set_program_version(main "0.1")
 
-pico_enable_stdio_uart(main 1)
-pico_enable_stdio_usb(main 0)
+#pico_enable_stdio_uart(main 1)
+pico_enable_stdio_usb(main 1)
 
 # Add the standard library to the build
 target_link_libraries(main pico_stdlib)
@@ -114,9 +115,10 @@ target_link_libraries(main
         pico_util_buffer
         pico_multicore
         pico_stdio_uart
-        tinyusb_device
-        tinyusb_host
-        tinyusb_board
+        
+        #tinyusb_device
+        #tinyusb_host
+        #tinyusb_board
 	Audio
         #Oled
         FreeRTOS-Kernel
@@ -125,14 +127,22 @@ target_link_libraries(main
 target_compile_definitions(main PRIVATE
         USE_AUDIO_I2S=1
         PICO_USE_SW_SPIN_LOCKS=1
+        RP2350_PSRAM_CS=0
 )
 
 target_link_options(main PRIVATE
         -Xlinker --print-memory-usage
 )
 
-# TODO: PSRAM Linker Map
-# pico_set_linker_script(main ${CMAKE_BINARY_DIR}/memmap.ld)
+# Render default.ld from default.ld.in with tools/simplesub.py; BYPRODUCTS declares the generated file so generators such as Ninja know which rule produces the script handed to pico_set_linker_script().
+add_custom_target(generate_linker_script ALL
+    COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/tools/simplesub.py -i ${CMAKE_CURRENT_SOURCE_DIR}/default.ld.in -o ${CMAKE_CURRENT_BINARY_DIR}/default.ld --sub __FLASH_LENGTH__ ${FLASH_LENGTH} --sub __EEPROM_START__ ${EEPROM_START} --sub __FS_START__ ${FS_START} --sub __FS_END__ ${FS_END} --sub __RAM_LENGTH__ ${RAM_LENGTH} --sub __PSRAM_LENGTH__ ${PSRAM_LENGTH}
+    DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/default.ld.in ${CMAKE_CURRENT_SOURCE_DIR}/tools/simplesub.py
+    BYPRODUCTS ${CMAKE_CURRENT_BINARY_DIR}/default.ld VERBATIM)
+
+add_dependencies(main generate_linker_script)
+
+pico_set_linker_script(main ${CMAKE_CURRENT_BINARY_DIR}/default.ld)
 
 file(GLOB files "${CMAKE_CURRENT_LIST_DIR}/piosrc/*.pio")
 foreach(file ${files})
diff --git a/default.ld.in b/default.ld.in
new file mode 100644
index 0000000..dadee37
--- /dev/null
+++ b/default.ld.in
@@ -0,0 +1,316 @@
+/* Based on GCC ARM embedded samples.
+   Defines the following symbols for use by code:
+    __exidx_start
+    __exidx_end
+    __etext
+    __data_start__
+    __preinit_array_start
+    __preinit_array_end
+    __init_array_start
+    __init_array_end
+    __fini_array_start
+    __fini_array_end
+    __data_end__
+    __bss_start__
+    __bss_end__
+    __end__
+    end
+    __HeapLimit
+    __StackLimit
+    __StackTop
+    __stack (== StackTop)
+*/
+
+MEMORY
+{
+    FLASH(rx) : ORIGIN = 0x10000000, LENGTH = 4M
+    PSRAM(rwx) : ORIGIN = 0x11000000, LENGTH = 8M
+    RAM(rwx) : ORIGIN =  0x20000000, LENGTH = 512k
+    SCRATCH_X(rwx) : ORIGIN = 0x20080000, LENGTH = 4k
+    SCRATCH_Y(rwx) : ORIGIN = 0x20081000, LENGTH = 4k
+}
+
+ENTRY(_entry_point)
+
+SECTIONS
+{
+    .flash_begin : {
+        __flash_binary_start = .;
+    } > FLASH
+
+    /* The bootrom will enter the image at the point indicated in your
+       IMAGE_DEF, which is usually the reset handler of your vector table.
+
+       The debugger will use the ELF entry point, which is the _entry_point
+       symbol, and in our case is *different from the bootrom's entry point.*
+       This is used to go back through the bootrom on debugger launches only,
+       to perform the same initial flash setup that would be performed on a
+       cold boot.
+    */
+
+    .text : {
+        __logical_binary_start = .;
+        KEEP (*(.vectors))
+        KEEP (*(.binary_info_header))
+        __binary_info_header_end = .;
+        KEEP (*(.embedded_block))
+        __embedded_block_end = .;
+        KEEP (*(.reset))
+        /* TODO revisit this now memset/memcpy/float in ROM */
+        /* bit of a hack right now to exclude all floating point and time critical (e.g. memset, memcpy) code from
+         * FLASH ... we will include anything excluded here in .data below by default */
+        *(.init)
+        *libgcc.a:cmse_nonsecure_call.o
+        *(EXCLUDE_FILE(*libgcc.a: *libc.a:*lib_a-mem*.o *libm.a:) .text*)
+        *(.fini)
+        /* Pull all c'tors into .text */
+        *crtbegin.o(.ctors)
+        *crtbegin?.o(.ctors)
+        *(EXCLUDE_FILE(*crtend?.o *crtend.o) .ctors)
+        *(SORT(.ctors.*))
+        *(.ctors)
+        /* Followed by destructors */
+        *crtbegin.o(.dtors)
+        *crtbegin?.o(.dtors)
+        *(EXCLUDE_FILE(*crtend?.o *crtend.o) .dtors)
+        *(SORT(.dtors.*))
+        *(.dtors)
+
+        . = ALIGN(4);
+        /* preinit data */
+        PROVIDE_HIDDEN (__preinit_array_start = .);
+        KEEP(*(SORT(.preinit_array.*)))
+        KEEP(*(.preinit_array))
+        PROVIDE_HIDDEN (__preinit_array_end = .);
+
+        . = ALIGN(4);
+        /* init data */
+        PROVIDE_HIDDEN (__init_array_start = .);
+        KEEP(*(SORT(.init_array.*)))
+        KEEP(*(.init_array))
+        PROVIDE_HIDDEN (__init_array_end = .);
+
+        . = ALIGN(4);
+        /* fini data */
+        PROVIDE_HIDDEN (__fini_array_start = .);
+        *(SORT(.fini_array.*))
+        *(.fini_array)
+        PROVIDE_HIDDEN (__fini_array_end = .);
+
+        *(.eh_frame*)
+        . = ALIGN(4);
+    } > FLASH
+
+    /* Note the boot2 section is optional, and should be discarded if there is
+       no reference to it *inside* the binary, as it is not called by the
+       bootrom. (The bootrom performs a simple best-effort XIP setup and
+       leaves it to the binary to do anything more sophisticated.) However
+       there is still a size limit of 256 bytes, to ensure the boot2 can be
+       stored in boot RAM.
+
+       Really this is a "XIP setup function" -- the name boot2 is historic and
+       refers to its dual-purpose on RP2040, where it also handled vectoring
+       from the bootrom into the user image.
+    */
+
+    .boot2 : {
+        __boot2_start__ = .;
+        *(.boot2)
+        __boot2_end__ = .;
+    } > FLASH
+
+    ASSERT(__boot2_end__ - __boot2_start__ <= 256,
+        "ERROR: Pico second stage bootloader must be no more than 256 bytes in size")
+
+    .rodata : {
+        *(EXCLUDE_FILE(*libgcc.a: *libc.a:*lib_a-mem*.o *libm.a:) .rodata*)
+        *(.srodata*)
+        . = ALIGN(4);
+        *(SORT_BY_ALIGNMENT(SORT_BY_NAME(.flashdata*)))
+        . = ALIGN(4);
+    } > FLASH
+
+    .ARM.extab :
+    {
+        *(.ARM.extab* .gnu.linkonce.armextab.*)
+    } > FLASH
+
+    __exidx_start = .;
+    .ARM.exidx :
+    {
+        *(.ARM.exidx* .gnu.linkonce.armexidx.*)
+    } > FLASH
+    __exidx_end = .;
+
+    /* Machine inspectable binary information */
+    . = ALIGN(4);
+    __binary_info_start = .;
+    .binary_info :
+    {
+        KEEP(*(.binary_info.keep.*))
+        *(.binary_info.*)
+    } > FLASH
+    __binary_info_end = .;
+    . = ALIGN(4);
+
+    .ram_vector_table (NOLOAD): {
+        *(.ram_vector_table)
+    } > RAM
+
+    .uninitialized_data (NOLOAD): {
+        . = ALIGN(4);
+        *(.uninitialized_data*)
+    } > RAM
+
+    .data : {
+        __data_start__ = .;
+        *(vtable)
+
+        *(.time_critical*)
+
+        /* remaining .text and .rodata; i.e. stuff we exclude above because we want it in RAM */
+        *(.text*)
+        . = ALIGN(4);
+        *(.rodata*)
+        . = ALIGN(4);
+
+        *(.data*)
+        *(.sdata*)
+
+        . = ALIGN(4);
+        *(.after_data.*)
+        . = ALIGN(4);
+        /* mutex data */
+        PROVIDE_HIDDEN (__mutex_array_start = .);
+        KEEP(*(SORT(.mutex_array.*)))
+        KEEP(*(.mutex_array))
+        PROVIDE_HIDDEN (__mutex_array_end = .);
+
+        *(.jcr)
+        . = ALIGN(4);
+    } > RAM AT> FLASH
+
+    .tdata : {
+        . = ALIGN(4);
+		*(.tdata .tdata.* .gnu.linkonce.td.*)
+        /* All data end */
+        __tdata_end = .;
+    } > RAM AT> FLASH
+    PROVIDE(__data_end__ = .);
+
+    /* __etext is (for backwards compatibility) the name of the .data init source pointer (...) */
+    __etext = LOADADDR(.data);
+
+    .tbss (NOLOAD) : {
+        . = ALIGN(4);
+        __bss_start__ = .;
+        __tls_base = .;
+        *(.tbss .tbss.* .gnu.linkonce.tb.*)
+        *(.tcommon)
+
+        __tls_end = .;
+    } > RAM
+
+    .bss (NOLOAD) : {
+        . = ALIGN(4);
+        __tbss_end = .;
+
+        *(SORT_BY_ALIGNMENT(SORT_BY_NAME(.bss*)))
+        *(COMMON)
+        PROVIDE(__global_pointer$ = . + 2K);
+        *(.sbss*)
+        . = ALIGN(4);
+        __bss_end__ = .;
+    } > RAM
+
+    .heap (NOLOAD):
+    {
+        __end__ = .;
+        end = __end__;
+        KEEP(*(.heap*))
+        /* historically on GCC sbrk was growing past __HeapLimit to __StackLimit, however
+           to be more compatible, we now set __HeapLimit explicitly to where the end of the heap is */
+        . = ORIGIN(RAM) + LENGTH(RAM);
+        __HeapLimit = .;
+    } > RAM
+
+    /* Start and end symbols must be word-aligned */
+    .scratch_x : {
+        __scratch_x_start__ = .;
+        *(.scratch_x.*)
+        . = ALIGN(4);
+        __scratch_x_end__ = .;
+    } > SCRATCH_X AT > FLASH
+    __scratch_x_source__ = LOADADDR(.scratch_x);
+
+    .scratch_y : {
+        __scratch_y_start__ = .;
+        *(.scratch_y.*)
+        . = ALIGN(4);
+        __scratch_y_end__ = .;
+    } > SCRATCH_Y AT > FLASH
+    __scratch_y_source__ = LOADADDR(.scratch_y);
+
+    /* .stack*_dummy section doesn't contain any symbols. It is only
+     * used by the linker to calculate the size of the stack sections, and assign
+     * values to stack symbols later
+     *
+     * stack1 section may be empty/missing if platform_launch_core1 is not used */
+
+    /* by default we put core 0 stack at the end of scratch Y, so that if core 1
+     * stack is not used then all of SCRATCH_X is free.
+     */
+    .stack1_dummy (NOLOAD):
+    {
+        *(.stack1*)
+    } > SCRATCH_X
+    .stack_dummy (NOLOAD):
+    {
+        KEEP(*(.stack*))
+    } > SCRATCH_Y
+
+    .flash_end : {
+        KEEP(*(.embedded_end_block*))
+        PROVIDE(__flash_binary_end = .);
+    } > FLASH =0xaa
+
+    .psram (NOLOAD) : {
+        __psram_start__ = .;
+        *(.psram*)
+        . = ALIGN(4096);
+        __psram_heap_start__ = .;
+    } > PSRAM
+
+    /* stack limit is poorly named, but historically is maximum heap ptr */
+    __StackLimit = ORIGIN(RAM) + LENGTH(RAM);
+    __StackOneTop = ORIGIN(SCRATCH_X) + LENGTH(SCRATCH_X);
+    __StackTop = ORIGIN(SCRATCH_Y) + LENGTH(SCRATCH_Y);
+    __StackOneBottom = __StackOneTop - SIZEOF(.stack1_dummy);
+    __StackBottom = __StackTop - SIZEOF(.stack_dummy);
+    PROVIDE(__stack = __StackTop);
+
+
+    /* picolibc and LLVM */
+    PROVIDE (__heap_start = __end__);
+    PROVIDE (__heap_end = __HeapLimit);
+    PROVIDE( __tls_align = MAX(ALIGNOF(.tdata), ALIGNOF(.tbss)) );
+    PROVIDE( __tls_size_align = (__tls_size + __tls_align - 1) & ~(__tls_align - 1));
+    PROVIDE( __arm32_tls_tcb_offset = MAX(8, __tls_align) );
+
+    /* llvm-libc */
+    PROVIDE (_end = __end__);
+    PROVIDE (__llvm_libc_heap_limit = __HeapLimit);
+
+    /* TLSF */
+    PROVIDE (__psram_start = __psram_start__);
+    PROVIDE (__psram_heap_start = __psram_heap_start__);
+
+    /* Check if data + heap + stack exceeds RAM limit */
+    ASSERT(__StackLimit >= __HeapLimit, "region RAM overflowed")
+
+    ASSERT( __binary_info_header_end - __logical_binary_start <= 1024, "Binary info must be in first 1024 bytes of the binary")
+    ASSERT( __embedded_block_end - __logical_binary_start <= 4096, "Embedded block must be in first 4096 bytes of the binary")
+
+    /* todo assert on extra code */
+}
+
diff --git a/include/picoadk_hw.h b/include/picoadk_hw.h
index 111de35..261a4b7 100644
--- a/include/picoadk_hw.h
+++ b/include/picoadk_hw.h
@@ -17,11 +17,6 @@
 #include "hardware/structs/rosc.h"
 #include "helper.h"
 
-#if __has_include("bsp/board_api.h")
-#include "bsp/board_api.h"
-#else
-#include "bsp/board.h"
-#endif
 
 void picoadk_init();
 int adc128_read(uint8_t chan);
diff --git a/include/psram.h b/include/psram.h
new file mode 100644
index 0000000..3292065
--- /dev/null
+++ b/include/psram.h
@@ -0,0 +1,39 @@
+/**
+    @file psram.h
+
+    @brief This file contains a function that is used to detect and initialize PSRAM on
+    SparkFun rp2350 boards.
+*/
+
+/*
+    The MIT License (MIT)
+
+    Copyright (c) 2024 SparkFun Electronics
+
+    Permission is hereby granted, free of charge, to any person obtaining a
+    copy of this software and associated documentation files (the "Software"),
+    to deal in the Software without restriction, including without limitation
+    the rights to use, copy, modify, merge, publish, distribute, sublicense,
+    and/or sell copies of the Software, and to permit persons to whom the
+    Software is furnished to do so, subject to the following conditions: The
+    above copyright notice and this permission notice shall be included in all
+    copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED
+    "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
+    NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
+    PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+    HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+    ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+    CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+*/
+
+#pragma once
+#include <stddef.h> /* size_t, used by the prototypes below */
+void psram_reinit_timing();
+void *__psram_malloc(size_t size);
+void __psram_free(void *ptr);
+void *__psram_realloc(void *ptr, size_t size);
+void *__psram_calloc(size_t num, size_t size);
+size_t __psram_largest_free_block();
+size_t __psram_total_space();
+size_t __psram_total_used();
diff --git a/include/tusb_config.h b/include/tusb_config.ha
similarity index 100%
rename from include/tusb_config.h
rename to include/tusb_config.ha
diff --git a/lib/tlsf/.github/workflows/issue_comment.yml b/lib/tlsf/.github/workflows/issue_comment.yml
new file mode 100644
index 0000000..da06a4c
--- /dev/null
+++ b/lib/tlsf/.github/workflows/issue_comment.yml
@@ -0,0 +1,21 @@
+name: Sync issue comments to JIRA
+
+# This workflow will be triggered when new issue comment is created (including PR comments)
+on: issue_comment
+
+jobs:
+  sync_issue_comments_to_jira:
+    name: Sync Issue Comments to Jira
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Sync issue comments to JIRA
+        uses: espressif/sync-jira-actions@v1
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          JIRA_PASS: ${{ secrets.JIRA_PASS }}
+          JIRA_PROJECT: IDFGH
+          JIRA_COMPONENT: heap
+          JIRA_URL: ${{ secrets.JIRA_URL }}
+          JIRA_USER: ${{ secrets.JIRA_USER }}
diff --git a/lib/tlsf/.github/workflows/new_issues.yml b/lib/tlsf/.github/workflows/new_issues.yml
new file mode 100644
index 0000000..e67def9
--- /dev/null
+++ b/lib/tlsf/.github/workflows/new_issues.yml
@@ -0,0 +1,21 @@
+name: Sync issues to Jira
+
+# This workflow will be triggered when a new issue is opened
+on: issues
+
+jobs:
+  sync_issues_to_jira:
+    name: Sync issues to Jira
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Sync GitHub issues to Jira project
+        uses: espressif/sync-jira-actions@v1
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          JIRA_PASS: ${{ secrets.JIRA_PASS }}
+          JIRA_PROJECT: IDFGH
+          JIRA_COMPONENT: heap
+          JIRA_URL: ${{ secrets.JIRA_URL }}
+          JIRA_USER: ${{ secrets.JIRA_USER }}
diff --git a/lib/tlsf/.github/workflows/new_prs.yml b/lib/tlsf/.github/workflows/new_prs.yml
new file mode 100644
index 0000000..7f025b1
--- /dev/null
+++ b/lib/tlsf/.github/workflows/new_prs.yml
@@ -0,0 +1,26 @@
+name: Sync remain PRs to Jira
+
+# This workflow will be triggered every hour, to sync remaining PRs (i.e. PRs with zero comment) to Jira project
+# Note that, PRs can also get synced when new PR comment is created
+on:
+  schedule:
+    - cron: "0 * * * *"
+
+jobs:
+  sync_prs_to_jira:
+    name: Sync PRs to Jira
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Sync PRs to Jira project
+        uses: espressif/sync-jira-actions@v1
+        with:
+          cron_job: true
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          JIRA_PASS: ${{ secrets.JIRA_PASS }}
+          JIRA_PROJECT: IDFGH
+          JIRA_COMPONENT: heap
+          JIRA_URL: ${{ secrets.JIRA_URL }}
+          JIRA_USER: ${{ secrets.JIRA_USER }}
diff --git a/lib/tlsf/README.md b/lib/tlsf/README.md
new file mode 100644
index 0000000..982919f
--- /dev/null
+++ b/lib/tlsf/README.md
@@ -0,0 +1,92 @@
+# tlsf
+Two-Level Segregated Fit memory allocator implementation.
+Written by Matthew Conte (matt@baisoku.org).
+Released under the BSD license.
+
+Features
+--------
+  * O(1) cost for malloc, free, realloc, memalign
+  * Extremely low overhead per allocation (4 bytes)
+  * Low overhead per TLSF management of pools (~3kB)
+  * Low fragmentation
+  * Compiles to only a few kB of code and data
+  * Support for adding and removing memory pool regions on the fly
+
+Caveats
+-------
+  * Currently, assumes architecture can make 4-byte aligned accesses
+  * Not designed to be thread safe; the user must provide this
+
+Notes
+-----
+This code was based on the TLSF 1.4 spec and documentation found at:
+
+	http://www.gii.upv.es/tlsf/main/docs
+
+It also leverages the TLSF 2.0 improvement to shrink the per-block overhead from 8 to 4 bytes.
+
+History
+-------
+2016/04/10 - v3.1
+  * Code moved to github
+  * tlsfbits.h rolled into tlsf.c
+  * License changed to BSD
+
+2014/02/08 - v3.0
+  * This version is based on improvements from 3DInteractive GmbH
+  * Interface changed to allow more than one memory pool
+  * Separated pool handling from control structure (adding, removing, debugging)
+  * Control structure and pools can still be constructed in the same memory block
+  * Memory blocks for control structure and pools are checked for alignment
+  * Added functions to retrieve control structure size, alignment size, min and max block size, overhead of pool structure, and overhead of a single allocation
+  * Minimal Pool size is tlsf_block_size_min() + tlsf_pool_overhead()
+  * Pool must be empty when it is removed, in order to allow O(1) removal
+
+2011/10/20 - v2.0
+  * 64-bit support
+  * More compiler intrinsics for ffs/fls
+  * ffs/fls verification during TLSF creation in debug builds
+
+2008/04/04 - v1.9
+  * Add tlsf_heap_check, a heap integrity check
+  * Support a predefined tlsf_assert macro
+  * Fix realloc case where block should shrink; if adjacent block is in use, execution would go down the slow path
+
+2007/02/08 - v1.8
+  * Fix for unnecessary reallocation in tlsf_realloc
+
+2007/02/03 - v1.7
+  * tlsf_heap_walk takes a callback
+  * tlsf_realloc now returns NULL on failure
+  * tlsf_memalign optimization for 4-byte alignment
+  * Usage of size_t where appropriate
+
+2006/11/21 - v1.6
+  * ffs/fls broken out into tlsfbits.h
+  * tlsf_overhead queries per-pool overhead
+
+2006/11/07 - v1.5
+  * Smart realloc implementation
+  * Smart memalign implementation
+
+2006/10/11 - v1.4
+  * Add some ffs/fls implementations
+  * Minor code footprint reduction
+
+2006/09/14 - v1.3
+  * Profiling indicates heavy use of blocks of size 1-128, so implement small block handling
+  * Reduce pool overhead by about 1kb
+  * Reduce minimum block size from 32 to 12 bytes
+  * Realloc bug fix
+
+2006/09/09 - v1.2
+  * Add tlsf_block_size
+  * Static assertion mechanism for invariants
+  * Minor bugfixes 
+
+2006/09/01 - v1.1
+  * Add tlsf_realloc
+  * Add tlsf_walk_heap
+
+2006/08/25 - v1.0
+  * First release
diff --git a/lib/tlsf/tlsf.c b/lib/tlsf/tlsf.c
new file mode 100644
index 0000000..1e4c348
--- /dev/null
+++ b/lib/tlsf/tlsf.c
@@ -0,0 +1,1290 @@
+/*
+ * SPDX-FileCopyrightText: 2006-2016 Matthew Conte
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#include <string.h>
+#include <limits.h>
+#include <stdio.h>
+#include "tlsf.h"
+#include "tlsf_common.h"
+#include "tlsf_block_functions.h"
+
+#if defined(__cplusplus)
+#define tlsf_decl inline
+#else
+#define tlsf_decl static inline __attribute__((always_inline))
+#endif
+
+/*
+** Architecture-specific bit manipulation routines.
+**
+** TLSF achieves O(1) cost for malloc and free operations by limiting
+** the search for a free block to a free list of guaranteed size
+** adequate to fulfill the request, combined with efficient free list
+** queries using bitmasks and architecture-specific bit-manipulation
+** routines.
+**
+** Most modern processors provide instructions to count leading zeroes
+** in a word, find the lowest and highest set bit, etc. These
+** specific implementations will be used when available, falling back
+** to a reasonably efficient generic implementation.
+**
+** NOTE: TLSF spec relies on ffs/fls returning value 0..31.
+** ffs/fls return 1-32 by default, returning 0 for error.
+*/
+
+/*
+** Detect whether or not we are building for a 32- or 64-bit (LP/LLP)
+** architecture. There is no reliable portable method at compile-time.
+*/
+#if defined (__alpha__) || defined (__ia64__) || defined (__x86_64__) \
+	|| defined (_WIN64) || defined (__LP64__) || defined (__LLP64__)
+#define TLSF_64BIT
+#endif
+
+/*
+** gcc 3.4 and above have builtin support, specialized for architecture.
+** Some compilers masquerade as gcc; patchlevel test filters them out.
+*/
+#if defined (__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)) \
+	&& defined (__GNUC_PATCHLEVEL__)
+
+#if defined (__SNC__)
+/* SNC for Playstation 3. */
+
+tlsf_decl int tlsf_ffs(unsigned int word)
+{
+	const unsigned int reverse = word & (~word + 1);
+	const int bit = 32 - __builtin_clz(reverse);
+	return bit - 1;
+}
+
+#else
+
+tlsf_decl int tlsf_ffs(unsigned int word)
+{
+	return __builtin_ffs(word) - 1;
+}
+
+#endif
+
+tlsf_decl int tlsf_fls(unsigned int word)
+{
+	const int bit = word ? 32 - __builtin_clz(word) : 0;
+	return bit - 1;
+}
+
+#elif defined (_MSC_VER) && (_MSC_VER >= 1400) && (defined (_M_IX86) || defined (_M_X64))
+/* Microsoft Visual C++ support on x86/X64 architectures. */
+
+#include <intrin.h>
+
+#pragma intrinsic(_BitScanReverse)
+#pragma intrinsic(_BitScanForward)
+
+tlsf_decl int tlsf_fls(unsigned int word)
+{
+	unsigned long index;
+	return _BitScanReverse(&index, word) ? index : -1;
+}
+
+tlsf_decl int tlsf_ffs(unsigned int word)
+{
+	unsigned long index;
+	return _BitScanForward(&index, word) ? index : -1;
+}
+
+#elif defined (_MSC_VER) && defined (_M_PPC)
+/* Microsoft Visual C++ support on PowerPC architectures. */
+
+#include <ppcintrinsics.h>
+
+tlsf_decl int tlsf_fls(unsigned int word)
+{
+	const int bit = 32 - _CountLeadingZeros(word);
+	return bit - 1;
+}
+
+tlsf_decl int tlsf_ffs(unsigned int word)
+{
+	const unsigned int reverse = word & (~word + 1);
+	const int bit = 32 - _CountLeadingZeros(reverse);
+	return bit - 1;
+}
+
+#elif defined (__ARMCC_VERSION)
+/* RealView Compilation Tools for ARM */
+
+tlsf_decl int tlsf_ffs(unsigned int word)
+{
+	const unsigned int reverse = word & (~word + 1);
+	const int bit = 32 - __clz(reverse);
+	return bit - 1;
+}
+
+tlsf_decl int tlsf_fls(unsigned int word)
+{
+	const int bit = word ? 32 - __clz(word) : 0;
+	return bit - 1;
+}
+
+#elif defined (__ghs__)
+/* Green Hills support for PowerPC */
+
+#include <ppc_ghs.h>
+
+tlsf_decl int tlsf_ffs(unsigned int word)
+{
+	const unsigned int reverse = word & (~word + 1);
+	const int bit = 32 - __CLZ32(reverse);
+	return bit - 1;
+}
+
+tlsf_decl int tlsf_fls(unsigned int word)
+{
+	const int bit = word ? 32 - __CLZ32(word) : 0;
+	return bit - 1;
+}
+
+#else
+/* Fall back to generic implementation. */
+
+tlsf_decl int tlsf_fls_generic(unsigned int word)
+{
+	int bit = 32;
+
+	if (!word) bit -= 1;
+	if (!(word & 0xffff0000)) { word <<= 16; bit -= 16; }
+	if (!(word & 0xff000000)) { word <<= 8; bit -= 8; }
+	if (!(word & 0xf0000000)) { word <<= 4; bit -= 4; }
+	if (!(word & 0xc0000000)) { word <<= 2; bit -= 2; }
+	if (!(word & 0x80000000)) { word <<= 1; bit -= 1; }
+
+	return bit;
+}
+
+/* Implement ffs in terms of fls. */
+tlsf_decl int tlsf_ffs(unsigned int word)
+{
+	return tlsf_fls_generic(word & (~word + 1)) - 1;
+}
+
+tlsf_decl int tlsf_fls(unsigned int word)
+{
+	return tlsf_fls_generic(word) - 1;
+}
+
+#endif
+
+/* Possibly 64-bit version of tlsf_fls. */
+#if defined (TLSF_64BIT)
+tlsf_decl int tlsf_fls_sizet(size_t size)
+{
+	int high = (int)(size >> 32);
+	int bits = 0;
+	if (high)
+	{
+		bits = 32 + tlsf_fls(high);
+	}
+	else
+	{
+		bits = tlsf_fls((int)size & 0xffffffff);
+
+	}
+	return bits;
+}
+#else
+#define tlsf_fls_sizet tlsf_fls
+#endif
+
+#undef tlsf_decl
+
+/*
+** Static assertion mechanism.
+*/
+
+#define _tlsf_glue2(x, y) x ## y
+#define _tlsf_glue(x, y) _tlsf_glue2(x, y)
+#define tlsf_static_assert(exp) \
+	typedef char _tlsf_glue(static_assert, __LINE__) [(exp) ? 1 : -1]
+
+/* This code has been tested on 32- and 64-bit (LP/LLP) architectures. */
+tlsf_static_assert(sizeof(int) * CHAR_BIT == 32);
+tlsf_static_assert(sizeof(size_t) * CHAR_BIT >= 32);
+tlsf_static_assert(sizeof(size_t) * CHAR_BIT <= 64);
+
+static inline __attribute__((always_inline)) size_t align_up(size_t x, size_t align)
+{
+	tlsf_assert(0 == (align & (align - 1)) && "must align to a power of two");
+	return (x + (align - 1)) & ~(align - 1);
+}
+
+static inline __attribute__((always_inline)) size_t align_down(size_t x, size_t align)
+{
+	tlsf_assert(0 == (align & (align - 1)) && "must align to a power of two");
+	return x - (x & (align - 1));
+}
+
+static inline __attribute__((always_inline)) void* align_ptr(const void* ptr, size_t align)
+{
+	const tlsfptr_t aligned =
+		(tlsf_cast(tlsfptr_t, ptr) + (align - 1)) & ~(align - 1);
+	tlsf_assert(0 == (align & (align - 1)) && "must align to a power of two");
+	return tlsf_cast(void*, aligned);
+}
+
+/*
+** Adjust an allocation size to be aligned to `align`, and no smaller than the
+** internal minimum. Returns 0 when size is 0, when aligning it overflows, or when it is too large.
+*/
+static inline __attribute__((always_inline)) size_t adjust_request_size(tlsf_t tlsf, size_t size, size_t align)
+{
+	size_t adjust = 0;
+	if (size)
+	{
+		const size_t aligned = align_up(size, align);
+
+		/* aligned < size means align_up() wrapped around; the aligned size must also stay below block_size_max or we'll go out of bounds on sl_bitmap */
+		if (aligned >= size && aligned < tlsf_block_size_max(tlsf))
+		{
+			adjust = tlsf_max(aligned, block_size_min);
+		}
+	}
+	return adjust;
+}
+
+/*
+** TLSF utility functions. In most cases, these are direct translations of
+** the documentation found in the white paper.
+*/
+
+static inline __attribute__((always_inline)) void mapping_insert(control_t* control, size_t size, int* fli, int* sli)
+{
+	int fl, sl;
+	if (size < control->small_block_size)
+	{
+		/* Store small blocks in first list. */
+		fl = 0;
+		sl = tlsf_cast(int, size) / (control->small_block_size / control->sl_index_count);
+	}
+	else
+	{
+		fl = tlsf_fls_sizet(size);
+		sl = tlsf_cast(int, size >> (fl - control->sl_index_count_log2)) ^ (1 << control->sl_index_count_log2);
+		fl -= (control->fl_index_shift - 1);
+	}
+	*fli = fl;
+	*sli = sl;
+}
+
+/* This version rounds up to the next block size (for allocations) */
+static inline __attribute__((always_inline)) void mapping_search(control_t* control, size_t* size, int* fli, int* sli)
+{
+	if (*size >= control->small_block_size)
+	{
+		const size_t round = (1 << (tlsf_fls_sizet(*size) - control->sl_index_count_log2));
+		*size = align_up(*size, round);
+	}
+	mapping_insert(control, *size, fli, sli);
+}
+
+static inline __attribute__((always_inline)) block_header_t* search_suitable_block(control_t* control, int* fli, int* sli)
+{
+	int fl = *fli;
+	int sl = *sli;
+
+	/*
+	** First, search for a block in the list associated with the given
+	** fl/sl index.
+	*/
+	unsigned int sl_map = control->sl_bitmap[fl] & (~0U << sl);
+	if (!sl_map)
+	{
+		/* No block exists. Search in the next largest first-level list. */
+		const unsigned int fl_map = control->fl_bitmap & (~0U << (fl + 1));
+		if (!fl_map)
+		{
+			/* No free blocks available, memory has been exhausted. */
+			return 0;
+		}
+
+		fl = tlsf_ffs(fl_map);
+		*fli = fl;
+		sl_map = control->sl_bitmap[fl];
+	}
+	tlsf_assert(sl_map && "internal error - second level bitmap is null");
+	sl = tlsf_ffs(sl_map);
+	*sli = sl;
+
+	/* Return the first block in the free list. */
+	return control->blocks[fl * control->sl_index_count + sl];
+}
+
+/* Remove a free block from the free list.*/
+static inline __attribute__((always_inline)) void remove_free_block(control_t* control, block_header_t* block, int fl, int sl)
+{
+	block_header_t* prev = block->prev_free;
+	block_header_t* next = block->next_free;
+	tlsf_assert(prev && "prev_free field can not be null");
+	tlsf_assert(next && "next_free field can not be null");
+	next->prev_free = prev;
+	prev->next_free = next;
+
+	/* If this block is the head of the free list, set new head. */
+	if (control->blocks[fl * control->sl_index_count + sl] == block)
+	{
+		control->blocks[fl * control->sl_index_count + sl] = next;
+
+		/* If the new head is null, clear the bitmap. */
+		if (next == &control->block_null)
+		{
+			control->sl_bitmap[fl] &= ~(1U << sl);
+
+			/* If the second bitmap is now empty, clear the fl bitmap. */
+			if (!control->sl_bitmap[fl])
+			{
+				control->fl_bitmap &= ~(1U << fl);
+			}
+		}
+	}
+}
+
+/* Push `block` at the front of the free list (fl, sl) and flag that list as populated. */
+static inline __attribute__((always_inline)) void insert_free_block(control_t* control, block_header_t* block, int fl, int sl)
+{
+	block_header_t** head = &control->blocks[fl * control->sl_index_count + sl];
+	block_header_t* old_head = *head;
+	tlsf_assert(old_head && "free list cannot have a null entry");
+	tlsf_assert(block && "cannot insert a null entry into the free list");
+
+	/* The new head has the null block before it and the old head after it. */
+	block->next_free = old_head;
+	block->prev_free = &control->block_null;
+	old_head->prev_free = block;
+
+	tlsf_assert(block_to_ptr(block) == align_ptr(block_to_ptr(block), ALIGN_SIZE)
+		&& "block not aligned properly");
+	/* Publish the new head, then set the first- and second-level bits. */
+	*head = block;
+	control->fl_bitmap |= (1U << fl);
+	control->sl_bitmap[fl] |= (1U << sl);
+}
+
+/* Unlink `block` from the free list that matches its current size. */
+static inline __attribute__((always_inline)) void block_remove(control_t* control, block_header_t* block)
+{
+	int first_level, second_level;
+	mapping_insert(control, block_size(block), &first_level, &second_level);
+	remove_free_block(control, block, first_level, second_level);
+}
+
+/* File `block` in the free list that matches its current size. */
+static inline __attribute__((always_inline)) void block_insert(control_t* control, block_header_t* block)
+{
+	int first_level, second_level;
+	mapping_insert(control, block_size(block), &first_level, &second_level);
+	insert_free_block(control, block, first_level, second_level);
+}
+
+static inline __attribute__((always_inline)) int block_can_split(block_header_t* block, size_t size)
+{
+	return sizeof(block_header_t) + size <= block_size(block); /* room for `size` bytes plus a whole header? */
+}
+
+/* Split a block into two, the second of which is free. Returns the second block. */
+static inline __attribute__((always_inline)) block_header_t* block_split(block_header_t* block, size_t size)
+{
+	/* Locate the header of the remaining (second) block.
+	 * REMINDER: remaining pointer's first field is `prev_phys_block` but this field is part of the
+	 * previous physical block. */
+	block_header_t* remaining =
+		offset_to_block(block_to_ptr(block), size - block_header_overhead);
+
+	/* `size` passed as an argument is the first block's new size, thus, the remaining block's size
+	 * is `block_size(block) - size`. However, the block's data must be preceded by the data size.
+	 * This field is NOT part of the size, so it has to be subtracted from the calculation. */
+	const size_t remain_size = block_size(block) - (size + block_header_overhead);
+
+	tlsf_assert(block_to_ptr(remaining) == align_ptr(block_to_ptr(remaining), ALIGN_SIZE)
+		&& "remaining block not aligned properly");
+
+	tlsf_assert(block_size(block) == remain_size + size + block_header_overhead);
+	block_set_size(remaining, remain_size);
+	tlsf_assert(block_size(remaining) >= block_size_min && "block split with invalid size");
+
+	block_set_size(block, size);
+	block_mark_as_free(remaining);
+
+	/**
+	 * Here is the final outcome of this function:
+	 *
+	 * block             remaining (block_ptr + size - BHO)
+	 * +                                +
+	 * |                                |
+	 * v                                v
+	 * +----------------------------------------------------------------------+
+	 * |0000|    |xxxxxxxxxxxxxxxxxxxxxx|xxxx|    |###########################|
+	 * |0000|    |xxxxxxxxxxxxxxxxxxxxxx|xxxx|    |###########################|
+	 * |0000|    |xxxxxxxxxxxxxxxxxxxxxx|xxxx|    |###########################|
+	 * |0000|    |xxxxxxxxxxxxxxxxxxxxxx|xxxx|    |###########################|
+	 * +----------------------------------------------------------------------+
+	 *      |    |                           |    |
+	 *      +    +<------------------------->+    +<------------------------->
+	 *       BHO    `size` (argument) bytes   BHO      `remain_size` bytes
+	 *
+	 * Where BHO = block_header_overhead,
+	 * 0: part of the memory owned by a `block`'s previous neighbour,
+	 * x: part of the memory owned by `block`.
+	 * #: part of the memory owned by `remaining`.
+	 */
+
+	return remaining;
+}
+
+/* Absorb a free block's storage into an adjacent previous free block. Returns `prev`. */
+static inline __attribute__((always_inline)) block_header_t* block_absorb(block_header_t* prev, block_header_t* block)
+{
+	tlsf_assert(!block_is_last(prev) && "previous block can't be last");
+	/* Note: Leaves flags untouched. `prev` gains the size of `block` plus one header overhead. */
+	prev->size += block_size(block) + block_header_overhead;
+	block_link_next(prev);
+	/* Optional hook: when set, it is handed the absorbed block and sizeof(block_header_t). */
+	if (block_absorb_post_hook != NULL)
+	{
+		block_absorb_post_hook(block, sizeof(block_header_t), POISONING_AFTER_FREE);
+	}
+
+	return prev;
+}
+
+/* If the block physically before `block` is free, fold `block` into it and return the merged block. */
+static inline __attribute__((always_inline)) block_header_t* block_merge_prev(control_t* control, block_header_t* block)
+{
+	block_header_t* prev;
+	if (!block_is_prev_free(block))
+	{
+		return block;
+	}
+	prev = block_prev(block);
+	tlsf_assert(prev && "prev physical block can't be null");
+	tlsf_assert(block_is_free(prev) && "prev block is not free though marked as such");
+	block_remove(control, prev);
+	return block_absorb(prev, block);
+}
+
+/* If the block physically after `block` is free, fold it into `block`; returns `block` either way. */
+static inline __attribute__((always_inline)) block_header_t* block_merge_next(control_t* control, block_header_t* block)
+{
+	block_header_t* following = block_next(block);
+	tlsf_assert(following && "next physical block can't be null");
+
+	if (!block_is_free(following))
+	{
+		/* Nothing to coalesce with. */
+		return block;
+	}
+	tlsf_assert(!block_is_last(block) && "previous block can't be last");
+	block_remove(control, following);
+	return block_absorb(block, following);
+}
+
+/* Trim any trailing block space off the end of a free block, return it to the pool. */
+static inline __attribute__((always_inline)) void block_trim_free(control_t* control, block_header_t* block, size_t size)
+{
+	tlsf_assert(block_is_free(block) && "block must be free");
+	if (block_can_split(block, size)) /* otherwise the block is left whole */
+	{
+		block_header_t* remaining_block = block_split(block, size);
+		block_link_next(block); /* presumably refreshes the successor's back-link (helper defined elsewhere) */
+		block_set_prev_free(remaining_block); /* `block` is still free at this point */
+		block_insert(control, remaining_block);
+	}
+}
+
+/* Trim any trailing block space off the end of a used block, return to pool. */
+static inline __attribute__((always_inline)) void block_trim_used(control_t* control, block_header_t* block, size_t size)
+{
+	tlsf_assert(!block_is_free(block) && "block must be used");
+	if (block_can_split(block, size))
+	{
+		/* Carve the tail off; `block` stays used, so the tail records a used predecessor. */
+		block_header_t* remaining_block = block_split(block, size);
+		block_set_prev_used(remaining_block);
+		/* If the next block is free, we must coalesce. */
+		remaining_block = block_merge_next(control, remaining_block);
+		block_insert(control, remaining_block);
+	}
+}
+
+static inline __attribute__((always_inline)) block_header_t* block_trim_free_leading(control_t* control, block_header_t* block, size_t size)
+{
+	block_header_t* remaining_block = block; /* returned unchanged when no split is possible */
+	if (block_can_split(block, size))
+	{
+		/* Trim the leading space: split `block` in two, the first block will be freed and the
+		 * second block will be returned. */
+		remaining_block = block_split(block, size - block_header_overhead);
+
+		/* `remaining_block` is the second block, mark its predecessor (first
+		 * block) as free. */
+		block_set_prev_free(remaining_block);
+
+		block_link_next(block);
+
+		/* Put back the first block into the free memory list. */
+		block_insert(control, block);
+	}
+
+	return remaining_block;
+}
+
+static inline __attribute__((always_inline)) block_header_t* block_locate_free(control_t* control, size_t* size)
+{
+	int fl = 0, sl = 0;
+	block_header_t* block = 0;
+	/* Finds and unlinks a free block for *size; returns 0 when *size is 0 or nothing fits. */
+	if (*size)
+	{
+		mapping_search(control, size, &fl, &sl);
+		/* `size` is passed by pointer: mapping_search may update *size. */
+		/*
+		** mapping_search can adjust the size, so for excessively large sizes it can sometimes wind up
+		** with indices that are off the end of the block array.
+		** So, we protect against that here, since this is the only callsite of mapping_search.
+		** Note that we don't need to check sl, since it comes from a modulo operation that guarantees it's always in range.
+		*/
+		if (fl < control->fl_index_count)
+		{
+			block = search_suitable_block(control, &fl, &sl);
+		}
+	}
+
+	if (block)
+	{
+		tlsf_assert(block_size(block) >= *size);
+		remove_free_block(control, block, fl, sl); /* fl/sl were updated to the list the block came from */
+	}
+
+	return block;
+}
+
+static inline __attribute__((always_inline)) void* block_prepare_used(control_t* control, block_header_t* block, size_t size)
+{
+	/* A null block (e.g. a failed search) yields a null pointer. */
+	if (!block)
+	{
+		return 0;
+	}
+	tlsf_assert(size && "size must be non-zero");
+	block_trim_free(control, block, size);
+	block_mark_as_used(block);
+	return block_to_ptr(block);
+}
+
+/* Set up `control` for a heap of at most `bytes` bytes, pointing all empty lists at the null block. NULL if `bytes` is too small. */
+static control_t* control_construct(control_t* control, size_t bytes)
+{
+	// check that the requested size can at least hold the control_t. This will allow us
+	// to fill in the field of control_t necessary to determine the final size of
+	// the metadata overhead and check that the requested size can hold
+	// this data and at least a block of minimum size
+	if (bytes < sizeof(control_t))
+	{
+		return NULL;
+	}
+
+	/* Find the closest power of two for first layer: the number of significant bits in `bytes` */
+	control->fl_index_max = 32 - __builtin_clz(bytes);
+
+	/* Adapt second layer to the pool: 8, 16 or 32 subdivisions depending on `bytes` */
+	if (bytes <= 16 * 1024) control->sl_index_count_log2 = 3;
+	else if (bytes <= 256 * 1024) control->sl_index_count_log2 = 4;
+	else control->sl_index_count_log2 = 5;
+
+	control->fl_index_shift = (control->sl_index_count_log2 + ALIGN_SIZE_LOG2);
+	control->sl_index_count = 1 << control->sl_index_count_log2;
+	control->fl_index_count = control->fl_index_max - control->fl_index_shift + 1;
+	control->small_block_size = 1 << control->fl_index_shift;
+
+	// the total size of the metadata overhead is the size of the control_t
+	// added to the size of the sl_bitmaps and the size of blocks
+	control->size = sizeof(control_t) + (sizeof(*control->sl_bitmap) * control->fl_index_count) +
+										(sizeof(*control->blocks) * (control->fl_index_count * control->sl_index_count));
+
+	// check that the requested size can hold the whole control structure and
+	// a small block at least
+	if (bytes < control->size + block_size_min)
+	{
+		return NULL;
+	}
+
+	control->block_null.next_free = &control->block_null;
+	control->block_null.prev_free = &control->block_null;
+
+	control->fl_bitmap = 0;
+	control->sl_bitmap = (unsigned int *)align_ptr(control + 1, sizeof(*control->sl_bitmap));
+	control->blocks = (block_header_t**)align_ptr(control->sl_bitmap + control->fl_index_count, sizeof(*control->blocks));
+	/* Both arrays live right after the control_t itself: sl_bitmap first, then blocks. */
+
+	/* SL_INDEX_COUNT must be <= number of bits in sl_bitmap's storage type. */
+	tlsf_assert(sizeof(unsigned int) * CHAR_BIT >= control->sl_index_count
+		&& "CHAR_BIT less than sl_index_count");
+
+	/* Ensure we've properly tuned our sizes. */
+	tlsf_assert(ALIGN_SIZE == control->small_block_size / control->sl_index_count); // NOTE(review): assert message "ALIGN_SIZE does not match" is commented out
+
+	for (int i = 0; i < control->fl_index_count; ++i)
+	{
+		control->sl_bitmap[i] = 0;
+		for (int j = 0; j < control->sl_index_count; ++j)
+		{
+			control->blocks[i * control->sl_index_count + j] = &control->block_null;
+		}
+	}
+
+	return control;
+}
+
+/*
+** Debugging utilities.
+*/
+
+typedef struct integrity_t
+{
+	int prev_status; /* free flag (0 or 1) of the block visited just before the current one */
+	int status; /* running total; integrity_walker adds a negative count for each block with failed checks */
+} integrity_t;
+
+#define tlsf_insist(x) { if (!(x)) { status--; } } /* non-fatal check: decrements the caller's local `status` on failure */
+
+static bool integrity_walker(void* ptr, size_t size, int used, void* user)
+{
+	block_header_t* block = block_from_ptr(ptr);
+	integrity_t* integ = tlsf_cast(integrity_t*, user);
+	const int this_prev_status = block_is_prev_free(block) ? 1 : 0;
+	const int this_status = block_is_free(block) ? 1 : 0;
+	const size_t this_block_size = block_size(block);
+	/* Walker used by tlsf_check_pool: `user` is an integrity_t carrying state from block to block. */
+	int status = 0;
+	tlsf_insist(integ->prev_status == this_prev_status && "prev status incorrect");
+	tlsf_insist(size == this_block_size && "block size incorrect");
+
+	if (tlsf_check_hook != NULL)
+	{
+		/* block_size(block) returns the size of the usable memory when the block is allocated.
+		 * When the block under test is free, we need to subtract from the block size the next_free
+		 * and prev_free fields of the block header as they are not a part of the usable memory
+		 * when the block is free. In addition, we also need to subtract the size of prev_phys_block
+		 * as this field is in fact part of the current free block and not part of the next (allocated)
+		 * block. Check the comments in block_split function for more details.
+		 */
+		const size_t actual_free_block_size = used ? this_block_size :
+													 this_block_size - offsetof(block_header_t, next_free)- block_header_overhead;
+
+		void* ptr_block = used ? (char*)block + block_start_offset :
+								 (char*)block + sizeof(block_header_t);
+
+		tlsf_insist(tlsf_check_hook(ptr_block, actual_free_block_size, !used));
+	}
+
+	integ->prev_status = this_status; /* becomes the expected prev-free flag of the next block */
+	integ->status += status;
+
+	return true; /* never stops the walk early */
+}
+
+
+int tlsf_check(tlsf_t tlsf)
+{
+	int i, j;
+	/* Returns 0 when consistent; every failed tlsf_insist below lowers `status` by one. */
+	control_t* control = tlsf_cast(control_t*, tlsf);
+	int status = 0;
+
+	/* Check that the free lists and bitmaps are accurate. */
+	for (i = 0; i < control->fl_index_count; ++i)
+	{
+		for (j = 0; j < control->sl_index_count; ++j)
+		{
+			const int fl_map = control->fl_bitmap & (1U << i);
+			const int sl_list = control->sl_bitmap[i];
+			const int sl_map = sl_list & (1U << j);
+			const block_header_t* block = control->blocks[i * control->sl_index_count + j];
+
+			/* Check that first- and second-level lists agree. */
+			if (!fl_map)
+			{
+				tlsf_insist(!sl_map && "second-level map must be null");
+			}
+
+			if (!sl_map)
+			{
+				tlsf_insist(block == &control->block_null && "block list must be null");
+				continue;
+			}
+
+			/* Check that there is at least one free block. */
+			tlsf_insist(sl_list && "no free blocks in second-level map");
+			tlsf_insist(block != &control->block_null && "block should not be null");
+
+			while (block != &control->block_null)
+			{
+				int fli, sli;
+				const bool is_block_free = block_is_free(block);
+				tlsf_insist(is_block_free && "block should be free");
+				tlsf_insist(!block_is_prev_free(block) && "blocks should have coalesced");
+				tlsf_insist(!block_is_free(block_next(block)) && "blocks should have coalesced");
+				tlsf_insist(block_is_prev_free(block_next(block)) && "block should be free");
+				tlsf_insist(block_size(block) >= block_size_min && "block not minimum size");
+				/* Each listed block must map back to the very list (i, j) it was found in. */
+				mapping_insert(control, block_size(block), &fli, &sli);
+				tlsf_insist(fli == i && sli == j && "block size indexed in wrong list");
+
+				block = block->next_free;
+			}
+		}
+	}
+
+	return status;
+}
+
+#undef tlsf_insist
+
+static bool default_walker(void* ptr, size_t size, int used, void* user)
+{
+	(void)user; /* Unused. %p requires a void* argument, hence the cast on the header pointer below. */
+	printf("\t%p %s size: %x (%p)\n", ptr, used ? "used" : "free", (unsigned int)size, (void*)block_from_ptr(ptr));
+	return true; /* Always ask tlsf_walk_pool to continue with the next block. */
+}
+
+void tlsf_walk_pool(pool_t pool, tlsf_walker walker, void* user)
+{
+	/* Fall back to the printing walker when the caller supplies none. */
+	tlsf_walker visit = walker ? walker : default_walker;
+	/* The first header starts one header overhead before the pool's base address. */
+	block_header_t* cursor = offset_to_block(pool, -(int)block_header_overhead);
+
+	/* Visit blocks in physical order until the last block is reached or
+	** the walker returns false. */
+	while (cursor && !block_is_last(cursor))
+	{
+		const int in_use = !block_is_free(cursor);
+		if (!visit(block_to_ptr(cursor), block_size(cursor), in_use, user))
+		{
+			break;
+		}
+
+		cursor = block_next(cursor);
+	}
+}
+
+size_t tlsf_block_size(void* ptr)
+{
+	/* A null pointer owns no block: report zero. */
+	if (!ptr)
+	{
+		return 0;
+	}
+	/* Otherwise report the size recorded in the block header of `ptr`. */
+	return block_size(block_from_ptr(ptr));
+}
+
+int tlsf_check_pool(pool_t pool)
+{
+	/* Walk every physical block with integrity_walker; the result stays 0
+	** as long as no check failed along the way. */
+	integrity_t result = { 0, 0 };
+	tlsf_walk_pool(pool, integrity_walker, &result);
+	return result.status;
+}
+
+size_t tlsf_fit_size(tlsf_t tlsf, size_t size)
+{
+	/* Returns 0 for a zero size or a NULL heap; sizes below small_block_size go through adjust_request_size. */
+	if (size == 0 || tlsf == NULL) {
+		return 0;
+	}
+	control_t* control = tlsf_cast(control_t*, tlsf);
+	if (size < control->small_block_size) {
+		return adjust_request_size(tlsf, size, ALIGN_SIZE);
+	}
+	/* because it's GoodFit, allocable size is one range lower: round `size`
+	** down to a multiple of its second-level interval. The shift is done on a
+	** size_t so that sizes with bit 31 set do not overflow a signed `1 << 31`. */
+	const size_t sl_interval = (tlsf_cast(size_t, 1) << (32 - __builtin_clz(size) - 1)) / control->sl_index_count;
+	return size & ~(sl_interval - 1);
+}
+
+/*
+** Size of the TLSF structures in a given memory block passed to
+** tlsf_create: the control_t plus its sl_bitmap and blocks arrays.
+*/
+size_t tlsf_size(tlsf_t tlsf)
+{
+	if (tlsf == NULL)
+	{
+		return 0; /* no heap, no metadata */
+	}
+	control_t* control = tlsf_cast(control_t*, tlsf);
+	return control->size; /* computed once by control_construct */
+}
+
+size_t tlsf_align_size(void)
+{
+	return ALIGN_SIZE; /* alignment that tlsf_create and tlsf_add_pool require of their memory */
+}
+
+size_t tlsf_block_size_min(void)
+{
+	return block_size_min; /* smallest block size accepted by tlsf_add_pool and block_split */
+}
+
+size_t tlsf_block_size_max(tlsf_t tlsf)
+{
+	if (tlsf == NULL)
+	{
+		return 0; /* no heap: no block can be described */
+	}
+	control_t* control = tlsf_cast(control_t*, tlsf);
+	return tlsf_cast(size_t, 1) << control->fl_index_max; /* 2^fl_index_max, fixed by control_construct */
+}
+
+/*
+** Overhead of the TLSF structures in a given memory block passed to
+** tlsf_add_pool, equal to the overhead of a free block and the
+** sentinel block.
+*/
+size_t tlsf_pool_overhead(void)
+{
+	return 2 * block_header_overhead; /* one overhead for the free block, one for the sentinel */
+}
+
+size_t tlsf_alloc_overhead(void)
+{
+	return block_header_overhead; /* a single block header overhead */
+}
+
+pool_t tlsf_add_pool(tlsf_t tlsf, void* mem, size_t bytes)
+{
+	/* Registers the `bytes`-long region at `mem` as a pool of `tlsf`: one free block covering
+	** the region, followed by a zero-size used sentinel. Returns `mem`, or 0 when rejected. */
+	block_header_t* block;
+	block_header_t* next;
+	/* A region smaller than the pool overhead yields 0 usable bytes (rejected below), not a wrapped-around size. */
+	const size_t pool_overhead = tlsf_pool_overhead();
+	const size_t pool_bytes = (bytes > pool_overhead) ? align_down(bytes - pool_overhead, ALIGN_SIZE) : 0;
+
+	if (((ptrdiff_t)mem % ALIGN_SIZE) != 0)
+	{
+		printf("tlsf_add_pool: Memory must be aligned by %u bytes.\n",
+			(unsigned int)ALIGN_SIZE);
+		return 0;
+	}
+
+	if (pool_bytes < block_size_min || pool_bytes > tlsf_block_size_max(tlsf))
+	{
+#if defined (TLSF_64BIT)
+		printf("tlsf_add_pool: Memory size must be between 0x%x and 0x%x00 bytes.\n",
+			(unsigned int)(pool_overhead + block_size_min), (unsigned int)((pool_overhead + tlsf_block_size_max(tlsf)) / 256));
+#else
+		printf("tlsf_add_pool: Memory size must be between %u and %u bytes.\n",
+			(unsigned int)(pool_overhead + block_size_min), (unsigned int)(pool_overhead + tlsf_block_size_max(tlsf)));
+#endif
+		return 0;
+	}
+
+	/*
+	** Create the main free block. Offset the start of the block slightly
+	** so that the prev_phys_block field falls outside of the pool -
+	** it will never be used.
+	*/
+	block = offset_to_block(mem, -(tlsfptr_t)block_header_overhead);
+	block_set_size(block, pool_bytes);
+	block_set_free(block);
+	block_set_prev_used(block);
+	block_insert(tlsf_cast(control_t*, tlsf), block);
+
+	/* Split the block to create a zero-size sentinel block. */
+	next = block_link_next(block);
+	block_set_size(next, 0);
+	block_set_used(next);
+	block_set_prev_free(next);
+
+	return mem;
+}
+
+void tlsf_remove_pool(tlsf_t tlsf, pool_t pool)
+{
+	/* Header of the pool's first block, one header overhead before `pool`. */
+	control_t* control = tlsf_cast(control_t*, tlsf);
+	block_header_t* first = offset_to_block(pool, -(int)block_header_overhead);
+	int first_level = 0, second_level = 0;
+
+	tlsf_assert(block_is_free(first) && "block should be free");
+	tlsf_assert(!block_is_free(block_next(first)) && "next block should not be free");
+	tlsf_assert(block_size(block_next(first)) == 0 && "next block size should be zero");
+
+	mapping_insert(control, block_size(first), &first_level, &second_level);
+	remove_free_block(control, first, first_level, second_level);
+}
+
+/*
+** TLSF main interface.
+*/
+
+#if _DEBUG
+int test_ffs_fls()
+{
+	/* Verify ffs/fls work properly. Each failed check adds its own bit to rv; 0 means all passed. */
+	int rv = 0;
+	rv += (tlsf_ffs(0) == -1) ? 0 : 0x1;
+	rv += (tlsf_fls(0) == -1) ? 0 : 0x2;
+	rv += (tlsf_ffs(1) == 0) ? 0 : 0x4;
+	rv += (tlsf_fls(1) == 0) ? 0 : 0x8;
+	rv += (tlsf_ffs(0x80000000) == 31) ? 0 : 0x10;
+	rv += (tlsf_ffs(0x80008000) == 15) ? 0 : 0x20;
+	rv += (tlsf_fls(0x80000008) == 31) ? 0 : 0x40;
+	rv += (tlsf_fls(0x7FFFFFFF) == 30) ? 0 : 0x80;
+
+#if defined (TLSF_64BIT)
+	rv += (tlsf_fls_sizet(0x80000000) == 31) ? 0 : 0x100;
+	rv += (tlsf_fls_sizet(0x100000000) == 32) ? 0 : 0x200;
+	rv += (tlsf_fls_sizet(0xffffffffffffffff) == 63) ? 0 : 0x400;
+#endif
+
+	if (rv)
+	{
+		printf("test_ffs_fls: %x ffs/fls tests failed.\n", rv);
+	}
+	return rv;
+}
+#endif
+
+tlsf_t tlsf_create(void* mem, size_t max_bytes)
+{
+#if _DEBUG
+	if (test_ffs_fls())
+	{
+		return NULL;
+	}
+#endif
+	/* Builds a TLSF control structure at `mem`, sized for `max_bytes`. No pool is added here. */
+	if (mem == NULL)
+	{
+		return NULL;
+	}
+	/* The control structure must start on an ALIGN_SIZE boundary. */
+	if (((tlsfptr_t)mem % ALIGN_SIZE) != 0)
+	{
+		printf("tlsf_create: Memory must be aligned to %u bytes.\n",
+			(unsigned int)ALIGN_SIZE);
+		return NULL;
+	}
+	/* control_construct returns NULL when max_bytes cannot hold the metadata plus a minimum block. */
+	control_t* control_ptr = control_construct(tlsf_cast(control_t*, mem), max_bytes);
+	return tlsf_cast(tlsf_t, control_ptr);
+}
+
+tlsf_t tlsf_create_with_pool(void* mem, size_t pool_bytes, size_t max_bytes)
+{
+	/* Creates the control structure at `mem` (sized for max_bytes, or pool_bytes when max_bytes is 0) and
+	** adds the rest of the pool_bytes region as the first pool. Returns NULL if either step fails. */
+	tlsf_t tlsf = tlsf_create(mem, max_bytes ? max_bytes : pool_bytes);
+	if (tlsf == NULL || pool_bytes <= tlsf_size(tlsf)) return NULL; /* no room left for a pool; avoids size_t underflow */
+	pool_t pool = tlsf_add_pool(tlsf, (char*)mem + tlsf_size(tlsf), pool_bytes - tlsf_size(tlsf));
+	return pool ? tlsf : NULL; /* do not hand out a heap whose first pool could not be added */
+}
+
+void tlsf_destroy(tlsf_t tlsf)
+{
+	/* Nothing to do: this function releases nothing and leaves the caller's memory untouched. */
+	(void)tlsf;
+}
+
+pool_t tlsf_get_pool(tlsf_t tlsf)
+{
+	return tlsf_cast(pool_t, (char*)tlsf + tlsf_size(tlsf)); /* address right after the metadata, where tlsf_create_with_pool adds its pool */
+}
+
+void* tlsf_malloc(tlsf_t tlsf, size_t size)
+{
+	control_t* control = tlsf_cast(control_t*, tlsf);
+	block_header_t* found;
+	/* adjust_request_size() returns 0 when the request exceeds the max block size. */
+	size_t wanted = adjust_request_size(tlsf, size, ALIGN_SIZE);
+	if (wanted == 0) {
+		return NULL;
+	}
+	/* block_locate_free() may raise `wanted` further while searching. */
+	found = block_locate_free(control, &wanted);
+	return block_prepare_used(control, found, wanted);
+}
+
+/**
+ * @brief Allocate memory of at least `size` bytes at a given address in the pool.
+ *
+ * @param tlsf TLSF structure to allocate memory from.
+ * @param size Minimum size, in bytes, of the memory to allocate
+ * @param address address at which the allocation must be done (aligned down to ALIGN_SIZE internally)
+ *
+ * @return pointer to the allocated memory, or NULL if the allocation cannot be placed at that address
+ */
+void* tlsf_malloc_addr(tlsf_t tlsf, size_t size, void *address)
+{
+	control_t* control = tlsf_cast(control_t*, tlsf);
+
+	/* adjust the address to be ALIGN_SIZE bytes aligned. NOTE(review): pointers are cast to unsigned int here — assumes 32-bit pointers. */
+	const unsigned int addr_adjusted = align_down(tlsf_cast(unsigned int, address), ALIGN_SIZE);
+
+	/* adjust the size to be ALIGN_SIZE bytes aligned. Add to the size the difference
+	 * between the requested address and the address_adjusted. */
+	size_t size_adjusted = align_up(size + (tlsf_cast(unsigned int, address) - addr_adjusted), ALIGN_SIZE);
+
+	/* find the free block that starts before the address in the pool and is big enough
+	 * to support the size of allocation at the given address */
+	block_header_t* block = offset_to_block(tlsf_get_pool(tlsf), -(int)block_header_overhead);
+
+	const char *alloc_start = tlsf_cast(char*, addr_adjusted);
+	const char *alloc_end = alloc_start + size_adjusted;
+	bool block_found = false;
+	do {
+		const char *block_start = tlsf_cast(char*, block_to_ptr(block));
+		const char *block_end = tlsf_cast(char*, block_to_ptr(block)) + block_size(block);
+		if (block_start <= alloc_start && block_end > alloc_start) {
+			/* A: block_end >= alloc_end. B: block is free */
+			if (block_end < alloc_end || !block_is_free(block)) {
+				/* not(A) || not(B)
+				 * We won't find another suitable block from this point on
+				 * so we can break and return NULL */
+				break;
+			}
+			/* A && B
+			 * The block can fit the alloc and is located at a position allowing for the alloc
+			 * to be placed at the given address. We can return from the while */
+			block_found = true;
+		} else if (!block_is_last(block)) {
+			/* the block doesn't match the expected criteria, continue with the next block */
+			block = block_next(block);
+		}
+
+	} while (!block_is_last(block) && block_found == false);
+
+	if (!block_found) {
+		return NULL;
+	}
+
+	/* remove block from the free list since a part of it will be used */
+	block_remove(control, block);
+
+	/* trim any leading space or add the leading space to the overall requested size
+	 * if the leading space is not big enough to store a block of minimum size */
+	const size_t space_before_addr_adjusted = addr_adjusted - tlsf_cast(unsigned int, block_to_ptr(block));
+	block_header_t *return_block = block;
+	if (space_before_addr_adjusted >= block_size_min) {
+		return_block = block_trim_free_leading(control, block, space_before_addr_adjusted); /* NOTE(review): returns `block` unchanged if it cannot split */
+	}
+	else {
+		size_adjusted += space_before_addr_adjusted;
+	}
+
+	/* trim trailing space if any and return a pointer to the first usable byte allocated */
+	return  block_prepare_used(control, return_block, size_adjusted);
+}
+
+/**
+ * @brief Allocate memory of at least `size` bytes where byte at `data_offset` will be aligned to `alignment`.
+ *
+ * This function will allocate memory pointed by `ptr`. However, the byte at `data_offset` of
+ * this piece of memory (i.e., byte at `ptr` + `data_offset`) will be aligned to `alignment`.
+ * This function is useful for allocating memory that will internally have a header, and the
+ * usable memory following the header (i.e. `ptr` + `data_offset`) must be aligned.
+ *
+ * For example, a call to `tlsf_memalign_offs(tlsf, 256, 64, 20)` will return a
+ * pointer `ptr` to free memory of minimum 64 bytes, where `ptr + 20` is aligned on `256`.
+ * So `(ptr + 20) % 256` equals 0.
+ *
+ * @param tlsf TLSF structure to allocate memory from.
+ * @param align Alignment for the returned pointer's offset.
+ * @param size Minimum size, in bytes, of the memory to allocate INCLUDING
+ *             `data_offset` bytes.
+ * @param data_offset Offset to be aligned on `alignment`. This can be 0, in
+ *                    this case, the returned pointer will be aligned on
+ *                    `alignment`. If it is not a multiple of ALIGN_SIZE,
+ *                    it will be aligned up to the closest multiple of it.
+ *
+ * @return pointer to free memory, or NULL when no suitable block is found.
+ */
+void* tlsf_memalign_offs(tlsf_t tlsf, size_t align, size_t size, size_t data_offset)
+{
+	control_t* control = tlsf_cast(control_t*, tlsf);
+	const size_t adjust = adjust_request_size(tlsf, size, ALIGN_SIZE);
+	const size_t off_adjust = align_up(data_offset, ALIGN_SIZE);
+
+	/*
+	** We must allocate an additional minimum block size bytes so that if
+	** our free block will leave an alignment gap which is smaller, we can
+	** trim a leading free block and release it back to the pool. We must
+	** do this because the previous physical block is in use, therefore
+	** the prev_phys_block field is not valid, and we can't simply adjust
+	** the size of that block.
+	*/
+	const size_t gap_minimum = sizeof(block_header_t) + off_adjust;
+	/* The offset is included in both `adjust` and `gap_minimum`, so we
+	** need to subtract it once.
+	*/
+	const size_t size_with_gap = adjust_request_size(tlsf, adjust + align + gap_minimum - off_adjust, align);
+
+	/*
+	** If alignment is less than or equal to base alignment, we're done, because
+	** we are guaranteed that the size is at least sizeof(block_header_t), enough
+	** to store next blocks' metadata. Plus, all pointers allocated will all be
+	** aligned on a 4-byte bound, so ptr + data_offset will also have this
+	** alignment constraint. Thus, the gap is not required.
+	** If we requested 0 bytes, return null, as tlsf_malloc(0) does.
+	*/
+	size_t aligned_size = (adjust && align > ALIGN_SIZE) ? size_with_gap : adjust;
+
+	block_header_t* block = block_locate_free(control, &aligned_size);
+
+	/* This can't be a static assert. */
+	tlsf_assert(sizeof(block_header_t) == block_size_min + block_header_overhead);
+
+	if (block)
+	{
+		void* ptr = block_to_ptr(block);
+		void* aligned = align_ptr(ptr, align);
+		size_t gap = tlsf_cast(size_t,
+			tlsf_cast(tlsfptr_t, aligned) - tlsf_cast(tlsfptr_t, ptr));
+
+		/*
+		** If gap size is too small or if there is no gap but we need one,
+		** offset to next aligned boundary.
+		** NOTE: No need for a gap if the alignment required is less than or is
+		** equal to ALIGN_SIZE.
+		*/
+		if ((gap && gap < gap_minimum) || (!gap && off_adjust && align > ALIGN_SIZE))
+		{
+			const size_t gap_remain = gap_minimum - gap;
+			const size_t offset = tlsf_max(gap_remain, align);
+			const void* next_aligned = tlsf_cast(void*,
+				tlsf_cast(tlsfptr_t, aligned) + offset);
+
+			aligned = align_ptr(next_aligned, align);
+			gap = tlsf_cast(size_t,
+				tlsf_cast(tlsfptr_t, aligned) - tlsf_cast(tlsfptr_t, ptr));
+		}
+
+		if (gap)
+		{
+			tlsf_assert(gap >= gap_minimum && "gap size too small");
+			block = block_trim_free_leading(control, block, gap - off_adjust);
+		}
+	}
+
+	/* Preparing the block will also trim the trailing free memory. */
+	return block_prepare_used(control, block, adjust);
+}
+
+/**
+ * @brief Same as `tlsf_memalign_offs` function but with a 0 offset.
+ * The pointer returned is aligned on `align`.
+ */
+void* tlsf_memalign(tlsf_t tlsf, size_t align, size_t size)
+{
+	return tlsf_memalign_offs(tlsf, align, size, 0); /* data_offset = 0: the pointer itself is aligned */
+}
+
+
+void tlsf_free(tlsf_t tlsf, void* ptr)
+{
+	control_t* control = tlsf_cast(control_t*, tlsf);
+	block_header_t* released;
+
+	/* Freeing a NULL pointer is a no-op. */
+	if (!ptr) return;
+	released = block_from_ptr(ptr);
+	tlsf_assert(!block_is_free(released) && "block already marked as free");
+	block_mark_as_free(released);
+	released = block_merge_prev(control, released);
+	released = block_merge_next(control, released);
+	block_insert(control, released); /* file the (possibly merged) block in a free list */
+}
+
+/*
+** The TLSF block information provides us with enough information to
+** provide a reasonably intelligent implementation of realloc, growing or
+** shrinking the currently allocated block as required.
+**
+** This routine handles the somewhat esoteric edge cases of realloc:
+** - a non-zero size with a null pointer will behave like malloc
+** - a zero size with a non-null pointer will behave like free
+** - a request that cannot be satisfied will leave the original buffer
+**   untouched
+** - an extended buffer size will leave the newly-allocated area with
+**   contents undefined
+*/
+void* tlsf_realloc(tlsf_t tlsf, void* ptr, size_t size)
+{
+	control_t* control = tlsf_cast(control_t*, tlsf);
+	void* p = 0;
+
+	/* Zero-size requests are treated as free. */
+	if (ptr && size == 0)
+	{
+		tlsf_free(tlsf, ptr);
+	}
+	/* Requests with NULL pointers are treated as malloc. */
+	else if (!ptr)
+	{
+		p = tlsf_malloc(tlsf, size);
+	}
+	else
+	{
+		block_header_t* block = block_from_ptr(ptr);
+		block_header_t* next = block_next(block);
+
+		const size_t cursize = block_size(block);
+		const size_t combined = cursize + block_size(next) + block_header_overhead;
+		const size_t adjust = adjust_request_size(tlsf, size, ALIGN_SIZE);
+
+		// if adjust is equal to 0, the size is too big: return NULL, original buffer untouched
+		if (adjust == 0)
+		{
+			return p;
+		}
+
+		tlsf_assert(!block_is_free(block) && "block already marked as free");
+
+		/*
+		** If the next block is used, or when combined with the current
+		** block, does not offer enough space, we must reallocate and copy.
+		*/
+		if (adjust > cursize && (!block_is_free(next) || adjust > combined))
+		{
+			p = tlsf_malloc(tlsf, size);
+			if (p)
+			{
+				const size_t minsize = tlsf_min(cursize, size);
+				memcpy(p, ptr, minsize);
+				tlsf_free(tlsf, ptr);
+			}
+		}
+		else
+		{
+			/* Do we need to expand to the next block? */
+			if (adjust > cursize)
+			{
+				block_merge_next(control, block);
+				block_mark_as_used(block);
+			}
+
+			/* Trim the resulting block and return the original pointer. */
+			block_trim_used(control, block, adjust);
+			p = ptr;
+		}
+	}
+
+	return p;
+}
diff --git a/lib/tlsf/tlsf.h b/lib/tlsf/tlsf.h
new file mode 100644
index 0000000..8f862b6
--- /dev/null
+++ b/lib/tlsf/tlsf.h
@@ -0,0 +1,95 @@
+/*
+ * SPDX-FileCopyrightText: 2006-2016 Matthew Conte
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#ifndef INCLUDED_tlsf
+#define INCLUDED_tlsf
+
+#include <stddef.h>
+#include <stdbool.h>
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+/* tlsf_t: a TLSF structure. Can contain 1 to N pools. */
+/* pool_t: a block of memory that TLSF can manage. */
+typedef void* tlsf_t;
+typedef void* pool_t;
+
+/* Create/destroy a memory pool. */
+tlsf_t tlsf_create(void* mem, size_t max_bytes);
+tlsf_t tlsf_create_with_pool(void* mem, size_t pool_bytes, size_t max_bytes);
+void tlsf_destroy(tlsf_t tlsf);
+pool_t tlsf_get_pool(tlsf_t tlsf);
+
+/* Add/remove memory pools. */
+pool_t tlsf_add_pool(tlsf_t tlsf, void* mem, size_t bytes);
+void tlsf_remove_pool(tlsf_t tlsf, pool_t pool);
+
+/* malloc/memalign/realloc/free replacements. */
+void* tlsf_malloc(tlsf_t tlsf, size_t size);
+void* tlsf_memalign(tlsf_t tlsf, size_t align, size_t size);
+void* tlsf_memalign_offs(tlsf_t tlsf, size_t align, size_t size, size_t offset);
+void* tlsf_malloc_addr(tlsf_t tlsf, size_t size, void *address);
+void* tlsf_realloc(tlsf_t tlsf, void* ptr, size_t size);
+void tlsf_free(tlsf_t tlsf, void* ptr);
+
+/* Returns internal block size, not original request size */
+size_t tlsf_block_size(void* ptr);
+
+/* Overheads/limits of internal structures. */
+size_t tlsf_size(tlsf_t tlsf);
+size_t tlsf_align_size(void);
+size_t tlsf_block_size_min(void);
+size_t tlsf_block_size_max(tlsf_t tlsf);
+size_t tlsf_pool_overhead(void);
+size_t tlsf_alloc_overhead(void);
+
+/**
+ * @brief Return the allocable size based on the size passed
+ * as parameter
+ * 
+ * @param tlsf Pointer to the tlsf structure
+ * @param size The allocation size
+ * @return size_t The updated allocation size
+ */
+size_t tlsf_fit_size(tlsf_t tlsf, size_t size);
+
+/* Debugging. */
+typedef bool (*tlsf_walker)(void* ptr, size_t size, int used, void* user);
+void tlsf_walk_pool(pool_t pool, tlsf_walker walker, void* user);
+/* Returns nonzero if any internal consistency check fails. */
+int tlsf_check(tlsf_t tlsf);
+int tlsf_check_pool(pool_t pool);
+
+/*!
+ * @brief Weak function filling the given memory with a given fill pattern.
+ * 
+ * @param start: pointer to the start of the memory region to fill
+ * @param size: size of the memory region to fill
+ * @param is_free: Indicates whether the pattern used to fill the region should be
+ * an after-free or an after-allocation pattern.
+ */
+__attribute__((weak)) void block_absorb_post_hook(void *start, size_t size, bool is_free);
+
+/**
+ * @brief Weak function called on every free block of memory allowing the user to implement
+ * application specific checks on the memory.
+ * 
+ * @param start The start pointer to the memory of a block
+ * @param size The size of the memory in the block
+ * @param is_free Set to true when the memory belongs to a free block.
+ * False if it belongs to an allocated block.
+ * @return true The checks found no inconsistency in the memory
+ * @return false The checks in the function highlighted an inconsistency in the memory
+ */
+__attribute__((weak))  bool tlsf_check_hook(void *start, size_t size, bool is_free);
+
+#if defined(__cplusplus)
+};
+#endif
+
+#endif
diff --git a/lib/tlsf/tlsf_block_functions.h b/lib/tlsf/tlsf_block_functions.h
new file mode 100644
index 0000000..e2cb06b
--- /dev/null
+++ b/lib/tlsf/tlsf_block_functions.h
@@ -0,0 +1,123 @@
+/*
+ * SPDX-FileCopyrightText: 2006-2016 Matthew Conte
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#pragma once
+#include "tlsf_common.h"
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+/*
+** block_header_t member functions.
+*/
+/* Size stored in the header with both status bits (free / prev-free) masked off. */
+static inline __attribute__((always_inline)) size_t block_size(const block_header_t* block)
+{
+	return block->size & ~block_header_free_bit & ~block_header_prev_free_bit;
+}
+/* Replace the stored size, carrying over the free / prev-free status bits unchanged. */
+static inline __attribute__((always_inline)) void block_set_size(block_header_t* block, size_t size)
+{
+	const size_t status_bits = block->size & (block_header_free_bit | block_header_prev_free_bit);
+	block->size = status_bits | size;
+}
+
+static inline __attribute__((always_inline)) int block_is_last(const block_header_t* block)
+{
+	return block_size(block) == 0;
+}
+
+static inline __attribute__((always_inline)) int block_is_free(const block_header_t* block)
+{
+	return tlsf_cast(int, block->size & block_header_free_bit);
+}
+
+static inline __attribute__((always_inline)) void block_set_free(block_header_t* block)
+{
+	block->size |= block_header_free_bit;
+}
+
+static inline __attribute__((always_inline)) void block_set_used(block_header_t* block)
+{
+	block->size &= ~block_header_free_bit;
+}
+
+static inline __attribute__((always_inline)) int block_is_prev_free(const block_header_t* block)
+{
+	return tlsf_cast(int, block->size & block_header_prev_free_bit);
+}
+
+static inline __attribute__((always_inline)) void block_set_prev_free(block_header_t* block)
+{
+	block->size |= block_header_prev_free_bit;
+}
+
+static inline __attribute__((always_inline)) void block_set_prev_used(block_header_t* block)
+{
+	block->size &= ~block_header_prev_free_bit;
+}
+
+static inline __attribute__((always_inline)) block_header_t* block_from_ptr(const void* ptr)
+{
+	return tlsf_cast(block_header_t*,
+		tlsf_cast(unsigned char*, ptr) - block_start_offset);
+}
+
+static inline __attribute__((always_inline)) void* block_to_ptr(const block_header_t* block)
+{
+	return tlsf_cast(void*,
+		tlsf_cast(unsigned char*, block) + block_start_offset);
+}
+
+/* Return location of next block after block of given size. */
+static inline __attribute__((always_inline)) block_header_t* offset_to_block(const void* ptr, size_t size)
+{
+	return tlsf_cast(block_header_t*, tlsf_cast(tlsfptr_t, ptr) + size);
+}
+
+/* Return location of previous block. */
+static inline __attribute__((always_inline)) block_header_t* block_prev(const block_header_t* block)
+{
+	tlsf_assert(block_is_prev_free(block) && "previous block must be free");
+	return block->prev_phys_block;
+}
+
+/* Return location of next existing block. */
+static inline __attribute__((always_inline)) block_header_t* block_next(const block_header_t* block)
+{
+	block_header_t* next = offset_to_block(block_to_ptr(block),
+		block_size(block) - block_header_overhead);
+	tlsf_assert(!block_is_last(block));
+	return next;
+}
+
+/* Link a new block with its physical neighbor, return the neighbor. */
+static inline __attribute__((always_inline)) block_header_t* block_link_next(block_header_t* block)
+{
+	block_header_t* next = block_next(block);
+	next->prev_phys_block = block;
+	return next;
+}
+/* Flag the block free and tell its physical successor that its predecessor is free. */
+static inline __attribute__((always_inline)) void block_mark_as_free(block_header_t* block)
+{
+	/* block_link_next() first stores `block` in the successor's prev_phys_block. */
+	block_header_t* successor = block_link_next(block);
+	block_set_prev_free(successor);
+	block_set_free(block);
+}
+/* Flag the block used and clear the prev-free bit kept in its physical successor. */
+static inline __attribute__((always_inline)) void block_mark_as_used(block_header_t* block)
+{
+	block_header_t* successor = block_next(block);
+	block_set_prev_used(successor);
+	block_set_used(block);
+}
+
+#if defined(__cplusplus)
+};
+#endif
diff --git a/lib/tlsf/tlsf_common.h b/lib/tlsf/tlsf_common.h
new file mode 100644
index 0000000..35ab101
--- /dev/null
+++ b/lib/tlsf/tlsf_common.h
@@ -0,0 +1,127 @@
+/*
+ * SPDX-FileCopyrightText: 2006-2016 Matthew Conte
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#pragma once
+#include <stddef.h>
+#include <assert.h>
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+/*
+** Constants definition for poisoning.
+** These defines are used as 3rd argument of tlsf_poison_fill_region() for readability purposes.
+*/
+#define POISONING_AFTER_FREE true
+#define POISONING_AFTER_MALLOC !POISONING_AFTER_FREE
+
+/*
+** Cast and min/max macros.
+*/
+#define tlsf_cast(t, exp)	((t) (exp))
+#define tlsf_min(a, b)		((a) < (b) ? (a) : (b))
+#define tlsf_max(a, b)		((a) > (b) ? (a) : (b))
+
+/*
+** Set assert macro, if it has not been provided by the user.
+*/
+#if !defined (tlsf_assert)
+#define tlsf_assert assert
+#endif
+
+enum tlsf_config
+{
+	/* All allocation sizes and addresses are aligned to 4 bytes. */
+	ALIGN_SIZE_LOG2 = 2,
+	ALIGN_SIZE = (1 << ALIGN_SIZE_LOG2),
+};
+
+/*
+** Data structures and associated constants.
+*/
+
+/* A type used for casting when doing pointer arithmetic. */
+typedef ptrdiff_t tlsfptr_t;
+
+typedef struct block_header_t
+{
+	/* Points to the previous physical block. */
+	struct block_header_t* prev_phys_block;
+
+	/* The size of this block, excluding the block header. */
+	size_t size;
+
+	/* Next and previous free blocks. */
+	struct block_header_t* next_free;
+	struct block_header_t* prev_free;
+} block_header_t;
+
+/*
+** Since block sizes are always at least a multiple of 4, the two least
+** significant bits of the size field are used to store the block status:
+** - bit 0: whether block is busy or free
+** - bit 1: whether previous block is busy or free
+*/
+static const size_t block_header_free_bit = 1 << 0;
+static const size_t block_header_prev_free_bit = 1 << 1;
+
+/*
+** The size of the block header exposed to used blocks is the size field.
+** The prev_phys_block field is stored *inside* the previous free block.
+*/
+static const size_t block_header_overhead = sizeof(size_t);
+
+/* User data starts directly after the size field in a used block. */
+static const size_t block_start_offset =
+	offsetof(block_header_t, size) + sizeof(size_t);
+
+/*
+** A free block must be large enough to store its header minus the size of
+** the prev_phys_block field, and no larger than the number of addressable
+** bits for FL_INDEX.
+*/
+static const size_t block_size_min = 
+	sizeof(block_header_t) - sizeof(block_header_t*);
+
+/* The TLSF control structure. */
+typedef struct control_t
+{
+    /* Empty lists point at this block to indicate they are free. */
+    block_header_t block_null;
+
+    /* Local parameter for the pool. Given the maximum
+	 * value of each field, all the following parameters
+	 * can fit on 4 bytes when using bitfields
+	 */
+    unsigned int fl_index_count : 5; // 5 cumulated bits
+    unsigned int fl_index_shift : 3; // 8 cumulated bits
+    unsigned int fl_index_max : 6; // 14 cumulated bits
+    unsigned int sl_index_count : 6; // 20 cumulated bits
+
+	/* log2 of number of linear subdivisions of block sizes. Larger
+	** values require more memory in the control structure. Values of
+	** 4 or 5 are typical.
+	*/
+    unsigned int sl_index_count_log2 : 3; // 23 cumulated bits
+    unsigned int small_block_size : 8; // 31 cumulated bits
+
+	/* size of the metadata ( size of control block,
+	 * sl_bitmap and blocks )
+	 */
+    size_t size;
+
+    /* Bitmaps for free lists. */
+    unsigned int fl_bitmap;
+    unsigned int *sl_bitmap;
+
+    /* Head of free lists. */
+    block_header_t** blocks;
+} control_t;
+
+#if defined(__cplusplus)
+};
+#endif
diff --git a/src/main.cpp b/src/main.cpp
index 5f5cbb8..4e153ef 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -1,13 +1,6 @@
 #include <stdio.h>
 #include "project_config.h"
 
-// Reverse compatibility with old PicoSDK.
-#if __has_include("bsp/board_api.h")
-#include "bsp/board_api.h"
-#else
-#include "bsp/board.h"
-#endif
-
 #include "midi_input_usb.h"
 #include "audio_subsystem.h"
 #include "picoadk_hw.h"
@@ -18,56 +11,39 @@
 
 #include "arduino_compat.h"
 
+#include <psram.h>
+
+volatile size_t psram_size;
+
 // Audio Buffer (Size is set in lib/audio/include/audio_subsystem.h)
 audio_buffer_pool_t *audio_pool;
 
-MIDIInputUSB usbMIDI;
-
 #ifdef __cplusplus
-extern "C" {
+extern "C"
+{
 #endif
 
-    /**
-     * Task to handle USB MIDI input processing.
-     *
-     * @param pvParameters Unused task parameters
-     */
-    void usb_midi_task(void *pvParameters)
-    {
-        // Setup MIDI Callbacks using lambdas
-        usbMIDI.setCCCallback([](uint8_t cc, uint8_t value, uint8_t channel) {
-            // Handle Control Change (CC) event
-            // e.g. Dsp_cc(ctx, cc, value, channel);
-        });
-
-        usbMIDI.setNoteOnCallback([](uint8_t note, uint8_t velocity, uint8_t channel) {
-            if (velocity > 0)
-            {
-                // Handle Note On event
-                // e.g. Dsp_noteOn(ctx, note, channel);
-            }
-            else
-            {
-                // Treat zero velocity as Note Off
-                // e.g. Dsp_noteOff(ctx, note, channel);
-            }
-        });
-
-        usbMIDI.setNoteOffCallback([](uint8_t note, uint8_t velocity, uint8_t channel) {
-            // Handle Note Off event
-            // e.g. Dsp_noteOff(ctx, note, channel);
-        });
+    inline int getTotalPSRAMHeap() {
+#if defined(RP2350_PSRAM_CS)
+        extern size_t __psram_total_space();
+        return __psram_total_space();
+#else
+        return 0;
+#endif
+    }
 
-        while (1)
-        {
-            // TinyUSB Device Task
-            #if (OPT_MODE_HOST == 1)
-            tuh_task();
-            #else
-            tud_task();
-            #endif
-            usbMIDI.process();
-        }
+     inline int getUsedPSRAMHeap() {
+#if defined(RP2350_PSRAM_CS)
+        extern size_t __psram_total_used();
+        return __psram_total_used();
+#else
+        return 0;
+#endif
+    }
+
+
+     inline int getFreePSRAMHeap() {
+        return getTotalPSRAMHeap() - getUsedPSRAMHeap();
     }
 
     /**
@@ -83,11 +59,16 @@ extern "C" {
         while (1)
         {
             gpio_put(2, 1);
-            vTaskDelay(pdMS_TO_TICKS(100)); // Delay for 100ms
+            vTaskDelay(pdMS_TO_TICKS(50)); // Delay for 50ms
             gpio_put(2, 0);
+            vTaskDelay(pdMS_TO_TICKS(100)); // Delay for 100ms
+            // The value printed is total minus used, so label it as free space.
+            printf("Free PSRAM Heap: %d\n", getFreePSRAMHeap());
         }
     }
 
+
+
     /**
      * Main entry point.
      */
@@ -96,22 +77,25 @@ extern "C" {
         // Initialize hardware
         picoadk_init();
 
+
         // Initialize DSP engine (if needed)
 
         // Initialize the audio subsystem
         audio_pool = init_audio();
 
         // Create FreeRTOS tasks for MIDI handling and LED blinking
-        xTaskCreate(usb_midi_task, "USB_MIDI_Task", 4096, NULL, configMAX_PRIORITIES, NULL);
-        xTaskCreate(blinker_task, "Blinker_Task", 128, NULL, configMAX_PRIORITIES - 1, NULL);
+
+        //xTaskCreate(blinker_task, "Blinker_Task", 128, NULL, configMAX_PRIORITIES - 1, NULL);
 
         // Start the FreeRTOS scheduler
-        vTaskStartScheduler();
+       //vTaskStartScheduler();
 
         // Idle loop (this is fine for Cortex-M33)
         while (1)
         {
             // Could use `taskYIELD()` or similar here if needed
+            printf("Total PSRAM Heap: %d\n", getTotalPSRAMHeap());
+            sleep_ms(1000);
         }
     }
 
@@ -134,8 +118,8 @@ extern "C" {
         // Fill buffer with 32-bit samples (stereo, 2 channels)
         for (uint i = 0; i < buffer->max_sample_count; i++)
         {
-            samples[i * 2 + 0] = 0;   // Left channel sample
-            samples[i * 2 + 1] = 0;   // Right channel sample
+            samples[i * 2 + 0] = 0; // Left channel sample
+            samples[i * 2 + 1] = 0; // Right channel sample
             // Use your DSP function here for generating the audio samples
         }
 
diff --git a/src/picoadk_hw.cpp b/src/picoadk_hw.cpp
index 3ca026f..beb6802 100644
--- a/src/picoadk_hw.cpp
+++ b/src/picoadk_hw.cpp
@@ -1,5 +1,6 @@
 #include "picoadk_hw.h"
 #include "hardware/structs/xip_ctrl.h"
+#include <psram.h>
 
 void picoadk_init()
 {
@@ -11,9 +12,6 @@ void picoadk_init()
         #warning "No overclocking will performed, as this is untested on plaftorms other than the RP2040."
         #endif
 
-        // Initialize TinyUSB
-        board_init();
-        tusb_init();
         stdio_init_all();
 
         // set gpio 25 (soft mute) to output and set to 1 (unmute)
@@ -59,9 +57,6 @@ void picoadk_init()
         spi_set_format(spi1, 8, SPI_CPOL_0, SPI_CPHA_0, SPI_MSB_FIRST);
         spi_set_slave(spi1, false);
 
-        // Set up PSRAM
-        gpio_set_function(47, GPIO_FUNC_XIP_CS1); // CS for PSRAM
-        xip_ctrl_hw->ctrl|=XIP_CTRL_WRITABLE_M1_BITS;
 }
 
 int adc128_read(uint8_t chan)
diff --git a/src/psram.cpp b/src/psram.cpp
new file mode 100644
index 0000000..f18d0e2
--- /dev/null
+++ b/src/psram.cpp
@@ -0,0 +1,440 @@
+// Originally from https://github.com/sparkfun/sparkfun-pico
+/**
+    @file sfe_psram.c
+
+    @brief This file contains a function that is used to detect and initialize PSRAM on
+    SparkFun rp2350 boards.
+*/
+
+/*
+    The MIT License (MIT)
+
+    Copyright (c) 2024 SparkFun Electronics
+
+    Permission is hereby granted, free of charge, to any person obtaining a
+    copy of this software and associated documentation files (the "Software"),
+    to deal in the Software without restriction, including without limitation
+    the rights to use, copy, modify, merge, publish, distribute, sublicense,
+    and/or sell copies of the Software, and to permit persons to whom the
+    Software is furnished to do so, subject to the following conditions: The
+    above copyright notice and this permission notice shall be included in all
+    copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED
+    "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
+    NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
+    PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+    HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+    ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+    CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+*/
+
+// Hacked by Earle Philhower to work with the Arduino-Pico core
+
+
+#ifdef RP2350_PSRAM_CS
+
+#include <hardware/address_mapped.h>
+#include <hardware/clocks.h>
+#include <hardware/gpio.h>
+#include <hardware/sync.h>
+#include <hardware/regs/addressmap.h>
+#include <hardware/spi.h>
+#include <hardware/structs/qmi.h>
+#include <hardware/structs/xip_ctrl.h>
+#include <pico/runtime_init.h>
+
+// Include TLSF in this compilation unit
+#include "../lib/tlsf/tlsf.c"
+static tlsf_t _mem_heap = nullptr;
+static pool_t _mem_psram_pool = nullptr;
+
+// PSRAM heap minus PSRAM global/static variables from the linker
+extern "C" {
+    extern uint8_t __psram_start__;
+    extern uint8_t __psram_heap_start__;
+}
+
+static bool _bInitalized = false;
+size_t __psram_size = 0;
+size_t __psram_heap_size = 0;
+
+#define PICO_RUNTIME_INIT_PSRAM "11001" // Towards the end, after alarms
+
+#ifndef RP2350_PSRAM_MAX_SELECT_FS64
+#define RP2350_PSRAM_MAX_SELECT_FS64 (125'000'000)
+#endif
+
+#ifndef RP2350_PSRAM_MIN_DESELECT_FS
+#define RP2350_PSRAM_MIN_DESELECT_FS (50'000'000)
+#endif
+
+#ifndef RP2350_PSRAM_MAX_SCK_HZ
+#define RP2350_PSRAM_MAX_SCK_HZ (109'000'000)
+#endif
+
+#ifndef RP2350_PSRAM_ID
+#define RP2350_PSRAM_ID (0x5D)
+#endif
+
+// DETAILS/
+//
+// SparkFun RP2350 boards use the following PSRAM IC:
+//
+//      apmemory APS6404L-3SQR-ZR
+//      https://www.mouser.com/ProductDetail/AP-Memory/APS6404L-3SQR-ZR?qs=IS%252B4QmGtzzpDOdsCIglviw%3D%3D
+//
+// The origin of this logic is from the Circuit Python code that was downloaded from:
+//     https://github.com/raspberrypi/pico-sdk-rp2350/issues/12#issuecomment-2055274428
+//
+
+// Details on the PSRAM IC that are used during setup/configuration of PSRAM on SparkFun RP2350 boards.
+
+// For PSRAM timing calculations - to use int math, we work in femto seconds (fs) (1e-15),
+// NOTE: This idea is from micro python work on psram..
+
+#define SFE_SEC_TO_FS 1000000000000000ll
+
+// max select pulse width = 8 us = 8000 ns = 8000 * 1e6 fs = 8000e6 fs
+// Additionally, MAX_SELECT is expressed in units of 64 clock cycles, so the constant
+// already folds that factor in: 8000e6 fs / 64 = 125e6 fs
+
+const uint32_t SFE_PSRAM_MAX_SELECT_FS64 = RP2350_PSRAM_MAX_SELECT_FS64;
+
+// min deselect pulse width = 50 ns = 50 * 1e6 fs = 5e7 fs
+const uint32_t SFE_PSRAM_MIN_DESELECT_FS = RP2350_PSRAM_MIN_DESELECT_FS;
+
+// From the PSRAM datasheet: max frequency with VDD at 3.3 V - SparkFun RP2350 boards run at 3.3 V.
+// If VDD = 3.0 V, the max frequency is 133 MHz.
+const uint32_t SFE_PSRAM_MAX_SCK_HZ = RP2350_PSRAM_MAX_SCK_HZ;
+
+// PSRAM SPI command codes
+const uint8_t PSRAM_CMD_QUAD_END = 0xF5;
+const uint8_t PSRAM_CMD_QUAD_ENABLE = 0x35;
+const uint8_t PSRAM_CMD_READ_ID = 0x9F;
+const uint8_t PSRAM_CMD_RSTEN = 0x66;
+const uint8_t PSRAM_CMD_RST = 0x99;
+const uint8_t PSRAM_CMD_QUAD_READ = 0xEB;
+const uint8_t PSRAM_CMD_QUAD_WRITE = 0x38;
+const uint8_t PSRAM_CMD_NOOP = 0xFF;
+
+const uint8_t PSRAM_ID = RP2350_PSRAM_ID;
+
+
+//-----------------------------------------------------------------------------
+/// @brief Communicate directly with the PSRAM IC - validate it is present and return the size
+///
+/// @return size_t The size of the PSRAM
+///
+/// @note This function expects the CS pin set
+static size_t __no_inline_not_in_flash_func(get_psram_size)(void) {
+    size_t psram_size = 0;
+    uint32_t intr_stash = save_and_disable_interrupts();
+
+    // Enable QMI direct mode (clock divider 30) so the PSRAM ID can be read.
+    qmi_hw->direct_csr = 30 << QMI_DIRECT_CSR_CLKDIV_LSB | QMI_DIRECT_CSR_EN_BITS;
+
+    // Need to poll for the cooldown on the last XIP transfer to expire
+    // (via direct-mode BUSY flag) before it is safe to perform the first
+    // direct-mode operation
+    while ((qmi_hw->direct_csr & QMI_DIRECT_CSR_BUSY_BITS) != 0) {
+    }
+
+    // Assert CS1 for the first command (the chip may still be in quad mode from an earlier init).
+    qmi_hw->direct_csr |= QMI_DIRECT_CSR_ASSERT_CS1N_BITS;
+
+    // Transmit the command to exit QPI quad mode - read ID as standard SPI
+    qmi_hw->direct_tx =
+        QMI_DIRECT_TX_OE_BITS | QMI_DIRECT_TX_IWIDTH_VALUE_Q << QMI_DIRECT_TX_IWIDTH_LSB | PSRAM_CMD_QUAD_END;
+
+    while ((qmi_hw->direct_csr & QMI_DIRECT_CSR_BUSY_BITS) != 0) {
+    }
+
+    (void)qmi_hw->direct_rx;
+    qmi_hw->direct_csr &= ~(QMI_DIRECT_CSR_ASSERT_CS1N_BITS);
+    // Read the ID: send READ_ID followed by six no-op bytes under one CS1 assertion.
+    // The bytes received at i == 5 and i == 6 are kept as KGD and EID; the rest are discarded.
+    qmi_hw->direct_csr |= QMI_DIRECT_CSR_ASSERT_CS1N_BITS;
+    uint8_t kgd = 0;
+    uint8_t eid = 0;
+    for (size_t i = 0; i < 7; i++) {
+        qmi_hw->direct_tx = (i == 0 ? PSRAM_CMD_READ_ID : PSRAM_CMD_NOOP);
+
+        while ((qmi_hw->direct_csr & QMI_DIRECT_CSR_TXEMPTY_BITS) == 0) {
+        }
+        while ((qmi_hw->direct_csr & QMI_DIRECT_CSR_BUSY_BITS) != 0) {
+        }
+        if (i == 5) {
+            kgd = qmi_hw->direct_rx;
+        } else if (i == 6) {
+            eid = qmi_hw->direct_rx;
+        } else {
+            (void)qmi_hw->direct_rx;    // just read and discard
+        }
+    }
+
+    // Release CS1 and leave direct mode.
+    qmi_hw->direct_csr &= ~(QMI_DIRECT_CSR_ASSERT_CS1N_BITS | QMI_DIRECT_CSR_EN_BITS);
+
+    // Only report a size when the KGD byte matches the expected PSRAM_ID; otherwise return 0.
+    if (kgd == PSRAM_ID) {
+        // Start from 1 MiB and scale by the code in EID bits 7:5 (EID 0x26 is treated like code 2).
+        psram_size = 1024 * 1024; // 1 MiB
+        uint8_t size_id = eid >> 5;
+        if (eid == 0x26 || size_id == 2) {
+            psram_size *= 8;
+        } else if (size_id == 0) {
+            psram_size *= 2;
+        } else if (size_id == 1) {
+            psram_size *= 4;
+        }
+    }
+    restore_interrupts(intr_stash);
+    return psram_size;
+}
+//-----------------------------------------------------------------------------
+/// @brief Update the PSRAM timing configuration based on system clock
+///
+/// @note This function expects interrupts to be enabled on entry
+
+static void __no_inline_not_in_flash_func(set_psram_timing)(void) {
+    // Read the system clock frequency in Hz - done before disabling interrupts.
+    uint32_t sysHz = (uint32_t)clock_get_hz(clk_sys);
+
+    // Clock divider, rounded up, so that sysHz / clockDivider never exceeds
+    // what the PSRAM IC can handle (SFE_PSRAM_MAX_SCK_HZ).
+    volatile uint8_t clockDivider = (sysHz + SFE_PSRAM_MAX_SCK_HZ - 1) / SFE_PSRAM_MAX_SCK_HZ;
+
+    uint32_t intr_stash = save_and_disable_interrupts();
+
+    // Duration of one system clock cycle in femtoseconds (integer division truncates).
+
+    uint32_t fsPerCycle = SFE_SEC_TO_FS / sysHz;
+
+    // maxSelect is expressed in units of 64 system clock cycles, so
+    // maxFS / (64 * fsPerCycle) == SFE_PSRAM_MAX_SELECT_FS64 / fsPerCycle (rounded down).
+    volatile uint8_t maxSelect = SFE_PSRAM_MAX_SELECT_FS64 / fsPerCycle;
+
+    // minDeselect is expressed in system clock cycles.
+    // Adding fsPerCycle - 1 before dividing rounds up, so the programmed time is
+    // never shorter than SFE_PSRAM_MIN_DESELECT_FS (the PSRAM's minimum deselect time).
+
+    volatile uint8_t minDeselect = (SFE_PSRAM_MIN_DESELECT_FS + fsPerCycle - 1) / fsPerCycle;
+
+    // printf("Max Select: %d, Min Deselect: %d, clock divider: %d\n", maxSelect, minDeselect, clockDivider);
+    // NOTE(review): the three values are narrowed to uint8_t and not range-checked here - confirm they fit their TIMING fields.
+    qmi_hw->m[1].timing = QMI_M1_TIMING_PAGEBREAK_VALUE_1024 << QMI_M1_TIMING_PAGEBREAK_LSB | // Break between pages.
+                          3 << QMI_M1_TIMING_SELECT_HOLD_LSB | // Delay releasing CS for 3 extra system cycles.
+                          1 << QMI_M1_TIMING_COOLDOWN_LSB | 1 << QMI_M1_TIMING_RXDELAY_LSB |
+                          maxSelect << QMI_M1_TIMING_MAX_SELECT_LSB | minDeselect << QMI_M1_TIMING_MIN_DESELECT_LSB |
+                          clockDivider << QMI_M1_TIMING_CLKDIV_LSB;
+
+    restore_interrupts(intr_stash);
+}
+
+
+//-----------------------------------------------------------------------------
+/// @brief The setup_psram function - note that this is not in flash
+///
+///
+static void __no_inline_not_in_flash_func(runtime_init_setup_psram)(/*uint32_t psram_cs_pin*/) {
+    // Route the PSRAM chip-select GPIO to the XIP CS1 function.
+    gpio_set_function(RP2350_PSRAM_CS, GPIO_FUNC_XIP_CS1);
+
+    // Probe the chip; get_psram_size() returns 0 when no matching PSRAM ID is read.
+    size_t psram_size = get_psram_size();
+
+    // No PSRAM - leave __psram_size and __psram_heap_size at their initial 0.
+    if (psram_size == 0) {
+        return;
+    }
+
+    uint32_t intr_stash = save_and_disable_interrupts();
+    // Enter QMI direct mode (clock divider 30) to send the reset and quad-enable commands.
+    qmi_hw->direct_csr = 30 << QMI_DIRECT_CSR_CLKDIV_LSB | QMI_DIRECT_CSR_EN_BITS;
+
+    // Need to poll for the cooldown on the last XIP transfer to expire
+    // (via direct-mode BUSY flag) before it is safe to perform the first
+    // direct-mode operation
+    while ((qmi_hw->direct_csr & QMI_DIRECT_CSR_BUSY_BITS) != 0) {
+    }
+
+    // Send RESETEN, RESET and quad enable, each under its own CS1 assertion.
+    for (uint8_t i = 0; i < 3; i++) {
+        qmi_hw->direct_csr |= QMI_DIRECT_CSR_ASSERT_CS1N_BITS;
+        if (i == 0) {
+            qmi_hw->direct_tx = PSRAM_CMD_RSTEN;
+        } else if (i == 1) {
+            qmi_hw->direct_tx = PSRAM_CMD_RST;
+        } else {
+            qmi_hw->direct_tx = PSRAM_CMD_QUAD_ENABLE;
+        }
+
+        while ((qmi_hw->direct_csr & QMI_DIRECT_CSR_BUSY_BITS) != 0) {
+        }
+        qmi_hw->direct_csr &= ~(QMI_DIRECT_CSR_ASSERT_CS1N_BITS);
+        for (size_t j = 0; j < 20; j++) {
+            asm("nop");
+        }
+
+        (void)qmi_hw->direct_rx;
+    }
+
+    // Release CS1 and leave direct mode.
+    qmi_hw->direct_csr &= ~(QMI_DIRECT_CSR_ASSERT_CS1N_BITS | QMI_DIRECT_CSR_EN_BITS);
+
+    // Restore interrupts first: set_psram_timing() saves and disables them itself.
+    restore_interrupts(intr_stash);
+    set_psram_timing();
+
+    // Disable interrupts again while the M1 read/write formats are programmed.
+    intr_stash = save_and_disable_interrupts();
+    // M1 read format: quad width for every phase, 8-bit command prefix, no suffix.
+    qmi_hw->m[1].rfmt = (QMI_M1_RFMT_PREFIX_WIDTH_VALUE_Q << QMI_M1_RFMT_PREFIX_WIDTH_LSB |
+                         QMI_M1_RFMT_ADDR_WIDTH_VALUE_Q << QMI_M1_RFMT_ADDR_WIDTH_LSB |
+                         QMI_M1_RFMT_SUFFIX_WIDTH_VALUE_Q << QMI_M1_RFMT_SUFFIX_WIDTH_LSB |
+                         QMI_M1_RFMT_DUMMY_WIDTH_VALUE_Q << QMI_M1_RFMT_DUMMY_WIDTH_LSB |
+                         QMI_M1_RFMT_DUMMY_LEN_VALUE_24 << QMI_M1_RFMT_DUMMY_LEN_LSB |
+                         QMI_M1_RFMT_DATA_WIDTH_VALUE_Q << QMI_M1_RFMT_DATA_WIDTH_LSB |
+                         QMI_M1_RFMT_PREFIX_LEN_VALUE_8 << QMI_M1_RFMT_PREFIX_LEN_LSB |
+                         QMI_M1_RFMT_SUFFIX_LEN_VALUE_NONE << QMI_M1_RFMT_SUFFIX_LEN_LSB);
+
+    qmi_hw->m[1].rcmd = PSRAM_CMD_QUAD_READ << QMI_M1_RCMD_PREFIX_LSB | 0 << QMI_M1_RCMD_SUFFIX_LSB;
+    // M1 write format: same quad layout as the read format, but with no dummy phase.
+    qmi_hw->m[1].wfmt = (QMI_M1_WFMT_PREFIX_WIDTH_VALUE_Q << QMI_M1_WFMT_PREFIX_WIDTH_LSB |
+                         QMI_M1_WFMT_ADDR_WIDTH_VALUE_Q << QMI_M1_WFMT_ADDR_WIDTH_LSB |
+                         QMI_M1_WFMT_SUFFIX_WIDTH_VALUE_Q << QMI_M1_WFMT_SUFFIX_WIDTH_LSB |
+                         QMI_M1_WFMT_DUMMY_WIDTH_VALUE_Q << QMI_M1_WFMT_DUMMY_WIDTH_LSB |
+                         QMI_M1_WFMT_DUMMY_LEN_VALUE_NONE << QMI_M1_WFMT_DUMMY_LEN_LSB |
+                         QMI_M1_WFMT_DATA_WIDTH_VALUE_Q << QMI_M1_WFMT_DATA_WIDTH_LSB |
+                         QMI_M1_WFMT_PREFIX_LEN_VALUE_8 << QMI_M1_WFMT_PREFIX_LEN_LSB |
+                         QMI_M1_WFMT_SUFFIX_LEN_VALUE_NONE << QMI_M1_WFMT_SUFFIX_LEN_LSB);
+
+    qmi_hw->m[1].wcmd = PSRAM_CMD_QUAD_WRITE << QMI_M1_WCMD_PREFIX_LSB | 0 << QMI_M1_WCMD_SUFFIX_LSB;
+
+    // Mark that we can write to PSRAM.
+    xip_ctrl_hw->ctrl |= XIP_CTRL_WRITABLE_M1_BITS;
+
+    restore_interrupts(intr_stash);
+    // Publish the detected size; the heap gets whatever follows the linker-placed PSRAM data.
+    __psram_size = psram_size;
+
+    uint32_t used_psram_size = &__psram_heap_start__ - &__psram_start__;
+    __psram_heap_size = __psram_size - used_psram_size;
+}
+PICO_RUNTIME_INIT_FUNC_RUNTIME(runtime_init_setup_psram, PICO_RUNTIME_INIT_PSRAM);
+
+// update timing -- used if the system clock/timing was changed.
+void psram_reinit_timing() {
+    set_psram_timing();
+}
+
+static bool __psram_heap_init() {
+    // Build the TLSF heap lazily: once it has been created, every later call
+    // returns straight away.
+    if (_bInitalized) {
+        return true;
+    }
+    // A heap size of 0 (e.g. no PSRAM was detected at runtime init) leaves
+    // nothing to manage.
+    if (__psram_heap_size == 0) {
+        return false;
+    }
+    _mem_psram_pool = NULL;
+    _mem_heap = tlsf_create_with_pool((void *)&__psram_heap_start__, __psram_heap_size, 16 * 1024 * 1024);
+    if (_mem_heap != NULL) {
+        _mem_psram_pool = tlsf_get_pool(_mem_heap);
+    }
+    // Success requires both the TLSF control structure and its pool; the flag
+    // stays false otherwise so a later call can try again.
+    _bInitalized = (_mem_heap != NULL) && (_mem_psram_pool != NULL);
+    return _bInitalized;
+}
+
+void *__psram_malloc(size_t size) {
+    // Allocate from the PSRAM TLSF heap, creating the heap on first use.
+    // NULL is returned when no heap is available or tlsf_malloc() itself returns NULL.
+    const bool heap_ready = __psram_heap_init() && _mem_heap;
+    return heap_ready ? tlsf_malloc(_mem_heap, size) : NULL;
+}
+
+void __psram_free(void *ptr) {
+    // Hand ptr back to the PSRAM TLSF heap; a no-op when no heap is available.
+    if (__psram_heap_init() && _mem_heap) {
+        tlsf_free(_mem_heap, ptr);
+    }
+}
+
+void *__psram_realloc(void *ptr, size_t size) {
+    // Resize ptr inside the PSRAM TLSF heap, creating the heap on first use.
+    // NULL is returned without touching ptr when no heap is available.
+    const bool heap_ready = __psram_heap_init() && _mem_heap;
+    return heap_ready ? tlsf_realloc(_mem_heap, ptr, size) : NULL;
+}
+
+void *__psram_calloc(size_t num, size_t size) {
+    // Zero-filled allocation of num * size bytes from the PSRAM heap. Returns NULL when
+    // the heap is unavailable or when num * size would wrap around size_t.
+    if (!__psram_heap_init() || !_mem_heap || (size != 0 && num > (size_t)-1 / size)) {
+        return NULL;
+    }
+    void *ptr = tlsf_malloc(_mem_heap, num * size);
+    if (ptr) { bzero(ptr, num * size); }
+    return ptr;
+}
+
+static bool max_free_walker(void *ptr, size_t size, int used, void *user) {
+    size_t *max_size = (size_t *)user;
+    if (!used && *max_size < size) {
+        *max_size = size;
+    }
+    return true;
+}
+
+size_t __psram_largest_free_block() {
+    // Size of the biggest free block seen while walking the PSRAM pool.
+    // Yields 0 when the heap cannot be initialised or has no pool.
+    size_t largest = 0;
+    const bool heap_ready = __psram_heap_init() && _mem_heap;
+    if (heap_ready && _mem_psram_pool) {
+        tlsf_walk_pool(_mem_psram_pool, max_free_walker, &largest);
+    }
+    return largest;
+}
+
+static bool memory_size_walker(void *ptr, size_t size, int used, void *user) {
+    *((size_t *)user) += size;
+    return true;
+}
+
+size_t __psram_total_space() {
+    // Sum of the sizes of every block (used and free) reported by the pool walker.
+    // Yields 0 when the heap cannot be initialised or has no pool.
+    size_t byte_count = 0;
+    const bool heap_ready = __psram_heap_init() && _mem_heap;
+    if (heap_ready && _mem_psram_pool) {
+        tlsf_walk_pool(_mem_psram_pool, memory_size_walker, &byte_count);
+    }
+    return byte_count;
+}
+
+static bool memory_used_walker(void *ptr, size_t size, int used, void *user) {
+    if (used) {
+        *((size_t *)user) += size;
+    }
+    return true;
+}
+
+size_t __psram_total_used() {
+    // Sum of the sizes of the blocks the pool walker reports as used.
+    // Yields 0 when the heap cannot be initialised or has no pool.
+    size_t used_bytes = 0;
+    const bool heap_ready = __psram_heap_init() && _mem_heap;
+    if (heap_ready && _mem_psram_pool) {
+        tlsf_walk_pool(_mem_psram_pool, memory_used_walker, &used_bytes);
+    }
+    return used_bytes;
+}
+
+
+#endif // RP2350_PSRAM_CS
+
diff --git a/tools/simplesub.py b/tools/simplesub.py
new file mode 100644
index 0000000..9fd18d1
--- /dev/null
+++ b/tools/simplesub.py
@@ -0,0 +1,29 @@
+#!/usr/bin/env python3
+import sys
+import struct
+import subprocess
+import re
+import os
+import os.path
+import argparse
+import time
+
+def main():
+    parser = argparse.ArgumentParser(description='Simple text substitution')
+    parser.add_argument('-i', '--input', action='store', required=True, help='Path to the source file')
+    parser.add_argument('-o', '--out', action='store', required=True, help='Path to the output file')
+    parser.add_argument('-s', '--sub', action='append', nargs=2, metavar=('find', 'replace'), required=True, help='Substition')
+    args = parser.parse_args()
+
+    with open(args.input, "r") as fin:
+        data = fin.read()
+
+    for f, r in args.sub:
+        data = re.sub(f, r, data)
+
+    with open(args.out, "w") as fout:
+        fout.write(data)
+
+
+main()
+