diff --git a/.gitignore b/.gitignore index e47228c8..e037ea40 100644 --- a/.gitignore +++ b/.gitignore @@ -10,6 +10,7 @@ qdl/ .qemu_registered .vscode +.vscode-ctags edl_config.json tools/edl_repo/ diff --git a/.gitmodules b/.gitmodules index d91f9a6a..0f2cb41b 100644 --- a/.gitmodules +++ b/.gitmodules @@ -10,3 +10,6 @@ [submodule "agnos-binaries"] path = agnos-binaries url = ../../commaai/agnos-binaries.git +[submodule "kernel/linux"] + path = kernel/linux + url = https://github.com/andiradulescu/linux diff --git a/build_kernel.sh b/build_kernel.sh index bb70289b..afe881ea 100755 --- a/build_kernel.sh +++ b/build_kernel.sh @@ -1,28 +1,27 @@ #!/bin/bash -e -DEFCONFIG=tici_defconfig +DEFCONFIG="defconfig sdm845.config" # Get directories and make sure we're in the correct spot to start the build DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null && pwd)" +ARCH=$(uname -m) TOOLS=$DIR/tools -TMP_DIR=/tmp/agnos-builder-tmp +TMP_DIR=/tmp/agnos-builder-new-kernel-tmp OUTPUT_DIR=$DIR/output BOOT_IMG=./boot.img -cd $DIR +KERNEL_DIR=$DIR/kernel/linux -# Clone kernel if not done already -if git submodule status --cached agnos-kernel-sdm845/ | grep "^-"; then - git submodule update --init agnos-kernel-sdm845 -fi -cd agnos-kernel-sdm845 +cd $KERNEL_DIR -$DIR/tools/extract_tools.sh +if [ "$ARCH" != "arm64" ] && [ "$ARCH" != "aarch64" ]; then + $DIR/tools/extract_tools.sh -# Build parameters -export ARCH=arm64 -export CROSS_COMPILE=$TOOLS/aarch64-linux-gnu-gcc/bin/aarch64-linux-gnu- -export CC=$TOOLS/aarch64-linux-gnu-gcc/bin/aarch64-linux-gnu-gcc -export LD=$TOOLS/aarch64-linux-gnu-gcc/bin/aarch64-linux-gnu-ld.bfd + # Build parameters + export ARCH=arm64 + export CROSS_COMPILE=$TOOLS/aarch64-linux-gnu-gcc/bin/aarch64-linux-gnu- + export CC=$TOOLS/aarch64-linux-gnu-gcc/bin/aarch64-linux-gnu-gcc + export LD=$TOOLS/aarch64-linux-gnu-gcc/bin/aarch64-linux-gnu-ld.bfd +fi # these do anything? export KCFLAGS="-w" @@ -31,21 +30,27 @@ export KCFLAGS="-w" echo "-- First make --" make $DEFCONFIG O=out echo "-- Second make: $(nproc --all) cores --" -make -j$(nproc --all) O=out # Image.gz-dtb +make -j$(nproc --all) O=out # Image.gz # Turn on if you want perf # LDFLAGS=-static make -j$(nproc --all) -C tools/perf +# Create Image.gz-dtb +cd $KERNEL_DIR/out/arch/arm64/boot/ +# cat Image.gz dts/qcom/comma-*.dtb > Image.gz-dtb +# cat Image.gz dts/qcom/comma-tici.dtb > Image.gz-dtb +cat Image.gz dts/qcom/sdm845-comma3.dtb > Image.gz-dtb + # Copy over Image.gz-dtb mkdir -p $TMP_DIR cd $TMP_DIR -cp $DIR/agnos-kernel-sdm845/out/arch/arm64/boot/Image.gz-dtb . +cp $KERNEL_DIR/out/arch/arm64/boot/Image.gz-dtb . # Make boot image $TOOLS/mkbootimg \ --kernel Image.gz-dtb \ --ramdisk /dev/null \ - --cmdline "console=ttyMSM0,115200n8 quiet loglevel=3 earlycon=msm_geni_serial,0xA84000 androidboot.hardware=qcom androidboot.console=ttyMSM0 ehci-hcd.park=3 lpm_levels.sleep_disabled=1 service_locator.enable=1 androidboot.selinux=permissive firmware_class.path=/lib/firmware/updates net.ifnames=0 dyndbg=\"\"" \ + --cmdline "console=ttyMSM0,115200n8 earlycon=qcom_geni,0xA84000" \ --pagesize 4096 \ --base 0x80000000 \ --kernel_offset 0x8000 \ @@ -63,6 +68,8 @@ cat $BOOT_IMG.nonsecure $BOOT_IMG.sig.padded > $BOOT_IMG # Copy to output dir mkdir -p $OUTPUT_DIR mv $BOOT_IMG $OUTPUT_DIR/ -cp $DIR/agnos-kernel-sdm845/out/techpack/audio/asoc/snd-soc-sdm845.ko $OUTPUT_DIR/ -cp $DIR/agnos-kernel-sdm845/out/techpack/audio/asoc/codecs/snd-soc-wcd9xxx.ko $OUTPUT_DIR/ -cp $DIR/agnos-kernel-sdm845/out/drivers/staging/qcacld-3.0/wlan.ko $OUTPUT_DIR/ + +# These will come from the kernel debs +# cp $DIR/agnos-kernel-sdm845/out/techpack/audio/asoc/snd-soc-sdm845.ko $OUTPUT_DIR/ +# cp $DIR/agnos-kernel-sdm845/out/techpack/audio/asoc/codecs/snd-soc-wcd9xxx.ko $OUTPUT_DIR/ +# cp $DIR/agnos-kernel-sdm845/out/drivers/staging/qcacld-3.0/wlan.ko $OUTPUT_DIR/ diff --git a/build_kernel_headers.sh b/build_kernel_headers.sh index 8320f4ec..bb14040c 100755 --- a/build_kernel_headers.sh +++ b/build_kernel_headers.sh @@ -1,30 +1,29 @@ #!/bin/bash set -e -DEFCONFIG=tici_defconfig +DEFCONFIG="defconfig sdm845.config" # Get directories and make sure we're in the correct spot to start the build DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null && pwd)" +ARCH=$(uname -m) TOOLS=$DIR/tools -TMP_DIR=/tmp/agnos-builder-tmp +TMP_DIR=/tmp/agnos-builder-new-kernel-tmp OUTPUT_DIR=$DIR/output BOOT_IMG=./boot.img -cd $DIR +KERNEL_DIR=$DIR/kernel/linux -# Clone kernel if not done already -if [ ! -d agnos-kernel-sdm845 ]; then - git submodule init agnos-kernel-sdm845 -fi -cd agnos-kernel-sdm845 +cd $KERNEL_DIR -# Build parameters -export ARCH=arm64 -if [ ! -f /TICI ]; then +if [ "$ARCH" != "arm64" ] && [ "$ARCH" != "aarch64" ]; then + # Build parameters + export ARCH=arm64 export CROSS_COMPILE=$TOOLS/aarch64-linux-gnu-gcc/bin/aarch64-linux-gnu- export CC=$TOOLS/aarch64-linux-gnu-gcc/bin/aarch64-linux-gnu-gcc export LD=$TOOLS/aarch64-linux-gnu-gcc/bin/aarch64-linux-gnu-ld.bfd fi +rm -f *.deb *.buildinfo *.changes + # these do anything? export KCFLAGS="-w" @@ -36,9 +35,9 @@ ARGS="" if [ -f /TICI ]; then ARGS="sudo -E" fi -$ARGS make bindeb-pkg -j$(nproc --all) O=out # Image.gz-dtb +$ARGS make bindeb-pkg -j$(nproc --all) O=out # Copy output mkdir -p $OUTPUT_DIR -cp linux-headers-*.deb $OUTPUT_DIR - +rm -f $OUTPUT_DIR/linux-*.deb || true +cp *.deb $OUTPUT_DIR diff --git a/build_system.sh b/build_system.sh index f8fda718..02448705 100755 --- a/build_system.sh +++ b/build_system.sh @@ -10,6 +10,8 @@ export DOCKER_BUILDKIT=1 DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null && pwd)" cd $DIR +ARCH=$(uname -m) + BUILD_DIR="$DIR/build" OUTPUT_DIR="$DIR/output" @@ -33,11 +35,11 @@ cp $OUTPUT_DIR/snd*.ko $DIR/userspace/usr/comma/sound/ # Download Ubuntu Base if not done already if [ ! -f $UBUNTU_FILE ]; then echo -e "${GREEN}Downloading Ubuntu: $UBUNTU_FILE ${NO_COLOR}" - wget -c $UBUNTU_BASE_URL/$UBUNTU_FILE --quiet + curl -C - -o $UBUNTU_FILE $UBUNTU_BASE_URL/$UBUNTU_FILE --silent fi -# Register qemu multiarch -if [ "$(uname -p)" != "aarch64" ]; then +if [ "$ARCH" != "arm64" ] && [ "$ARCH" != "aarch64" ]; then + # Register qemu multiarch docker run --rm --privileged multiarch/qemu-user-static:register --reset fi diff --git a/clean_kernel.sh b/clean_kernel.sh new file mode 100755 index 00000000..1a74c089 --- /dev/null +++ b/clean_kernel.sh @@ -0,0 +1,10 @@ +#!/bin/bash -e + +DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null && pwd)" +KERNEL_DIR=$DIR/kernel/linux + +cd $KERNEL_DIR + +make $DEFCONFIG O=out mrproper +rm -rf out +rm *.deb *.buildinfo *.changes diff --git a/kernel/linux b/kernel/linux new file mode 160000 index 00000000..019f8a3b --- /dev/null +++ b/kernel/linux @@ -0,0 +1 @@ +Subproject commit 019f8a3bc58a5f05616bfd79e47378b518bc3f5b diff --git a/load_kernel_headers.sh b/load_kernel_headers.sh index f8b7ec65..ca8b10ca 100755 --- a/load_kernel_headers.sh +++ b/load_kernel_headers.sh @@ -1,4 +1,4 @@ #!/bin/bash -scp output/linux-headers*.deb comma:/tmp/ -ssh comma "sudo apt install -yq /tmp/linux-headers*.deb" - +scp output/linux-headers*.deb comma@comma:/tmp/ +scp output/linux-image*.deb comma@comma:/tmp/ +ssh comma@comma "sudo apt install -yq /tmp/linux-*.deb" diff --git a/userspace/compile-mesa.sh b/userspace/compile-mesa.sh new file mode 100755 index 00000000..e72fbcdc --- /dev/null +++ b/userspace/compile-mesa.sh @@ -0,0 +1,180 @@ +#!/bin/bash -e + +DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null && pwd)" + +# MESA_REPO="https://gitlab.freedesktop.org/mesa/mesa.git" +MESA_DIR="$DIR/mesa" +BUILD_DIR="build" +INSTALL_DIR="$DIR/mesa_install" +# INSTALL_DIR="/usr/local" +# INSTALL_DIR="$(mktemp -d)" +# https://gitlab.freedesktop.org/mesa/mesa/-/branches/all +# MESA_VERSION="23.3.0" +MESA_VERSION="24.0.5" +MESA_TAR="mesa-${MESA_VERSION}.tar.xz" +MESA_URL="https://archive.mesa3d.org/${MESA_TAR}" + +LLVM_VERSION="17" + +# Install dependencies +sudo apt update +sudo apt install -y \ + git \ + build-essential \ + meson \ + cmake \ + libglvnd-dev \ + libvdpau-dev \ + glslang-tools \ + libomxil-bellagio-dev \ + libva-dev \ + rustc \ + rustfmt \ + libclc-$LLVM_VERSION-dev \ + python3-mako \ + python3-pycparser \ + zlib1g-dev \ + libzstd-dev \ + libexpat1-dev \ + libbsd-dev \ + libdrm-dev \ + libudev-dev \ + llvm-$LLVM_VERSION-dev \ + libllvm$LLVM_VERSION \ + llvm-spirv-$LLVM_VERSION \ + libllvmspirvlib-$LLVM_VERSION-dev \ + clang-$LLVM_VERSION \ + libclang-$LLVM_VERSION-dev \ + libclang-cpp$LLVM_VERSION-dev \ + libelf-dev \ + valgrind \ + bison \ + byacc \ + flex \ + wayland-protocols \ + libwayland-dev \ + libwayland-egl-backend-dev \ + libxext-dev \ + libxfixes-dev \ + libxcb-glx0-dev \ + libxcb-shm0-dev \ + libx11-xcb-dev \ + libxcb-dri2-0-dev \ + libxcb-dri3-dev \ + libxcb-present-dev \ + libxshmfence-dev \ + libxxf86vm-dev \ + libxrandr-dev \ + bindgen \ + pkg-config + # old deps + # build-essential \ + # meson \ + # ninja-build \ + # libdrm-dev \ + # libx11-dev \ + # libxxf86vm-dev \ + # libxrandr-dev \ + # libxshmfence-dev \ + # libxdamage-dev \ + # libxext-dev \ + # libxfixes-dev \ + # libwayland-dev \ + # libglvnd-dev \ + # libelf-dev \ + # libunwind-dev \ + # libexpat1-dev \ + # libllvm-14-ocaml-dev \ + # libllvm14 \ + # llvm-14 \ + # llvm-14-dev \ + # llvm-14-runtime \ + # libclang-14-dev \ + # clang-14 \ + # libclang-cpp14-dev \ + # libvulkan-dev \ + # glslang-tools \ + # libzstd-dev \ + # libxcb-glx0-dev \ + # libxcb-shm0-dev \ + # libx11-xcb-dev \ + # libxcb-dri2-0-dev \ + # libxcb-dri3-dev \ + # libxcb-present-dev \ + # libxshmfence-dev \ + # libxxf86vm-dev \ + # libxrandr-dev \ + # libwayland-dev \ + # wayland-protocols \ + # libwayland-egl-backend-dev \ + # python3-mako \ + # libvdpau-dev + +# if [ ! -d "$MESA_DIR" ]; then +# git clone -b $MESA_VERSION_BRANCH --depth 1 --single-branch $MESA_REPO $MESA_DIR +# cd $MESA_DIR +# git apply $DIR/mesa-patches-$MESA_VERSION_BRANCH/*.patch +# fi + +# Download and extract Mesa +if [ ! -d "$MESA_DIR" ]; then + curl -L -o "$MESA_TAR" "$MESA_URL" + tar -xf "$MESA_TAR" + mv "mesa-${MESA_VERSION}" "$MESA_DIR" + rm "$MESA_TAR" + cd $MESA_DIR + for patch in $DIR/mesa-patches-$MESA_VERSION/*.patch; do + patch -p1 < "$patch" + done +fi + +cd $MESA_DIR + +export CFLAGS="$CFLAGS -O2 -g1" +export CXXFLAGS="$CXXFLAGS -O2 -g1" +export CPPFLAGS="$CPPFLAGS -O2 -g1" + +# Build Mesa with Freedreno and Rusticl support +# meson setup $BUILD_DIR --prefix=$INSTALL_DIR \ +# -Db_ndebug=true \ +# -Db_lto=true \ +# -Dgallium-drivers=freedreno \ +# -Dgallium-opencl=disabled \ +# -Dvulkan-drivers=freedreno \ +# -Dplatforms=wayland \ +# -Dglx=disabled \ +# -Dllvm=enabled \ +# -Dshared-llvm=enabled \ +# -Dgallium-rusticl=true \ +# -Drust_std=2021 + +meson setup $BUILD_DIR --prefix=$INSTALL_DIR \ + -Db_ndebug=true \ + -Db_lto=false \ + -Dgallium-drivers=freedreno \ + -Dgallium-opencl=disabled \ + -Dvulkan-drivers=freedreno \ + -Dplatforms=wayland \ + -Dglx=disabled \ + -Dllvm=enabled \ + -Dshared-llvm=enabled \ + -Dgallium-rusticl=true \ + -Drust_std=2021 \ + -Dgallium-va=disabled \ + -Dgallium-vdpau=disabled \ + -Dgallium-xa=disabled \ + -Dopengl=false \ + -Dosmesa=false \ + -Dgles1=disabled \ + -Dgles2=disabled \ + -Degl=disabled \ + -Dgallium-extra-hud=false \ + -Dgallium-nine=false + # -Dvideo-codecs=disabled + +ninja -C $BUILD_DIR + +# mkdir -p $INSTALL_DIR/lib +sudo ninja -C $BUILD_DIR install + +echo "Mesa $MESA_VERSION has been built and installed successfully to $INSTALL_DIR" diff --git a/userspace/compile-weston.sh b/userspace/compile-weston.sh new file mode 100755 index 00000000..4358e38e --- /dev/null +++ b/userspace/compile-weston.sh @@ -0,0 +1,35 @@ +#!/bin/bash -e + +sudo apt update +sudo apt install -y xz-utils meson ninja-build pkg-config cmake libxkbcommon-dev libwayland-dev libpixman-1-dev libinput-dev libdrm-dev wayland-protocols libcairo2-dev libjpeg-dev libwebp-dev libegl1-mesa-dev libpam0g-dev libseat-dev liblcms2-dev libgbm-dev libva-dev libpipewire-0.3-dev freerdp2-dev libneatvnc-dev libx11-xcb-dev libxcb-composite0-dev libxcursor-dev libsystemd-dev libgstreamer1.0-dev libgstreamer-plugins-base1.0-dev libpango1.0-dev libxml2-dev libxcb-dev libxcb-cursor-dev + +# Variables +WESTON_DIR="weston" +BUILD_DIR="build" +INSTALL_DIR="/usr/local" +WESTON_VERSION="13.0.3" +WESTON_TAR="weston-${WESTON_VERSION}.tar.xz" +WESTON_URL="https://gitlab.freedesktop.org/wayland/weston/-/releases/${WESTON_VERSION}/downloads/${WESTON_TAR}" + +# Download and extract Weston +if [ ! -d "$WESTON_DIR" ]; then + curl -L -o "$WESTON_TAR" "$WESTON_URL" + tar -xf "$WESTON_TAR" + mv "weston-${WESTON_VERSION}" "$WESTON_DIR" + rm "$WESTON_TAR" +fi + +# Build Weston +cd $WESTON_DIR + +meson setup $BUILD_DIR --prefix=$INSTALL_DIR \ + # -Ddeprecated-backend-fbdev=true \ + -Dbackend-drm=true \ + # -Dbackend-wayland=true \ + # -Dbackend-x11=true \ + # -Dbackend-headless=true \ + -Drenderer-gl=true +ninja -C $BUILD_DIR +# sudo ninja -C $BUILD_DIR install + +echo "Weston $WESTON_VERSION has been built and installed successfully." diff --git a/userspace/mesa-patches-23.3.0/25840.patch b/userspace/mesa-patches-23.3.0/25840.patch new file mode 100644 index 00000000..d1253a8c --- /dev/null +++ b/userspace/mesa-patches-23.3.0/25840.patch @@ -0,0 +1,430 @@ +From 11af8ccacff8987af8e62a3618e238f16b0fe369 Mon Sep 17 00:00:00 2001 +From: Dmitry Baryshkov +Date: Sat, 21 Oct 2023 14:36:00 +0000 +Subject: [PATCH 01/10] freedreno/a6xx: provide clear_buffer implementation + +Provide default unoptimized clear_buffer implementation, required for +rusticl. + +Signed-off-by: Dmitry Baryshkov +--- + src/gallium/drivers/freedreno/a6xx/fd6_blitter.cc | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_blitter.cc b/src/gallium/drivers/freedreno/a6xx/fd6_blitter.cc +index 65264cbdda1db..bb3e04fc3bf6c 100644 +--- a/src/gallium/drivers/freedreno/a6xx/fd6_blitter.cc ++++ b/src/gallium/drivers/freedreno/a6xx/fd6_blitter.cc +@@ -32,6 +32,7 @@ + #include "util/u_dump.h" + #include "util/u_log.h" + #include "util/u_surface.h" ++#include "util/u_transfer.h" + + #include "freedreno_blitter.h" + #include "freedreno_fence.h" +@@ -1404,6 +1405,7 @@ fd6_blitter_init(struct pipe_context *pctx) + return; + + pctx->clear_texture = fd6_clear_texture; ++ pctx->clear_buffer = u_default_clear_buffer; + ctx->blit = fd6_blit; + } + +-- +GitLab + + +From fd1f25c0b7c16ede11df46118c4f88e7eecb87d3 Mon Sep 17 00:00:00 2001 +From: Dmitry Baryshkov +Date: Sat, 21 Oct 2023 14:39:49 +0000 +Subject: [PATCH 02/10] freedreno/a6xx: implement get_compute_state_info + +Provide get_compute_state_info() implementation for rusticl. + +Signed-off-by: Dmitry Baryshkov +--- + .../drivers/freedreno/a6xx/fd6_compute.cc | 21 +++++++++++++++++++ + 1 file changed, 21 insertions(+) + +diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_compute.cc b/src/gallium/drivers/freedreno/a6xx/fd6_compute.cc +index 5b94f4567ae71..c325891424c58 100644 +--- a/src/gallium/drivers/freedreno/a6xx/fd6_compute.cc ++++ b/src/gallium/drivers/freedreno/a6xx/fd6_compute.cc +@@ -259,6 +259,26 @@ fd6_compute_state_delete(struct pipe_context *pctx, void *_hwcso) + free(hwcso); + } + ++static void ++fd6_get_compute_state_info(struct pipe_context *pctx, void *cso, struct pipe_compute_state_object_info *info) ++{ ++ static struct ir3_shader_key key; /* static is implicitly zeroed */ ++ struct fd6_compute_state *cs = (struct fd6_compute_state *)cso; ++ struct ir3_shader_state *hwcso = (struct ir3_shader_state *)cs->hwcso; ++ struct ir3_shader_variant *v = ir3_shader_variant(ir3_get_shader(hwcso), key, false, &pctx->debug); ++ struct fd_context *ctx = fd_context(pctx); ++ ++ if (ctx->screen->info->a6xx.supports_double_threadsize) { ++ info->max_threads = 2048; ++ info->preferred_simd_size = 128; ++ } else { ++ info->max_threads = 1024; ++ info->preferred_simd_size = 64; ++ } ++ info->private_memory = v->pvtmem_size; ++ info->simd_sizes = info->preferred_simd_size; ++} ++ + template + void + fd6_compute_init(struct pipe_context *pctx) +@@ -269,6 +289,7 @@ fd6_compute_init(struct pipe_context *pctx) + ctx->launch_grid = fd6_launch_grid; + pctx->create_compute_state = fd6_compute_state_create; + pctx->delete_compute_state = fd6_compute_state_delete; ++ pctx->get_compute_state_info = fd6_get_compute_state_info; + } + + /* Teach the compiler about needed variants: */ +-- +GitLab + + +From 61fabb6030da2974f85547413f9305b09735fd75 Mon Sep 17 00:00:00 2001 +From: Dmitry Baryshkov +Date: Sat, 21 Oct 2023 14:40:57 +0000 +Subject: [PATCH 03/10] freedreno/a6xx: handle MESA_SHADER_KERNEL in + fd6_emit_shader + +The fd6_emit_shader() contains the `if (type == MESA_SHADER_COMPUTE) +type = MESA_SHADER_COMPUTE` construction, which is obviously useless. +Change that to treat MESA_SHADER_KERNEL as MESA_SHADER_COMPUTE shaders. + +Signed-off-by: Dmitry Baryshkov +--- + src/gallium/drivers/freedreno/a6xx/fd6_program.cc | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_program.cc b/src/gallium/drivers/freedreno/a6xx/fd6_program.cc +index 4ee1852b75756..183bba9b1af84 100644 +--- a/src/gallium/drivers/freedreno/a6xx/fd6_program.cc ++++ b/src/gallium/drivers/freedreno/a6xx/fd6_program.cc +@@ -119,7 +119,7 @@ fd6_emit_shader(struct fd_context *ctx, struct fd_ringbuffer *ring, + #endif + + gl_shader_stage type = so->type; +- if (type == MESA_SHADER_COMPUTE) ++ if (type == MESA_SHADER_KERNEL) + type = MESA_SHADER_COMPUTE; + + enum a6xx_threadsize thrsz = +-- +GitLab + + +From 7f406db42a0c4552d10d46f40569104a540dd4bb Mon Sep 17 00:00:00 2001 +From: Dmitry Baryshkov +Date: Sat, 21 Oct 2023 14:44:04 +0000 +Subject: [PATCH 04/10] freedreno/ir3: treat MESA_SHADER_KERNEL in the same way + as compute + +In ir3_shader_descriptor_set() tread MESA_SHADER_KERNEL shaders in the +same way, as PIPE_SHADER_COMPUTE shaders, return 0. + +Signed-off-by: Dmitry Baryshkov +--- + src/gallium/drivers/freedreno/ir3/ir3_descriptor.h | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/src/gallium/drivers/freedreno/ir3/ir3_descriptor.h b/src/gallium/drivers/freedreno/ir3/ir3_descriptor.h +index fa14f855134ff..8f72961d8e0de 100644 +--- a/src/gallium/drivers/freedreno/ir3/ir3_descriptor.h ++++ b/src/gallium/drivers/freedreno/ir3/ir3_descriptor.h +@@ -59,6 +59,7 @@ ir3_shader_descriptor_set(enum pipe_shader_type shader) + case PIPE_SHADER_GEOMETRY: return 3; + case PIPE_SHADER_FRAGMENT: return 4; + case PIPE_SHADER_COMPUTE: return 0; ++ case MESA_SHADER_KERNEL: return 0; + default: + unreachable("bad shader stage"); + return ~0; +-- +GitLab + + +From ad264249c20ce03094f85b5698f46474767dccd1 Mon Sep 17 00:00:00 2001 +From: Dmitry Baryshkov +Date: Sat, 30 Sep 2023 10:36:19 +0000 +Subject: [PATCH 05/10] rusticl: enable freedreno + +To really use the driver, specify the environment variable: + + export RUSTICL_ENABLE=msm + +Signed-off-by: Dmitry Baryshkov +--- + src/gallium/targets/rusticl/meson.build | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/src/gallium/targets/rusticl/meson.build b/src/gallium/targets/rusticl/meson.build +index b2963fe6dfa76..234eba9fb2fb7 100644 +--- a/src/gallium/targets/rusticl/meson.build ++++ b/src/gallium/targets/rusticl/meson.build +@@ -50,6 +50,7 @@ librusticl = shared_library( + ], + dependencies : [ + driver_asahi, ++ driver_freedreno, + driver_iris, + driver_nouveau, + driver_r600, +-- +GitLab + + +From 1904d5d6ea9c22bc979a26d972ee8ad7315ed4ee Mon Sep 17 00:00:00 2001 +From: Dmitry Baryshkov +Date: Sun, 22 Oct 2023 01:59:12 +0000 +Subject: [PATCH 06/10] ir3: lower the hadd operations + +There do not seem to be instructions for the ihadd/uhadd NIR operations. +Lower them to simpler ops. + +Signed-off-by: Dmitry Baryshkov +--- + src/freedreno/ir3/ir3_compiler.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/src/freedreno/ir3/ir3_compiler.c b/src/freedreno/ir3/ir3_compiler.c +index 34b08b2fe0bbc..03973c691c761 100644 +--- a/src/freedreno/ir3/ir3_compiler.c ++++ b/src/freedreno/ir3/ir3_compiler.c +@@ -121,6 +121,9 @@ static const nir_shader_compiler_options ir3_base_options = { + .lower_cs_local_index_to_id = true, + .lower_wpos_pntc = true, + ++ .lower_hadd = true, ++ .lower_hadd64 = true, ++ + .lower_int64_options = (nir_lower_int64_options)~0, + .lower_doubles_options = (nir_lower_doubles_options)~0, + }; +-- +GitLab + + +From c4c2b81d17de57a7e77d5e2cce8b3266406e71ff Mon Sep 17 00:00:00 2001 +From: Dmitry Baryshkov +Date: Sun, 22 Oct 2023 04:01:23 +0000 +Subject: [PATCH 07/10] ir3: handle nir_intrinsic_load_ubo in + lower_wide_load_store() + +Unlike other load intrinsics, which use simple address as the only source, the +load_ubo intrinsic uses index and offset sources. Modify lower_wide_load_store +accordingly, to handle the load_ubo intrinsic. + +Signed-off-by: Dmitry Baryshkov +--- + .../ir3/ir3_nir_lower_wide_load_store.c | 34 +++++++++++++++++++ + 1 file changed, 34 insertions(+) + +diff --git a/src/freedreno/ir3/ir3_nir_lower_wide_load_store.c b/src/freedreno/ir3/ir3_nir_lower_wide_load_store.c +index a3fb2eec7d97b..8b36ef5e9e2f7 100644 +--- a/src/freedreno/ir3/ir3_nir_lower_wide_load_store.c ++++ b/src/freedreno/ir3/ir3_nir_lower_wide_load_store.c +@@ -79,6 +79,40 @@ lower_wide_load_store(nir_builder *b, nir_instr *instr, void *unused) + } + + return NIR_LOWER_INSTR_PROGRESS_REPLACE; ++ } else if (intr->intrinsic == nir_intrinsic_load_ubo) { ++ unsigned num_comp = nir_intrinsic_dest_components(intr); ++ unsigned bit_size = intr->def.bit_size; ++ nir_def *offset = intr->src[1].ssa; ++ nir_def *components[num_comp]; ++ ++ for (unsigned off = 0; off < num_comp;) { ++ unsigned c = MIN2(num_comp - off, 4); ++ ++ nir_intrinsic_instr *load = ++ nir_intrinsic_instr_create(b->shader, intr->intrinsic); ++ load->num_components = c; ++ load->src[0] = intr->src[0]; ++ load->src[1] = nir_src_for_ssa(offset); ++ nir_intrinsic_set_align(load, nir_intrinsic_align(intr), 0); ++ nir_def_init(&load->instr, &load->def, c, bit_size); ++ if (nir_intrinsic_has_range(intr)) ++ nir_intrinsic_set_range(load, nir_intrinsic_range(intr)); ++ if (nir_intrinsic_has_range_base(intr)) ++ nir_intrinsic_set_range_base(load, nir_intrinsic_range_base(intr)); ++ if (nir_intrinsic_has_base(intr)) ++ nir_intrinsic_set_base(load, nir_intrinsic_base(intr)); ++ nir_builder_instr_insert(b, &load->instr); ++ ++ offset = nir_iadd(b, ++ nir_imm_intN_t(b, (c * bit_size) / 8, offset->bit_size), ++ offset); ++ ++ for (unsigned i = 0; i < c; i++) { ++ components[off++] = nir_channel(b, &load->def, i); ++ } ++ } ++ ++ return nir_build_alu_src_arr(b, nir_op_vec(num_comp), components); + } else { + unsigned num_comp = nir_intrinsic_dest_components(intr); + unsigned bit_size = intr->def.bit_size; +-- +GitLab + + +From af0ae59947b35710974eaa336661d167738abd8e Mon Sep 17 00:00:00 2001 +From: Dmitry Baryshkov +Date: Sun, 22 Oct 2023 09:47:41 +0000 +Subject: [PATCH 08/10] ir3: fix shift amount for 8-bit shifts + +Follow the 16-bit approach and convert shift amount to 8b for 8b shift +instructions. + +Signed-off-by: Dmitry Baryshkov +--- + src/freedreno/ir3/ir3_compiler_nir.c | 8 +++++--- + 1 file changed, 5 insertions(+), 3 deletions(-) + +diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c +index d1f36c7f493ef..0da15340814b1 100644 +--- a/src/freedreno/ir3/ir3_compiler_nir.c ++++ b/src/freedreno/ir3/ir3_compiler_nir.c +@@ -294,10 +294,12 @@ static struct ir3_instruction * + resize_shift_amount(struct ir3_context *ctx, struct ir3_instruction *src, + unsigned bs) + { +- if (bs != 16) ++ if (bs == 16) ++ return ir3_COV(ctx->block, src, TYPE_U32, TYPE_U16); ++ else if (bs == 8) ++ return ir3_COV(ctx->block, src, TYPE_U32, TYPE_U8); ++ else + return src; +- +- return ir3_COV(ctx->block, src, TYPE_U32, TYPE_U16); + } + + static void +-- +GitLab + + +From 67a5854ff5110e86d7c3a2c498c3eef71a58ee77 Mon Sep 17 00:00:00 2001 +From: Dmitry Baryshkov +Date: Sun, 22 Oct 2023 18:05:23 +0000 +Subject: [PATCH 09/10] ir3/a6xx: fix ldg/stg of ulong2 and ulong4 data + +Partially revert the commit f4c9e9329cf ("ir3/a6xx: Fix immediate +offset stg/ldg path"). + +There is no need to multiply the immediate offsets by 4. Doing so +results in loading and/or storing the data at wrong locations. + +Signed-off-by: Dmitry Baryshkov +--- + src/freedreno/ir3/ir3_a6xx.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/src/freedreno/ir3/ir3_a6xx.c b/src/freedreno/ir3/ir3_a6xx.c +index 1592a56a397c0..33b70323678c6 100644 +--- a/src/freedreno/ir3/ir3_a6xx.c ++++ b/src/freedreno/ir3/ir3_a6xx.c +@@ -343,7 +343,7 @@ emit_intrinsic_load_global_ir3(struct ir3_context *ctx, + nir_src_as_int(intr->src[1]) > -(1 << 10); + + if (const_offset_in_bounds) { +- load = ir3_LDG(b, addr, 0, create_immed(b, nir_src_as_int(intr->src[1]) * 4), ++ load = ir3_LDG(b, addr, 0, create_immed(b, nir_src_as_int(intr->src[1])), + 0, create_immed(b, dest_components), 0); + } else { + offset = ir3_get_src(ctx, &intr->src[1])[0]; +@@ -386,7 +386,7 @@ emit_intrinsic_store_global_ir3(struct ir3_context *ctx, + + if (const_offset_in_bounds) { + stg = ir3_STG(b, addr, 0, +- create_immed(b, nir_src_as_int(intr->src[2]) * 4), 0, ++ create_immed(b, nir_src_as_int(intr->src[2])), 0, + value, 0, + create_immed(b, ncomp), 0); + } else { +-- +GitLab + + +From 4fc3f74ef614cdd465b183db077f647e5c9e7aa9 Mon Sep 17 00:00:00 2001 +From: Dmitry Baryshkov +Date: Mon, 13 Nov 2023 21:24:16 +0200 +Subject: [PATCH 10/10] freedreno/drm: fallback to default BO allocation if + heap alloc fails + +Allow fd_bo_heap_alloc() to return NULL if the heap is exausted (or +fragmented) instead of segfaulting. Then handle the error properly in +bo_new(). + +Signed-off-by: Dmitry Baryshkov +--- + src/freedreno/drm/freedreno_bo.c | 8 +++++--- + src/freedreno/drm/freedreno_bo_heap.c | 16 +++++++++++----- + 2 files changed, 16 insertions(+), 8 deletions(-) + +diff --git a/src/freedreno/drm/freedreno_bo.c b/src/freedreno/drm/freedreno_bo.c +index efde63ec73fca..1415cb66ed9a5 100644 +--- a/src/freedreno/drm/freedreno_bo.c ++++ b/src/freedreno/drm/freedreno_bo.c +@@ -132,9 +132,11 @@ bo_new(struct fd_device *dev, uint32_t size, uint32_t flags, + + if (size < FD_BO_HEAP_BLOCK_SIZE) { + if ((flags == 0) && dev->default_heap) +- return fd_bo_heap_alloc(dev->default_heap, size); +- if ((flags == RING_FLAGS) && dev->ring_heap) +- return fd_bo_heap_alloc(dev->ring_heap, size); ++ bo = fd_bo_heap_alloc(dev->default_heap, size); ++ else if ((flags == RING_FLAGS) && dev->ring_heap) ++ bo = fd_bo_heap_alloc(dev->ring_heap, size); ++ if (bo) ++ return bo; + } + + /* demote cached-coherent to WC if not supported: */ +diff --git a/src/freedreno/drm/freedreno_bo_heap.c b/src/freedreno/drm/freedreno_bo_heap.c +index dc1af739d23d4..64c498255e669 100644 +--- a/src/freedreno/drm/freedreno_bo_heap.c ++++ b/src/freedreno/drm/freedreno_bo_heap.c +@@ -211,10 +211,6 @@ fd_bo_heap_alloc(struct fd_bo_heap *heap, uint32_t size) + { + heap_clean(heap, true); + +- struct sa_bo *s = calloc(1, sizeof(*s)); +- +- s->heap = heap; +- + /* util_vma does not like zero byte allocations, which we get, for + * ex, with the initial query buffer allocation on pre-a5xx: + */ +@@ -229,7 +225,17 @@ fd_bo_heap_alloc(struct fd_bo_heap *heap, uint32_t size) + * (The 8k threshold is just a random guess, but seems to work ok) + */ + heap->heap.alloc_high = (size <= 8 * 1024); +- s->offset = util_vma_heap_alloc(&heap->heap, size, SUBALLOC_ALIGNMENT); ++ uint64_t offset = util_vma_heap_alloc(&heap->heap, size, SUBALLOC_ALIGNMENT); ++ if (!offset) { ++ simple_mtx_unlock(&heap->lock); ++ return NULL; ++ } ++ ++ struct sa_bo *s = calloc(1, sizeof(*s)); ++ ++ s->heap = heap; ++ s->offset = offset; ++ + assert((s->offset / FD_BO_HEAP_BLOCK_SIZE) == (s->offset + size - 1) / FD_BO_HEAP_BLOCK_SIZE); + unsigned idx = block_idx(s); + if (HEAP_DEBUG) +-- +GitLab + diff --git a/userspace/mesa-patches-23.3.0/26554.patch b/userspace/mesa-patches-23.3.0/26554.patch new file mode 100644 index 00000000..d074082d --- /dev/null +++ b/userspace/mesa-patches-23.3.0/26554.patch @@ -0,0 +1,30 @@ +From 8cb7b0ed9074493faca6c1b57b95ec1bf5e12bd3 Mon Sep 17 00:00:00 2001 +From: David Heidelberg +Date: Wed, 6 Dec 2023 21:49:06 +0100 +Subject: [PATCH] freedreno: implement PIPE_CAP_TIMER_RESOLUTION + +Allows rusticl to create queues with profiling enabled. + +Fixes: 660f2eabe11 ("gallium: add PIPE_CAP_TIMER_RESOLUTION") + +Signed-off-by: David Heidelberg +--- + src/gallium/drivers/freedreno/freedreno_screen.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c +index 6cadea8c2eb61..e75d21eadee38 100644 +--- a/src/gallium/drivers/freedreno/freedreno_screen.c ++++ b/src/gallium/drivers/freedreno/freedreno_screen.c +@@ -590,6 +590,8 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) + /* only a4xx, requires new enough kernel so we know max_freq: */ + return (screen->max_freq > 0) && + (is_a4xx(screen) || is_a5xx(screen) || is_a6xx(screen)); ++ case PIPE_CAP_TIMER_RESOLUTION: ++ return DIV_ROUND_UP(1000000000ull, 19200000); /* RBBM timer */ + case PIPE_CAP_QUERY_BUFFER_OBJECT: + case PIPE_CAP_QUERY_SO_OVERFLOW: + case PIPE_CAP_QUERY_PIPELINE_STATISTICS_SINGLE: +-- +GitLab + diff --git a/userspace/mesa-patches-24.0.5/rusticl-msm.patch b/userspace/mesa-patches-24.0.5/rusticl-msm.patch new file mode 100644 index 00000000..722617b7 --- /dev/null +++ b/userspace/mesa-patches-24.0.5/rusticl-msm.patch @@ -0,0 +1,246 @@ +From 5b90fd8e295383448f005b2de3bb473658cb4bd7 Mon Sep 17 00:00:00 2001 +From: Dmitry Baryshkov +Date: Sat, 21 Oct 2023 14:36:00 +0000 +Subject: [PATCH 1/6] freedreno/a6xx: provide clear_buffer implementation + +Provide default unoptimized clear_buffer implementation, required for +rusticl. + +Signed-off-by: Dmitry Baryshkov +--- + src/gallium/drivers/freedreno/a6xx/fd6_blitter.cc | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_blitter.cc b/src/gallium/drivers/freedreno/a6xx/fd6_blitter.cc +index 65264cb..bb3e04f 100644 +--- a/src/gallium/drivers/freedreno/a6xx/fd6_blitter.cc ++++ b/src/gallium/drivers/freedreno/a6xx/fd6_blitter.cc +@@ -32,6 +32,7 @@ + #include "util/u_dump.h" + #include "util/u_log.h" + #include "util/u_surface.h" ++#include "util/u_transfer.h" + + #include "freedreno_blitter.h" + #include "freedreno_fence.h" +@@ -1404,6 +1405,7 @@ fd6_blitter_init(struct pipe_context *pctx) + return; + + pctx->clear_texture = fd6_clear_texture; ++ pctx->clear_buffer = u_default_clear_buffer; + ctx->blit = fd6_blit; + } + +-- +2.43.0 + + +From a2bede2791f887e2679bd1db7f9cc0c261e24409 Mon Sep 17 00:00:00 2001 +From: Dmitry Baryshkov +Date: Sat, 21 Oct 2023 14:39:49 +0000 +Subject: [PATCH 2/6] freedreno/a6xx: implement get_compute_state_info + +Provide get_compute_state_info() implementation for rusticl. + +Signed-off-by: Dmitry Baryshkov +--- + .../drivers/freedreno/a6xx/fd6_compute.cc | 21 +++++++++++++++++++ + 1 file changed, 21 insertions(+) + +diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_compute.cc b/src/gallium/drivers/freedreno/a6xx/fd6_compute.cc +index 5b94f45..c325891 100644 +--- a/src/gallium/drivers/freedreno/a6xx/fd6_compute.cc ++++ b/src/gallium/drivers/freedreno/a6xx/fd6_compute.cc +@@ -259,6 +259,26 @@ fd6_compute_state_delete(struct pipe_context *pctx, void *_hwcso) + free(hwcso); + } + ++static void ++fd6_get_compute_state_info(struct pipe_context *pctx, void *cso, struct pipe_compute_state_object_info *info) ++{ ++ static struct ir3_shader_key key; /* static is implicitly zeroed */ ++ struct fd6_compute_state *cs = (struct fd6_compute_state *)cso; ++ struct ir3_shader_state *hwcso = (struct ir3_shader_state *)cs->hwcso; ++ struct ir3_shader_variant *v = ir3_shader_variant(ir3_get_shader(hwcso), key, false, &pctx->debug); ++ struct fd_context *ctx = fd_context(pctx); ++ ++ if (ctx->screen->info->a6xx.supports_double_threadsize) { ++ info->max_threads = 2048; ++ info->preferred_simd_size = 128; ++ } else { ++ info->max_threads = 1024; ++ info->preferred_simd_size = 64; ++ } ++ info->private_memory = v->pvtmem_size; ++ info->simd_sizes = info->preferred_simd_size; ++} ++ + template + void + fd6_compute_init(struct pipe_context *pctx) +@@ -269,6 +289,7 @@ fd6_compute_init(struct pipe_context *pctx) + ctx->launch_grid = fd6_launch_grid; + pctx->create_compute_state = fd6_compute_state_create; + pctx->delete_compute_state = fd6_compute_state_delete; ++ pctx->get_compute_state_info = fd6_get_compute_state_info; + } + + /* Teach the compiler about needed variants: */ +-- +2.43.0 + + +From c493536a694e0c61ea6952e2fd3787b3f8224b48 Mon Sep 17 00:00:00 2001 +From: Dmitry Baryshkov +Date: Sat, 21 Oct 2023 14:44:04 +0000 +Subject: [PATCH 3/6] freedreno/ir3: treat MESA_SHADER_KERNEL in the same way + as compute + +In ir3_shader_descriptor_set() tread MESA_SHADER_KERNEL shaders in the +same way, as PIPE_SHADER_COMPUTE shaders, return 0. + +Signed-off-by: Dmitry Baryshkov +--- + src/gallium/drivers/freedreno/ir3/ir3_descriptor.h | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/src/gallium/drivers/freedreno/ir3/ir3_descriptor.h b/src/gallium/drivers/freedreno/ir3/ir3_descriptor.h +index fa14f85..8f72961 100644 +--- a/src/gallium/drivers/freedreno/ir3/ir3_descriptor.h ++++ b/src/gallium/drivers/freedreno/ir3/ir3_descriptor.h +@@ -59,6 +59,7 @@ ir3_shader_descriptor_set(enum pipe_shader_type shader) + case PIPE_SHADER_GEOMETRY: return 3; + case PIPE_SHADER_FRAGMENT: return 4; + case PIPE_SHADER_COMPUTE: return 0; ++ case MESA_SHADER_KERNEL: return 0; + default: + unreachable("bad shader stage"); + return ~0; +-- +2.43.0 + + +From 874478a15f7aac8d9a319abf2bb3993ee4cd0baf Mon Sep 17 00:00:00 2001 +From: Dmitry Baryshkov +Date: Sat, 30 Sep 2023 10:36:19 +0000 +Subject: [PATCH 4/6] rusticl: enable freedreno + +To really use the driver, specify the environment variable: + + export RUSTICL_ENABLE=msm + +Signed-off-by: Dmitry Baryshkov +--- + src/gallium/targets/rusticl/meson.build | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/src/gallium/targets/rusticl/meson.build b/src/gallium/targets/rusticl/meson.build +index 8853625..d7bc2e5 100644 +--- a/src/gallium/targets/rusticl/meson.build ++++ b/src/gallium/targets/rusticl/meson.build +@@ -34,6 +34,7 @@ librusticl = shared_library( + ], + dependencies : [ + driver_asahi, ++ driver_freedreno, + driver_iris, + driver_nouveau, + driver_r600, +-- +2.43.0 + + +From 4d0c32c3dfbf52988314989b771593895be72b95 Mon Sep 17 00:00:00 2001 +From: Dmitry Baryshkov +Date: Sun, 22 Oct 2023 01:59:12 +0000 +Subject: [PATCH 5/6] ir3: lower the hadd operations + +There do not seem to be instructions for the ihadd/uhadd NIR operations. +Lower them to simpler ops. + +Signed-off-by: Dmitry Baryshkov +--- + src/freedreno/ir3/ir3_compiler.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/src/freedreno/ir3/ir3_compiler.c b/src/freedreno/ir3/ir3_compiler.c +index 6a2a2eb..71ceab6 100644 +--- a/src/freedreno/ir3/ir3_compiler.c ++++ b/src/freedreno/ir3/ir3_compiler.c +@@ -124,6 +124,9 @@ static const nir_shader_compiler_options ir3_base_options = { + .lower_cs_local_index_to_id = true, + .lower_wpos_pntc = true, + ++ .lower_hadd = true, ++ .lower_hadd64 = true, ++ + .lower_int64_options = (nir_lower_int64_options)~0, + .lower_doubles_options = (nir_lower_doubles_options)~0, + +-- +2.43.0 + + +From b97baaacb0ee44c478af5f0d0d8b29b424878492 Mon Sep 17 00:00:00 2001 +From: Dmitry Baryshkov +Date: Sun, 22 Oct 2023 04:01:23 +0000 +Subject: [PATCH 6/6] ir3: handle nir_intrinsic_load_ubo in + lower_wide_load_store() + +Unlike other load intrinsics, which use simple address as the only source, the +load_ubo intrinsic uses index and offset sources. Modify lower_wide_load_store +accordingly, to handle the load_ubo intrinsic. + +Signed-off-by: Dmitry Baryshkov +--- + .../ir3/ir3_nir_lower_wide_load_store.c | 34 +++++++++++++++++++ + 1 file changed, 34 insertions(+) + +diff --git a/src/freedreno/ir3/ir3_nir_lower_wide_load_store.c b/src/freedreno/ir3/ir3_nir_lower_wide_load_store.c +index a3fb2ee..8b36ef5 100644 +--- a/src/freedreno/ir3/ir3_nir_lower_wide_load_store.c ++++ b/src/freedreno/ir3/ir3_nir_lower_wide_load_store.c +@@ -79,6 +79,40 @@ lower_wide_load_store(nir_builder *b, nir_instr *instr, void *unused) + } + + return NIR_LOWER_INSTR_PROGRESS_REPLACE; ++ } else if (intr->intrinsic == nir_intrinsic_load_ubo) { ++ unsigned num_comp = nir_intrinsic_dest_components(intr); ++ unsigned bit_size = intr->def.bit_size; ++ nir_def *offset = intr->src[1].ssa; ++ nir_def *components[num_comp]; ++ ++ for (unsigned off = 0; off < num_comp;) { ++ unsigned c = MIN2(num_comp - off, 4); ++ ++ nir_intrinsic_instr *load = ++ nir_intrinsic_instr_create(b->shader, intr->intrinsic); ++ load->num_components = c; ++ load->src[0] = intr->src[0]; ++ load->src[1] = nir_src_for_ssa(offset); ++ nir_intrinsic_set_align(load, nir_intrinsic_align(intr), 0); ++ nir_def_init(&load->instr, &load->def, c, bit_size); ++ if (nir_intrinsic_has_range(intr)) ++ nir_intrinsic_set_range(load, nir_intrinsic_range(intr)); ++ if (nir_intrinsic_has_range_base(intr)) ++ nir_intrinsic_set_range_base(load, nir_intrinsic_range_base(intr)); ++ if (nir_intrinsic_has_base(intr)) ++ nir_intrinsic_set_base(load, nir_intrinsic_base(intr)); ++ nir_builder_instr_insert(b, &load->instr); ++ ++ offset = nir_iadd(b, ++ nir_imm_intN_t(b, (c * bit_size) / 8, offset->bit_size), ++ offset); ++ ++ for (unsigned i = 0; i < c; i++) { ++ components[off++] = nir_channel(b, &load->def, i); ++ } ++ } ++ ++ return nir_build_alu_src_arr(b, nir_op_vec(num_comp), components); + } else { + unsigned num_comp = nir_intrinsic_dest_components(intr); + unsigned bit_size = intr->def.bit_size; +-- +2.43.0 +