From 24e67286663d3eac9085befaac6a55e46ffa945a Mon Sep 17 00:00:00 2001
From: Marcin Kurczewski <dash@wind.garden>
Date: Tue, 30 Apr 2024 19:07:22 +0200
Subject: [PATCH] audio: add from TR1X

---
 include/libtrx/engine/audio.h |  47 +++
 meson.build                   |  21 ++
 src/engine/audio.c            | 118 ++++++
 src/engine/audio.h            |  24 ++
 src/engine/audio_sample.c     | 682 ++++++++++++++++++++++++++++++++++
 src/engine/audio_stream.c     | 602 ++++++++++++++++++++++++++++++
 tools/ffmpeg_flags.txt        |  27 ++
 7 files changed, 1521 insertions(+)
 create mode 100644 include/libtrx/engine/audio.h
 create mode 100644 src/engine/audio.c
 create mode 100644 src/engine/audio.h
 create mode 100644 src/engine/audio_sample.c
 create mode 100644 src/engine/audio_stream.c
 create mode 100644 tools/ffmpeg_flags.txt

diff --git a/include/libtrx/engine/audio.h b/include/libtrx/engine/audio.h
new file mode 100644
index 0000000..8a463f6
--- /dev/null
+++ b/include/libtrx/engine/audio.h
@@ -0,0 +1,47 @@
+#pragma once
+
+#include <SDL2/SDL_audio.h>
+#include <libavutil/samplefmt.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#define AUDIO_MAX_SAMPLES 1000
+#define AUDIO_MAX_ACTIVE_SAMPLES 50
+#define AUDIO_MAX_ACTIVE_STREAMS 10
+#define AUDIO_NO_SOUND (-1)
+
+bool Audio_Init(void);
+bool Audio_Shutdown(void);
+
+bool Audio_Stream_Pause(int32_t sound_id);
+bool Audio_Stream_Unpause(int32_t sound_id);
+int32_t Audio_Stream_CreateFromFile(const char *path);
+bool Audio_Stream_Close(int32_t sound_id);
+bool Audio_Stream_IsLooped(int32_t sound_id);
+bool Audio_Stream_SetVolume(int32_t sound_id, float volume);
+bool Audio_Stream_SetIsLooped(int32_t sound_id, bool is_looped);
+bool Audio_Stream_SetFinishCallback(
+    int32_t sound_id, void (*callback)(int32_t sound_id, void *user_data),
+    void *user_data);
+double Audio_Stream_GetTimestamp(int32_t sound_id);
+double Audio_Stream_GetDuration(int32_t sound_id);
+bool Audio_Stream_SeekTimestamp(int32_t sound_id, double timestamp);
+bool Audio_Stream_SetStartTimestamp(int32_t sound_id, double timestamp);
+bool Audio_Stream_SetStopTimestamp(int32_t sound_id, double timestamp);
+
+bool Audio_Sample_ClearAll(void);
+bool Audio_Sample_Load(size_t count, const char **contents, size_t *sizes);
+
+int32_t Audio_Sample_Play(
+    int32_t sample_id, int32_t volume, float pitch, int32_t pan,
+    bool is_looped);
+bool Audio_Sample_IsPlaying(int32_t sound_id);
+bool Audio_Sample_Pause(int32_t sound_id);
+bool Audio_Sample_PauseAll(void);
+bool Audio_Sample_Unpause(int32_t sound_id);
+bool Audio_Sample_UnpauseAll(void);
+bool Audio_Sample_Close(int32_t sound_id);
+bool Audio_Sample_CloseAll(void);
+bool Audio_Sample_SetPan(int32_t sound_id, int32_t pan);
+bool Audio_Sample_SetVolume(int32_t sound_id, int32_t volume);
diff --git a/meson.build b/meson.build
index c449d08..893b05c 100644
--- a/meson.build
+++ b/meson.build
@@ -23,12 +23,27 @@ if host_machine.system() == 'darwin'
   staticdeps = false
 endif
 
+null_dep = dependency('', required: false)
+dep_avcodec = dependency('libavcodec', static: staticdeps)
+dep_avformat = dependency('libavformat', static: staticdeps)
+dep_avutil = dependency('libavutil', static: staticdeps)
 dep_sdl2 = dependency('SDL2', static: staticdeps)
 dep_pcre2 = dependency('libpcre2-8', static: staticdeps)
 dep_backtrace = c_compiler.find_library('backtrace', static: true, required: false)
+dep_swscale = dependency('libswscale', static: staticdeps)
+dep_swresample = dependency('libswresample', static: staticdeps)
+
+dep_zlib = null_dep
+
+if not staticdeps
+  dep_zlib = dependency('zlib', static: staticdeps)
+endif
 
 sources = [
   'src/filesystem.c',
+  'src/engine/audio.c',
+  'src/engine/audio_sample.c',
+  'src/engine/audio_stream.c',
   'src/json/bson_parse.c',
   'src/json/bson_write.c',
   'src/json/json_base.c',
@@ -40,9 +55,15 @@ sources = [
 ]
 
 dependencies = [
+  dep_avcodec,
+  dep_avformat,
+  dep_avutil,
   dep_sdl2,
   dep_pcre2,
   dep_backtrace,
+  dep_swresample,
+  dep_swscale,
+  dep_zlib,
 ]
 
 if dep_backtrace.found() and host_machine.system() == 'linux'
diff --git a/src/engine/audio.c b/src/engine/audio.c
new file mode 100644
index 0000000..21d7c7d
--- /dev/null
+++ b/src/engine/audio.c
@@ -0,0 +1,118 @@
+#include "audio.h"
+
+#include "log.h"
+#include "memory.h"
+
+#include <SDL2/SDL.h>
+#include <SDL2/SDL_error.h>
+#include <SDL2/SDL_stdinc.h>
+#include <stdint.h>
+#include <string.h>
+
+SDL_AudioDeviceID g_AudioDeviceID = 0;
+static int32_t m_RefCount = 0;
+static size_t m_WorkingBufferSize = 0;
+static float *m_WorkingBuffer = NULL;
+static Uint8 m_WorkingSilence = 0;
+
+// SDL audio callback: starts from silence, lets the stream and sample mixers
+// add their output, then copies the mix to SDL. `userdata` is unused.
+static void Audio_MixerCallback(void *userdata, Uint8 *stream_data, int32_t len)
+{
+    float *const mix_buffer = m_WorkingBuffer;
+    memset(mix_buffer, m_WorkingSilence, len);
+    Audio_Stream_Mix(mix_buffer, len);
+    Audio_Sample_Mix(mix_buffer, len);
+    memcpy(stream_data, mix_buffer, len);
+}
+
+// Initialises the audio backend: SDL audio, the sample and stream pools, the
+// output device and the mixing buffer. Every call bumps the reference count;
+// returns true on success or if the device is already open, false otherwise.
+bool Audio_Init(void)
+{
+    m_RefCount++;
+    if (g_AudioDeviceID) {
+        // already initialized
+        return true;
+    }
+
+    const int32_t result = SDL_Init(SDL_INIT_AUDIO);
+    if (result < 0) {
+        LOG_ERROR("Error while calling SDL_Init: 0x%x", (unsigned)result);
+        return false;
+    }
+
+    Audio_Sample_Init();
+    Audio_Stream_Init();
+
+    SDL_AudioSpec desired;
+    SDL_memset(&desired, 0, sizeof(desired));
+    desired.freq = AUDIO_WORKING_RATE;
+    desired.format = AUDIO_WORKING_FORMAT;
+    desired.channels = AUDIO_WORKING_CHANNELS;
+    desired.samples = AUDIO_SAMPLES;
+    desired.callback = Audio_MixerCallback;
+    desired.userdata = NULL;
+
+    SDL_AudioSpec delivered;
+    g_AudioDeviceID = SDL_OpenAudioDevice(NULL, 0, &desired, &delivered, 0);
+    if (!g_AudioDeviceID) {
+        LOG_ERROR("Failed to open audio device: %s", SDL_GetError());
+        return false;
+    }
+
+    // SDL computes `silence` and `size` only in the spec it hands back.
+    m_WorkingSilence = delivered.silence;
+    m_WorkingBufferSize = delivered.size;
+    m_WorkingBuffer = Memory_Alloc(m_WorkingBufferSize);
+
+    SDL_PauseAudioDevice(g_AudioDeviceID, 0);
+    return true;
+}
+
+// Drops one reference; the final release tears down samples, streams, the
+// output device and the mixing buffer. Returns false while references remain.
+bool Audio_Shutdown(void)
+{
+    if (--m_RefCount > 0) {
+        return false;
+    }
+
+    Audio_Sample_Shutdown();
+    Audio_Stream_Shutdown();
+    const SDL_AudioDeviceID device_id = g_AudioDeviceID;
+    if (device_id != 0) {
+        SDL_PauseAudioDevice(device_id, 1);
+        SDL_CloseAudioDevice(device_id);
+        g_AudioDeviceID = 0;
+    }
+    Memory_FreePointer(&m_WorkingBuffer);
+    return true;
+}
+
+int32_t Audio_GetAVAudioFormat(const int32_t sample_fmt)
+{
+    // Translate an SDL audio format constant into the corresponding FFmpeg
+    // sample format; formats without a counterpart here map to -1.
+    // clang-format off
+    if (sample_fmt == AUDIO_U8)  { return AV_SAMPLE_FMT_U8; }
+    if (sample_fmt == AUDIO_S16) { return AV_SAMPLE_FMT_S16; }
+    if (sample_fmt == AUDIO_S32) { return AV_SAMPLE_FMT_S32; }
+    if (sample_fmt == AUDIO_F32) { return AV_SAMPLE_FMT_FLT; }
+    // clang-format on
+    return -1;
+}
+
+int32_t Audio_GetSDLAudioFormat(const enum AVSampleFormat sample_fmt)
+{
+    // Translate an FFmpeg sample format into the corresponding SDL audio
+    // format constant; formats without a counterpart here map to -1.
+    // clang-format off
+    if (sample_fmt == AV_SAMPLE_FMT_U8)  { return AUDIO_U8; }
+    if (sample_fmt == AV_SAMPLE_FMT_S16) { return AUDIO_S16; }
+    if (sample_fmt == AV_SAMPLE_FMT_S32) { return AUDIO_S32; }
+    if (sample_fmt == AV_SAMPLE_FMT_FLT) { return AUDIO_F32; }
+    // clang-format on
+    return -1;
+}
diff --git a/src/engine/audio.h b/src/engine/audio.h
new file mode 100644
index 0000000..a68ea2a
--- /dev/null
+++ b/src/engine/audio.h
@@ -0,0 +1,24 @@
+#pragma once
+
+#include "engine/audio.h"
+
+#include <libavformat/avformat.h>
+#include <SDL2/SDL.h>
+
+#define AUDIO_WORKING_RATE 44100
+#define AUDIO_WORKING_FORMAT AUDIO_F32
+#define AUDIO_SAMPLES 500
+#define AUDIO_WORKING_CHANNELS 2
+
+extern SDL_AudioDeviceID g_AudioDeviceID;
+
+int32_t Audio_GetAVAudioFormat(const int32_t sample_fmt);
+int32_t Audio_GetSDLAudioFormat(const enum AVSampleFormat sample_fmt);
+
+void Audio_Sample_Init(void);
+void Audio_Sample_Shutdown(void);
+void Audio_Sample_Mix(float *dst_buffer, size_t len);
+
+void Audio_Stream_Init(void);
+void Audio_Stream_Shutdown(void);
+void Audio_Stream_Mix(float *dst_buffer, size_t len);
diff --git a/src/engine/audio_sample.c b/src/engine/audio_sample.c
new file mode 100644
index 0000000..b34dfce
--- /dev/null
+++ b/src/engine/audio_sample.c
@@ -0,0 +1,682 @@
+#include "audio.h"
+
+#include "log.h"
+#include "memory.h"
+
+#include <SDL2/SDL_audio.h>
+#include <assert.h>
+#include <errno.h>
+#include <libavcodec/avcodec.h>
+#include <libavcodec/codec.h>
+#include <libavcodec/packet.h>
+#include <libavformat/avformat.h>
+#include <libavformat/avio.h>
+#include <libavutil/avutil.h>
+#include <libavutil/error.h>
+#include <libavutil/frame.h>
+#include <libavutil/mem.h>
+#include <libavutil/samplefmt.h>
+#include <libswresample/swresample.h>
+#include <math.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+
+typedef struct AUDIO_SAMPLE {
+    float *sample_data; // decoded float PCM produced by Audio_SampleLoad
+    int32_t channels; // channel count recorded by Audio_SampleLoad
+    int32_t num_samples; // length of sample_data, in samples per channel
+} AUDIO_SAMPLE;
+
+typedef struct AUDIO_SAMPLE_SOUND {
+    bool is_used; // slot is taken: set by Audio_Sample_Play, cleared on close
+    bool is_looped; // restart from the first sample when the end is reached
+    bool is_playing; // false while paused and after the sound was closed
+    float volume_l; // sample gain multiplier
+    float volume_r; // sample gain multiplier
+
+    float pitch; // source samples advanced per mixed output frame
+    int32_t volume; // volume specified in hundredths of decibel
+    int32_t pan; // pan specified in hundredths of decibel
+
+    // pitch shift means the same samples can be reused twice, hence float
+    float current_sample;
+
+    AUDIO_SAMPLE *sample; // entry of m_LoadedSamples being played
+} AUDIO_SAMPLE_SOUND;
+
+typedef struct AUDIO_AV_BUFFER {
+    const char *data; // start of the in-memory file
+    const char *ptr; // current read position inside data
+    int32_t size; // total length of data, in bytes
+    int32_t remaining; // bytes left between ptr and the end of data
+} AUDIO_AV_BUFFER;
+
+static int32_t m_LoadedSamplesCount = 0;
+static AUDIO_SAMPLE m_LoadedSamples[AUDIO_MAX_SAMPLES] = { 0 };
+static AUDIO_SAMPLE_SOUND m_Sample_s[AUDIO_MAX_ACTIVE_SAMPLES] = { 0 };
+
+static double Audio_DecibelToMultiplier(double db_gain)
+{
+    return pow(2.0, db_gain / 600.0); // every +600 units doubles the gain
+}
+
+// Recomputes a sound's left/right gain multipliers from its volume and pan.
+static bool Audio_SampleRecalculateChannelVolumes(int32_t sound_id)
+{
+    const bool in_range = sound_id >= 0 && sound_id < AUDIO_MAX_ACTIVE_SAMPLES;
+    if (!g_AudioDeviceID || !in_range) {
+        return false;
+    }
+    AUDIO_SAMPLE_SOUND *const snd = &m_Sample_s[sound_id];
+    // positive pan attenuates the left channel, negative pan the right one
+    const int32_t left_cut = snd->pan > 0 ? snd->pan : 0;
+    const int32_t right_add = snd->pan < 0 ? snd->pan : 0;
+    snd->volume_l = Audio_DecibelToMultiplier(snd->volume - left_cut);
+    snd->volume_r = Audio_DecibelToMultiplier(snd->volume + right_add);
+    return true;
+}
+
+static int32_t Audio_ReadAVBuffer(void *opaque, uint8_t *dst, int32_t dst_size)
+{
+    assert(opaque != NULL);
+    assert(dst != NULL);
+    AUDIO_AV_BUFFER *const buf = opaque; // in-memory file being read by AVIO
+    const int32_t count = dst_size < buf->remaining ? dst_size : buf->remaining;
+    if (count == 0) {
+        return AVERROR_EOF;
+    }
+    memcpy(dst, buf->ptr, count);
+    buf->ptr += count;
+    buf->remaining -= count;
+    return count;
+}
+
+// AVIO seek callback for the in-memory buffer. Returns the new absolute
+// position, the buffer size for AVSEEK_SIZE, or a negative AVERROR code when
+// the target falls outside [0, size] or `whence` is unsupported.
+static int64_t Audio_SeekAVBuffer(void *opaque, int64_t offset, int32_t whence)
+{
+    assert(opaque != NULL);
+    AUDIO_AV_BUFFER *src = opaque;
+    if (whence & AVSEEK_SIZE) {
+        return src->size;
+    }
+
+    int64_t base = 0; // absolute position that `offset` is relative to
+    switch (whence & ~AVSEEK_FORCE) {
+    case SEEK_SET:
+        break;
+    case SEEK_CUR:
+        base = src->size - src->remaining;
+        break;
+    case SEEK_END:
+        base = src->size;
+        break;
+    default:
+        return AVERROR(EINVAL);
+    }
+    // compare before adding so that extreme offsets cannot overflow
+    if (offset < -base || offset > src->size - base) {
+        return AVERROR_EOF;
+    }
+    src->ptr = src->data + (base + offset);
+    src->remaining = (int32_t)(src->size - (base + offset));
+    return base + offset;
+}
+
+// Decodes one in-memory audio file into mono float PCM at AUDIO_WORKING_RATE
+// and stores it in m_LoadedSamples[sample_id]. Returns false if the audio
+// device is closed, the id is out of range, or any libav step fails; a libav
+// failure is logged and leaves the slot empty.
+static bool Audio_SampleLoad(
+    int32_t sample_id, const char *content, size_t size)
+{
+    assert(content != NULL);
+
+    if (!g_AudioDeviceID || sample_id < 0 || sample_id >= AUDIO_MAX_SAMPLES) {
+        return false;
+    }
+
+    bool ret = false;
+    AUDIO_SAMPLE *sample = &m_LoadedSamples[sample_id];
+
+    size_t working_buffer_size = 0;
+    float *working_buffer = NULL;
+
+    struct {
+        size_t read_buffer_size;
+        AVIOContext *avio_context;
+        AVStream *stream;
+        AVFormatContext *format_ctx;
+        const AVCodec *codec;
+        AVCodecContext *codec_ctx;
+        AVPacket *packet;
+        AVFrame *frame;
+    } av = {
+        .read_buffer_size = 8192,
+        .avio_context = NULL,
+        .stream = NULL,
+        .format_ctx = NULL,
+        .codec = NULL,
+        .codec_ctx = NULL,
+        .packet = NULL,
+        .frame = NULL,
+    };
+
+    struct {
+        int32_t src_format;
+        int32_t src_channels;
+        int32_t src_sample_rate;
+        int32_t dst_format;
+        int32_t dst_channels;
+        int32_t dst_sample_rate;
+        SwrContext *ctx;
+    } swr = { 0 };
+
+    int32_t error_code = 0;
+
+    unsigned char *read_buffer = av_malloc(av.read_buffer_size);
+    if (!read_buffer) {
+        error_code = AVERROR(ENOMEM);
+        goto cleanup;
+    }
+
+    AUDIO_AV_BUFFER av_buf = {
+        .data = content,
+        .ptr = content,
+        .size = size,
+        .remaining = size,
+    };
+
+    av.avio_context = avio_alloc_context(
+        read_buffer, av.read_buffer_size, 0, &av_buf, Audio_ReadAVBuffer, NULL,
+        Audio_SeekAVBuffer);
+    if (!av.avio_context) {
+        av_free(read_buffer); // no AVIOContext owns the buffer yet
+        error_code = AVERROR(ENOMEM);
+        goto cleanup;
+    }
+
+    av.format_ctx = avformat_alloc_context();
+    if (!av.format_ctx) {
+        error_code = AVERROR(ENOMEM);
+        goto cleanup;
+    }
+    av.format_ctx->pb = av.avio_context;
+    error_code =
+        avformat_open_input(&av.format_ctx, "dummy_filename", NULL, NULL);
+    if (error_code != 0) {
+        goto cleanup;
+    }
+
+    error_code = avformat_find_stream_info(av.format_ctx, NULL);
+    if (error_code < 0) {
+        goto cleanup;
+    }
+
+    for (uint32_t i = 0; i < av.format_ctx->nb_streams; i++) {
+        AVStream *current_stream = av.format_ctx->streams[i];
+        if (current_stream->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) {
+            av.stream = current_stream;
+            break;
+        }
+    }
+    if (!av.stream) {
+        error_code = AVERROR_STREAM_NOT_FOUND;
+        goto cleanup;
+    }
+
+    av.codec = avcodec_find_decoder(av.stream->codecpar->codec_id);
+    if (!av.codec) {
+        error_code = AVERROR_DECODER_NOT_FOUND;
+        goto cleanup;
+    }
+
+    av.codec_ctx = avcodec_alloc_context3(av.codec);
+    if (!av.codec_ctx) {
+        error_code = AVERROR(ENOMEM);
+        goto cleanup;
+    }
+
+    error_code =
+        avcodec_parameters_to_context(av.codec_ctx, av.stream->codecpar);
+    if (error_code) {
+        goto cleanup;
+    }
+
+    error_code = avcodec_open2(av.codec_ctx, av.codec, NULL);
+    if (error_code < 0) {
+        goto cleanup;
+    }
+
+    av.packet = av_packet_alloc();
+    if (!av.packet) {
+        error_code = AVERROR(ENOMEM);
+        goto cleanup;
+    }
+
+    av.frame = av_frame_alloc();
+    if (!av.frame) {
+        error_code = AVERROR(ENOMEM);
+        goto cleanup;
+    }
+
+    while (1) {
+        error_code = av_read_frame(av.format_ctx, av.packet);
+        if (error_code == AVERROR_EOF) {
+            av_packet_unref(av.packet);
+            error_code = 0;
+            break;
+        }
+
+        if (error_code < 0) {
+            av_packet_unref(av.packet);
+            goto cleanup;
+        }
+
+        error_code = avcodec_send_packet(av.codec_ctx, av.packet);
+        if (error_code < 0) {
+            av_packet_unref(av.packet);
+            goto cleanup;
+        }
+        // NOTE(review): swr_alloc_set_opts takes channel layouts, not counts
+        if (!swr.ctx) {
+            swr.src_sample_rate = av.codec_ctx->sample_rate;
+            swr.src_channels = av.codec_ctx->channels;
+            swr.src_format = av.codec_ctx->sample_fmt;
+            swr.dst_sample_rate = AUDIO_WORKING_RATE;
+            swr.dst_channels = 1;
+            swr.dst_format = Audio_GetAVAudioFormat(AUDIO_WORKING_FORMAT);
+            swr.ctx = swr_alloc_set_opts(
+                swr.ctx, swr.dst_channels, swr.dst_format, swr.dst_sample_rate,
+                swr.src_channels, swr.src_format, swr.src_sample_rate, 0, 0);
+            if (!swr.ctx) {
+                av_packet_unref(av.packet);
+                error_code = AVERROR(ENOMEM);
+                goto cleanup;
+            }
+
+            error_code = swr_init(swr.ctx);
+            if (error_code != 0) {
+                av_packet_unref(av.packet);
+                goto cleanup;
+            }
+        }
+
+        while (1) {
+            error_code = avcodec_receive_frame(av.codec_ctx, av.frame);
+            if (error_code == AVERROR(EAGAIN)) {
+                av_frame_unref(av.frame);
+                break;
+            }
+
+            if (error_code < 0) {
+                av_packet_unref(av.packet);
+                av_frame_unref(av.frame);
+                goto cleanup;
+            }
+
+            uint8_t *out_buffer = NULL;
+            const int32_t out_samples =
+                swr_get_out_samples(swr.ctx, av.frame->nb_samples);
+            av_samples_alloc(
+                &out_buffer, NULL, swr.dst_channels, out_samples,
+                swr.dst_format, 1);
+            int32_t resampled_size = swr_convert(
+                swr.ctx, &out_buffer, out_samples,
+                (const uint8_t **)av.frame->data, av.frame->nb_samples);
+            while (resampled_size > 0) {
+                int32_t out_buffer_size = av_samples_get_buffer_size(
+                    NULL, swr.dst_channels, resampled_size, swr.dst_format, 1);
+
+                if (out_buffer_size > 0) {
+                    working_buffer = Memory_Realloc(
+                        working_buffer, working_buffer_size + out_buffer_size);
+                    if (out_buffer) {
+                        memcpy(
+                            (uint8_t *)working_buffer + working_buffer_size,
+                            out_buffer, out_buffer_size);
+                    }
+                    working_buffer_size += out_buffer_size;
+                }
+
+                resampled_size =
+                    swr_convert(swr.ctx, &out_buffer, out_samples, NULL, 0);
+            }
+
+            av_freep(&out_buffer);
+            av_frame_unref(av.frame);
+        }
+
+        av_packet_unref(av.packet);
+    }
+
+    int32_t sample_format_bytes = av_get_bytes_per_sample(swr.dst_format);
+    sample->num_samples =
+        working_buffer_size / sample_format_bytes / swr.dst_channels;
+    sample->channels = swr.dst_channels; // layout of the resampled buffer
+    sample->sample_data = working_buffer;
+
+    ret = true;
+
+cleanup:
+    if (error_code < 0) {
+        LOG_ERROR(
+            "Error while opening sample ID %d: %s", sample_id,
+            av_err2str(error_code));
+    }
+
+    // the libav free helpers below all accept a pointer to NULL
+    swr_free(&swr.ctx);
+    av_frame_free(&av.frame);
+    av_packet_free(&av.packet);
+
+    if (!ret) {
+        sample->sample_data = NULL;
+        sample->num_samples = 0;
+        sample->channels = 0;
+
+        Memory_FreePointer(&working_buffer);
+    }
+
+    avcodec_free_context(&av.codec_ctx);
+
+    if (av.format_ctx) {
+        avformat_close_input(&av.format_ctx);
+    }
+
+    if (av.avio_context) {
+        av_freep(&av.avio_context->buffer);
+        avio_context_free(&av.avio_context);
+    }
+
+    return ret;
+}
+
+void Audio_Sample_Init(void)
+{
+    // Put every slot of the active-sound pool into its idle state.
+    for (int32_t i = 0; i < AUDIO_MAX_ACTIVE_SAMPLES; i++) {
+        AUDIO_SAMPLE_SOUND *const slot = &m_Sample_s[i];
+        slot->sample = NULL;
+        slot->current_sample = 0.0f;
+        slot->pan = 0;
+        slot->pitch = 1.0f;
+        slot->volume = 0;
+        slot->is_playing = false;
+        slot->is_used = false;
+    }
+}
+
+void Audio_Sample_Shutdown(void)
+{
+    // Loading and playing both require an open device, so without one there
+    // is nothing to release.
+    if (g_AudioDeviceID) {
+        Audio_Sample_ClearAll();
+    }
+}
+
+// Stops all sounds and frees every loaded sample; false if no device is open.
+bool Audio_Sample_ClearAll(void)
+{
+    if (!g_AudioDeviceID) {
+        return false;
+    }
+
+    Audio_Sample_CloseAll();
+    m_LoadedSamplesCount = 0; // keeps Audio_Sample_Play off the freed data
+    for (int32_t i = 0; i < AUDIO_MAX_SAMPLES; i++) {
+        Memory_FreePointer(&m_LoadedSamples[i].sample_data);
+    }
+    return true;
+}
+
+// Replaces the sample bank with `count` in-memory files. All files are tried;
+// if any fails to decode, the bank is emptied again and false is returned.
+bool Audio_Sample_Load(size_t count, const char **contents, size_t *sizes)
+{
+    assert(contents != NULL);
+    assert(sizes != NULL);
+    if (!g_AudioDeviceID) {
+        return false;
+    }
+    assert(count <= AUDIO_MAX_SAMPLES);
+
+    Audio_Sample_ClearAll();
+    bool all_ok = true;
+    for (int32_t i = 0; i < (int32_t)count; i++) {
+        if (!Audio_SampleLoad(i, contents[i], sizes[i])) {
+            all_ok = false;
+        }
+    }
+    if (!all_ok) {
+        Audio_Sample_ClearAll();
+        return false;
+    }
+    m_LoadedSamplesCount = count;
+    return true;
+}
+
+// Starts playing a loaded sample in the first free slot of the sound pool.
+// Returns the slot index (the sound id), or AUDIO_NO_SOUND if no slot is
+// free, the sample id is not loaded, or the audio device is closed.
+int32_t Audio_Sample_Play(
+    int32_t sample_id, int32_t volume, float pitch, int32_t pan, bool is_looped)
+{
+    if (!g_AudioDeviceID || sample_id < 0
+        || sample_id >= m_LoadedSamplesCount) {
+        return AUDIO_NO_SOUND;
+    }
+
+    SDL_LockAudioDevice(g_AudioDeviceID);
+    int32_t free_slot = AUDIO_NO_SOUND;
+    for (int32_t i = 0; i < AUDIO_MAX_ACTIVE_SAMPLES; i++) {
+        if (!m_Sample_s[i].is_used) {
+            free_slot = i;
+            break;
+        }
+    }
+
+    if (free_slot != AUDIO_NO_SOUND) {
+        AUDIO_SAMPLE_SOUND *const slot = &m_Sample_s[free_slot];
+        slot->sample = &m_LoadedSamples[sample_id];
+        slot->current_sample = 0.0f;
+        slot->is_looped = is_looped;
+        slot->pan = pan;
+        slot->pitch = pitch;
+        slot->volume = volume;
+        slot->is_playing = true;
+        slot->is_used = true;
+        Audio_SampleRecalculateChannelVolumes(free_slot);
+    }
+
+    SDL_UnlockAudioDevice(g_AudioDeviceID);
+
+    if (free_slot == AUDIO_NO_SOUND) {
+        LOG_ERROR("All sample buffers are used!");
+    }
+    return free_slot;
+}
+
+// Reports whether the given sound slot is currently playing. A closed device
+// or an out-of-range id yields false.
+bool Audio_Sample_IsPlaying(int32_t sound_id)
+{
+    const bool valid = g_AudioDeviceID != 0 && sound_id >= 0
+        && sound_id < AUDIO_MAX_ACTIVE_SAMPLES;
+    // the pool is only indexed once the id is known to be in range
+    return valid && m_Sample_s[sound_id].is_playing;
+}
+
+// Pauses a sound; false for a closed device or an out-of-range sound id.
+bool Audio_Sample_Pause(int32_t sound_id)
+{
+    if (!g_AudioDeviceID || sound_id < 0
+        || sound_id >= AUDIO_MAX_ACTIVE_SAMPLES) {
+        return false; // e.g. AUDIO_NO_SOUND must never index the pool
+    }
+    if (m_Sample_s[sound_id].is_playing) {
+        SDL_LockAudioDevice(g_AudioDeviceID); // the mixer reads this flag
+        m_Sample_s[sound_id].is_playing = false;
+        SDL_UnlockAudioDevice(g_AudioDeviceID);
+    }
+    return true;
+}
+
+// Pauses every sound slot that is in use; false when no device is open.
+bool Audio_Sample_PauseAll(void)
+{
+    if (!g_AudioDeviceID) {
+        return false;
+    }
+
+    for (int32_t i = 0; i < AUDIO_MAX_ACTIVE_SAMPLES; i++) {
+        if (!m_Sample_s[i].is_used) {
+            continue;
+        }
+        Audio_Sample_Pause(i);
+    }
+    return true;
+}
+
+// Resumes a paused sound; false for a closed device or an out-of-range id.
+bool Audio_Sample_Unpause(int32_t sound_id)
+{
+    if (!g_AudioDeviceID || sound_id < 0
+        || sound_id >= AUDIO_MAX_ACTIVE_SAMPLES) {
+        return false; // e.g. AUDIO_NO_SOUND must never index the pool
+    }
+    if (m_Sample_s[sound_id].is_used && !m_Sample_s[sound_id].is_playing) {
+        SDL_LockAudioDevice(g_AudioDeviceID); // the mixer reads this flag
+        m_Sample_s[sound_id].is_playing = true; // closed slots stay silent
+        SDL_UnlockAudioDevice(g_AudioDeviceID);
+    }
+    return true;
+}
+
+// Resumes every sound slot that is in use; false when no device is open.
+bool Audio_Sample_UnpauseAll(void)
+{
+    if (!g_AudioDeviceID) {
+        return false;
+    }
+
+    for (int32_t i = 0; i < AUDIO_MAX_ACTIVE_SAMPLES; i++) {
+        if (!m_Sample_s[i].is_used) {
+            continue;
+        }
+        Audio_Sample_Unpause(i);
+    }
+    return true;
+}
+
+// Stops a sound and marks its slot as free for Audio_Sample_Play.
+bool Audio_Sample_Close(int32_t sound_id)
+{
+    const bool in_range = sound_id >= 0 && sound_id < AUDIO_MAX_ACTIVE_SAMPLES;
+    if (!g_AudioDeviceID || !in_range) {
+        return false;
+    }
+    AUDIO_SAMPLE_SOUND *const slot = &m_Sample_s[sound_id];
+    SDL_LockAudioDevice(g_AudioDeviceID);
+    slot->is_playing = false;
+    slot->is_used = false;
+    SDL_UnlockAudioDevice(g_AudioDeviceID);
+    return true;
+}
+
+// Closes every sound slot that is in use; false when no device is open.
+bool Audio_Sample_CloseAll(void)
+{
+    if (!g_AudioDeviceID) {
+        return false;
+    }
+
+    for (int32_t i = 0; i < AUDIO_MAX_ACTIVE_SAMPLES; i++) {
+        if (!m_Sample_s[i].is_used) {
+            continue;
+        }
+        Audio_Sample_Close(i);
+    }
+    return true;
+}
+
+// Changes the pan of a sound and refreshes its per-channel gains.
+bool Audio_Sample_SetPan(int32_t sound_id, int32_t pan)
+{
+    const bool in_range = sound_id >= 0 && sound_id < AUDIO_MAX_ACTIVE_SAMPLES;
+    if (!g_AudioDeviceID || !in_range) {
+        return false;
+    }
+
+    SDL_LockAudioDevice(g_AudioDeviceID);
+    m_Sample_s[sound_id].pan = pan;
+    Audio_SampleRecalculateChannelVolumes(sound_id);
+    SDL_UnlockAudioDevice(g_AudioDeviceID);
+    return true;
+}
+
+// Changes the volume of a sound and refreshes its per-channel gains.
+bool Audio_Sample_SetVolume(int32_t sound_id, int32_t volume)
+{
+    const bool in_range = sound_id >= 0 && sound_id < AUDIO_MAX_ACTIVE_SAMPLES;
+    if (!g_AudioDeviceID || !in_range) {
+        return false;
+    }
+
+    SDL_LockAudioDevice(g_AudioDeviceID);
+    m_Sample_s[sound_id].volume = volume;
+    Audio_SampleRecalculateChannelVolumes(sound_id);
+    SDL_UnlockAudioDevice(g_AudioDeviceID);
+    return true;
+}
+
+// Adds every playing sample to the interleaved float buffer `dst_buffer`,
+// which is `len` bytes long. Called from the SDL audio callback.
+void Audio_Sample_Mix(float *dst_buffer, size_t len)
+{
+    // the working format is 32-bit float, AUDIO_WORKING_CHANNELS per frame
+    const int32_t frames = len / sizeof(float) / AUDIO_WORKING_CHANNELS;
+    for (int32_t sound_id = 0; sound_id < AUDIO_MAX_ACTIVE_SAMPLES;
+         sound_id++) {
+        AUDIO_SAMPLE_SOUND *sound = &m_Sample_s[sound_id];
+        if (!sound->is_playing) {
+            continue;
+        }
+        if (!sound->sample || sound->sample->num_samples <= 0) {
+            Audio_Sample_Close(sound_id); // nothing to read from
+            continue;
+        }
+        float src_sample_idx = sound->current_sample;
+        const float *src_buffer = sound->sample->sample_data;
+        float *dst_ptr = dst_buffer;
+        while ((dst_ptr - dst_buffer) / AUDIO_WORKING_CHANNELS < frames) {
+            // because we handle 3d sound ourselves, downmix to mono
+            float src_sample = 0.0f;
+            for (int32_t i = 0; i < sound->sample->channels; i++) {
+                src_sample += src_buffer
+                    [(int32_t)src_sample_idx * sound->sample->channels + i];
+            }
+            src_sample /= (float)sound->sample->channels;
+            *dst_ptr++ += src_sample * sound->volume_l;
+            *dst_ptr++ += src_sample * sound->volume_r;
+            src_sample_idx += sound->pitch;
+            if ((int32_t)src_sample_idx >= sound->sample->num_samples) {
+                if (sound->is_looped) {
+                    src_sample_idx = 0.0f;
+                } else {
+                    break;
+                }
+            }
+        }
+
+        sound->current_sample = src_sample_idx;
+        if (sound->current_sample >= sound->sample->num_samples
+            && !sound->is_looped) {
+            Audio_Sample_Close(sound_id);
+        }
+    }
+}
diff --git a/src/engine/audio_stream.c b/src/engine/audio_stream.c
new file mode 100644
index 0000000..bb1df57
--- /dev/null
+++ b/src/engine/audio_stream.c
@@ -0,0 +1,602 @@
+#include "audio.h"
+
+#include "filesystem.h"
+#include "log.h"
+#include "memory.h"
+
+#include <SDL2/SDL_audio.h>
+#include <SDL2/SDL_error.h>
+#include <assert.h>
+#include <errno.h>
+#include <libavcodec/avcodec.h>
+#include <libavcodec/codec.h>
+#include <libavcodec/packet.h>
+#include <libavformat/avformat.h>
+#include <libavformat/avio.h>
+#include <libavutil/avutil.h>
+#include <libavutil/error.h>
+#include <libavutil/frame.h>
+#include <libavutil/mem.h>
+#include <libavutil/rational.h>
+#include <libavutil/samplefmt.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+
+#define READ_BUFFER_SIZE                                                       \
+    (AUDIO_SAMPLES * AUDIO_WORKING_CHANNELS * sizeof(AUDIO_WORKING_FORMAT))
+
+typedef struct AUDIO_STREAM_SOUND {
+    bool is_used; // slot is taken; Audio_Stream_CreateFromFile skips it
+    bool is_playing; // cleared by Pause; the mixer skips the slot while false
+    bool is_read_done; // set once Audio_Stream_DecodeFrame reported failure
+    bool is_looped; // on end of input, rewind instead of finishing
+    float volume; // factor applied to every sample while mixing
+    double duration; // seconds (container duration / AV_TIME_BASE)
+    double timestamp; // seconds, position of the last frame enqueued
+    // invoked by Audio_Stream_Close after the slot has been cleared
+    void (*finish_callback)(int32_t sound_id, void *user_data);
+    void *finish_callback_user_data; // handed to finish_callback unchanged
+    // FFmpeg state, set up by Audio_Stream_InitialiseFromPath
+    struct {
+        AVStream *stream; // first audio stream found in format_ctx
+        AVFormatContext *format_ctx; // opened input file
+        const AVCodec *codec; // decoder matching stream's codec id
+        AVCodecContext *codec_ctx; // opened decoder instance
+        AVPacket *packet; // reused for every av_read_frame call
+        AVFrame *frame; // reused for every avcodec_receive_frame call
+    } av;
+    // SDL stream converting decoded audio to the working format and rate
+    struct {
+        SDL_AudioStream *stream;
+    } sdl;
+} AUDIO_STREAM_SOUND;
+
+extern SDL_AudioDeviceID g_AudioDeviceID; // public calls bail out while 0
+// Slot table (index == sound_id) and the scratch buffer used when mixing.
+static AUDIO_STREAM_SOUND m_Stream_s[AUDIO_MAX_ACTIVE_STREAMS] = { 0 };
+static float m_DecodeBuffer[AUDIO_SAMPLES * AUDIO_WORKING_CHANNELS] = { 0 };
+
+static bool Audio_Stream_DecodeFrame(AUDIO_STREAM_SOUND *stream);
+static bool Audio_Stream_EnqueueFrame(AUDIO_STREAM_SOUND *stream);
+static bool Audio_Stream_InitialiseFromPath(
+    int32_t sound_id, const char *file_path);
+static void Audio_Stream_Clear(AUDIO_STREAM_SOUND *stream);
+
+static bool Audio_Stream_DecodeFrame(AUDIO_STREAM_SOUND *stream)
+{
+    // Reads the next packet and, when it belongs to the selected audio
+    // stream, hands it to the decoder. False means end of input or error.
+    assert(stream != NULL);
+    int32_t error_code =
+        av_read_frame(stream->av.format_ctx, stream->av.packet);
+
+    if (error_code == AVERROR_EOF && stream->is_looped) {
+        // Rewind and retry exactly once. The former recursive retry never
+        // ended when the rewound input still produced no packet.
+        avio_seek(stream->av.format_ctx->pb, 0, SEEK_SET);
+        avformat_seek_file(
+            stream->av.format_ctx, -1, 0, 0, 0, AVSEEK_FLAG_FRAME);
+        error_code = av_read_frame(stream->av.format_ctx, stream->av.packet);
+    }
+    if (error_code < 0) {
+        return false;
+    }
+    // Foreign packets are skipped; Audio_Stream_EnqueueFrame unrefs them.
+    if (stream->av.packet->stream_index != stream->av.stream->index) {
+        return true;
+    }
+    error_code = avcodec_send_packet(stream->av.codec_ctx, stream->av.packet);
+    if (error_code < 0) {
+        av_packet_unref(stream->av.packet);
+        LOG_ERROR(
+            "Got an error when decoding frame: %s", av_err2str(error_code));
+        return false;
+    }
+    return true;
+}
+
+static bool Audio_Stream_EnqueueFrame(AUDIO_STREAM_SOUND *stream)
+{
+    // Drains every frame the decoder has ready, pushes the raw samples into
+    // the SDL conversion stream and records the position of the last frame
+    // pushed. The pending packet is released before returning, and the
+    // result is always true, even when one of the steps failed.
+    assert(stream != NULL);
+
+    AVFrame *const frame = stream->av.frame;
+
+    for (;;) {
+        const int32_t recv_code =
+            avcodec_receive_frame(stream->av.codec_ctx, frame);
+        if (recv_code < 0) {
+            // EAGAIN is the only failure that is not logged
+            if (recv_code != AVERROR(EAGAIN)) {
+                av_packet_unref(stream->av.packet);
+                LOG_ERROR(
+                    "Got an error when decoding frame: %d, %s", recv_code,
+                    av_err2str(recv_code));
+            }
+            av_frame_unref(frame);
+            break;
+        }
+
+        // byte size of the decoded samples, computed with alignment 1
+        const int32_t data_size = av_samples_get_buffer_size(
+            NULL, stream->av.codec_ctx->channels, frame->nb_samples,
+            stream->av.codec_ctx->sample_fmt, 1);
+        if (data_size < 0) {
+            if (data_size != AVERROR(EAGAIN)) {
+                LOG_ERROR(
+                    "Got an error when decoding frame: %d, %s", data_size,
+                    av_err2str(data_size));
+            }
+            av_frame_unref(frame);
+            break;
+        }
+
+        const int put_result =
+            SDL_AudioStreamPut(stream->sdl.stream, frame->data[0], data_size);
+        if (put_result != 0) {
+            LOG_ERROR("Got an error when decoding frame: %s", SDL_GetError());
+            av_frame_unref(frame);
+            break;
+        }
+
+        // frame position: stream time-base ticks converted to seconds
+        const double seconds_per_tick = av_q2d(stream->av.stream->time_base);
+        stream->timestamp = frame->best_effort_timestamp * seconds_per_tick;
+        av_frame_unref(frame);
+    }
+
+    av_packet_unref(stream->av.packet);
+    return true;
+}
+
+static bool Audio_Stream_InitialiseFromPath(
+    int32_t sound_id, const char *file_path)
+{
+    assert(file_path != NULL);
+    // Opens file_path in slot sound_id: input, decoder and SDL converter.
+    if (!g_AudioDeviceID || sound_id < 0
+        || sound_id >= AUDIO_MAX_ACTIVE_STREAMS) {
+        return false;
+    }
+
+    bool ret = false;
+    SDL_LockAudioDevice(g_AudioDeviceID);
+    // NOTE: the audio device stays locked until the end of this function.
+    int32_t error_code;
+    char *full_path = File_GetFullPath(file_path);
+
+    AUDIO_STREAM_SOUND *stream = &m_Stream_s[sound_id];
+
+    error_code =
+        avformat_open_input(&stream->av.format_ctx, full_path, NULL, NULL);
+    if (error_code != 0) {
+        goto cleanup;
+    }
+
+    error_code = avformat_find_stream_info(stream->av.format_ctx, NULL);
+    if (error_code < 0) {
+        goto cleanup;
+    }
+    // use the first audio stream found in the container
+    stream->av.stream = NULL;
+    for (uint32_t i = 0; i < stream->av.format_ctx->nb_streams; i++) {
+        AVStream *current_stream = stream->av.format_ctx->streams[i];
+        if (current_stream->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) {
+            stream->av.stream = current_stream;
+            break;
+        }
+    }
+    if (!stream->av.stream) {
+        error_code = AVERROR_STREAM_NOT_FOUND;
+        goto cleanup;
+    }
+    // a missing decoder is reported as exactly that, not as a demuxer issue
+    stream->av.codec =
+        avcodec_find_decoder(stream->av.stream->codecpar->codec_id);
+    if (!stream->av.codec) {
+        error_code = AVERROR_DECODER_NOT_FOUND;
+        goto cleanup;
+    }
+
+    stream->av.codec_ctx = avcodec_alloc_context3(stream->av.codec);
+    if (!stream->av.codec_ctx) {
+        error_code = AVERROR(ENOMEM);
+        goto cleanup;
+    }
+
+    error_code = avcodec_parameters_to_context(
+        stream->av.codec_ctx, stream->av.stream->codecpar);
+    if (error_code) {
+        goto cleanup;
+    }
+
+    error_code = avcodec_open2(stream->av.codec_ctx, stream->av.codec, NULL);
+    if (error_code < 0) {
+        goto cleanup;
+    }
+
+    stream->av.packet = av_packet_alloc();
+    if (!stream->av.packet) {
+        error_code = AVERROR(ENOMEM);
+        goto cleanup;
+    }
+
+    stream->av.frame = av_frame_alloc();
+    if (!stream->av.frame) {
+        error_code = AVERROR(ENOMEM);
+        goto cleanup;
+    }
+    // NOTE(review): the result of this first decode is not checked
+    Audio_Stream_DecodeFrame(stream);
+
+    int32_t sdl_format =
+        Audio_GetSDLAudioFormat(stream->av.codec_ctx->sample_fmt);
+    if (sdl_format < 0) {
+        LOG_ERROR(
+            "Unknown sample format: %d", stream->av.codec_ctx->sample_fmt);
+        goto cleanup;
+    }
+
+    int32_t sdl_sample_rate = stream->av.codec_ctx->sample_rate;
+    int32_t sdl_channels = stream->av.codec_ctx->channels;
+
+    stream->is_read_done = false;
+    stream->is_used = true;
+    stream->is_playing = true;
+    stream->is_looped = false;
+    stream->volume = 1.0f;
+    stream->timestamp = 0.0;
+    stream->finish_callback = NULL;
+    stream->finish_callback_user_data = NULL;
+    stream->duration =
+        (double)stream->av.format_ctx->duration / (double)AV_TIME_BASE;
+
+    stream->sdl.stream = SDL_NewAudioStream(
+        sdl_format, sdl_channels, sdl_sample_rate, AUDIO_WORKING_FORMAT,
+        sdl_channels, AUDIO_WORKING_RATE);
+    if (!stream->sdl.stream) {
+        LOG_ERROR("Failed to create SDL stream: %s", SDL_GetError());
+        goto cleanup;
+    }
+
+    ret = true;
+    Audio_Stream_EnqueueFrame(stream);
+
+cleanup:
+    if (error_code) {
+        LOG_ERROR(
+            "Error while opening audio %s: %s", file_path,
+            av_err2str(error_code));
+    }
+    // never leave a half-initialised slot behind
+    if (!ret) {
+        Audio_Stream_Close(sound_id);
+    }
+
+    SDL_UnlockAudioDevice(g_AudioDeviceID);
+    Memory_FreePointer(&full_path);
+    return ret;
+}
+
+static void Audio_Stream_Clear(AUDIO_STREAM_SOUND *stream)
+{
+    // Resets the bookkeeping fields only; the av.* members are not touched.
+    assert(stream != NULL);
+    stream->sdl.stream = NULL;
+    stream->finish_callback_user_data = NULL;
+    stream->finish_callback = NULL;
+    stream->timestamp = 0.0;
+    stream->duration = 0.0;
+    stream->volume = 0.0f;
+    stream->is_looped = false;
+    stream->is_read_done = true;
+    stream->is_playing = false;
+    stream->is_used = false;
+}
+
+void Audio_Stream_Init(void)
+{
+    int32_t sound_id = 0; // walk the whole table, clearing each slot
+    while (sound_id < AUDIO_MAX_ACTIVE_STREAMS) {
+        Audio_Stream_Clear(&m_Stream_s[sound_id++]);
+    }
+}
+
+void Audio_Stream_Shutdown(void)
+{
+    // Closes every stream slot that is still in use. Nothing is done while
+    // g_AudioDeviceID is 0.
+    if (g_AudioDeviceID) {
+        for (int32_t id = 0; id < AUDIO_MAX_ACTIVE_STREAMS; id++) {
+            const AUDIO_STREAM_SOUND *const slot = &m_Stream_s[id];
+            if (slot->is_used) {
+                Audio_Stream_Close(id);
+            }
+        }
+    }
+}
+
+bool Audio_Stream_Pause(int32_t sound_id)
+{
+    // Clears is_playing under the device lock; false only for a bad id/device.
+    const bool id_ok = sound_id >= 0 && sound_id < AUDIO_MAX_ACTIVE_STREAMS;
+    if (!g_AudioDeviceID || !id_ok) {
+        return false;
+    }
+    AUDIO_STREAM_SOUND *const slot = &m_Stream_s[sound_id];
+    if (slot->is_playing) {
+        SDL_LockAudioDevice(g_AudioDeviceID);
+        slot->is_playing = false;
+        SDL_UnlockAudioDevice(g_AudioDeviceID);
+    }
+    return true;
+}
+
+bool Audio_Stream_Unpause(int32_t sound_id)
+{
+    // Sets is_playing under the device lock; false only for a bad id/device.
+    const bool id_ok = sound_id >= 0 && sound_id < AUDIO_MAX_ACTIVE_STREAMS;
+    if (!g_AudioDeviceID || !id_ok) {
+        return false;
+    }
+    AUDIO_STREAM_SOUND *const slot = &m_Stream_s[sound_id];
+    if (!slot->is_playing) {
+        SDL_LockAudioDevice(g_AudioDeviceID);
+        slot->is_playing = true;
+        SDL_UnlockAudioDevice(g_AudioDeviceID);
+    }
+    return true;
+}
+
+int32_t Audio_Stream_CreateFromFile(const char *file_path)
+{
+    // Opens file_path in the first unused slot and returns that slot's id.
+    // AUDIO_NO_SOUND is returned when all slots are taken or opening fails;
+    // no other slot is tried after a failed open.
+    assert(file_path != NULL);
+
+    int32_t free_id = 0;
+    while (free_id < AUDIO_MAX_ACTIVE_STREAMS && m_Stream_s[free_id].is_used) {
+        free_id++;
+    }
+
+    if (free_id == AUDIO_MAX_ACTIVE_STREAMS) {
+        return AUDIO_NO_SOUND;
+    }
+    if (!Audio_Stream_InitialiseFromPath(free_id, file_path)) {
+        return AUDIO_NO_SOUND;
+    }
+    return free_id;
+}
+
+bool Audio_Stream_Close(int32_t sound_id)
+{
+    // Releases the FFmpeg and SDL resources of the slot, marks it unused and
+    // finally runs the finish callback, outside the audio device lock.
+    // Returns false for an out-of-range id or while g_AudioDeviceID is 0.
+    if (!g_AudioDeviceID || sound_id < 0
+        || sound_id >= AUDIO_MAX_ACTIVE_STREAMS) {
+        return false;
+    }
+
+    SDL_LockAudioDevice(g_AudioDeviceID);
+
+    AUDIO_STREAM_SOUND *stream = &m_Stream_s[sound_id];
+
+    if (stream->av.codec_ctx) {
+        // avcodec_free_context() also frees what the context owns (such as
+        // extradata) and NULLs the pointer; close + av_free leaked that.
+        avcodec_free_context(&stream->av.codec_ctx);
+    }
+
+    if (stream->av.format_ctx) {
+        avformat_close_input(&stream->av.format_ctx); // NULLs the pointer
+    }
+
+    if (stream->av.packet) {
+        av_packet_free(&stream->av.packet); // NULLs the pointer
+    }
+
+    if (stream->av.frame) {
+        av_frame_free(&stream->av.frame); // NULLs the pointer
+    }
+
+    stream->av.stream = NULL;
+    stream->av.codec = NULL;
+
+    if (stream->sdl.stream) {
+        SDL_FreeAudioStream(stream->sdl.stream);
+    }
+
+    void (*finish_callback)(int32_t, void *) = stream->finish_callback;
+    void *finish_callback_user_data = stream->finish_callback_user_data;
+
+    Audio_Stream_Clear(stream);
+
+    SDL_UnlockAudioDevice(g_AudioDeviceID);
+
+    if (finish_callback) {
+        finish_callback(sound_id, finish_callback_user_data);
+    }
+
+    return true;
+}
+
+bool Audio_Stream_SetVolume(int32_t sound_id, float volume)
+{
+    // Stores the factor applied when this stream is mixed; false when the
+    // id is out of range or g_AudioDeviceID is 0.
+    const bool id_ok = sound_id >= 0 && sound_id < AUDIO_MAX_ACTIVE_STREAMS;
+    if (g_AudioDeviceID && id_ok) {
+        m_Stream_s[sound_id].volume = volume;
+        return true;
+    }
+    return false;
+}
+
+bool Audio_Stream_IsLooped(int32_t sound_id)
+{
+    // Reports the slot's loop flag; false for a bad id or without a device.
+    const bool id_ok = sound_id >= 0 && sound_id < AUDIO_MAX_ACTIVE_STREAMS;
+    if (g_AudioDeviceID && id_ok) {
+        return m_Stream_s[sound_id].is_looped;
+    }
+    return false;
+}
+
+bool Audio_Stream_SetIsLooped(int32_t sound_id, bool is_looped)
+{
+    // Stores the loop flag consulted when decoding reaches the end of the
+    // input; false when the id is out of range or g_AudioDeviceID is 0.
+    const bool id_ok = sound_id >= 0 && sound_id < AUDIO_MAX_ACTIVE_STREAMS;
+    if (g_AudioDeviceID && id_ok) {
+        m_Stream_s[sound_id].is_looped = is_looped;
+        return true;
+    }
+    return false;
+}
+
+bool Audio_Stream_SetFinishCallback(
+    int32_t sound_id, void (*callback)(int32_t sound_id, void *user_data),
+    void *user_data)
+{
+    // Registers the function (and its argument) that Audio_Stream_Close
+    // calls for this slot; false for a bad id or while no device is open.
+    const bool id_ok = sound_id >= 0 && sound_id < AUDIO_MAX_ACTIVE_STREAMS;
+    if (!g_AudioDeviceID || !id_ok) {
+        return false;
+    }
+    m_Stream_s[sound_id].finish_callback_user_data = user_data;
+    m_Stream_s[sound_id].finish_callback = callback;
+    return true;
+}
+
+void Audio_Stream_Mix(float *dst_buffer, size_t len)
+{
+    // Adds every playing stream into dst_buffer (len bytes of interleaved
+    // stereo), decoding more input on demand; finished streams are closed.
+    const size_t dst_frames =
+        len / sizeof(AUDIO_WORKING_FORMAT) / AUDIO_WORKING_CHANNELS;
+
+    for (int32_t sound_id = 0; sound_id < AUDIO_MAX_ACTIVE_STREAMS;
+         sound_id++) {
+        AUDIO_STREAM_SOUND *stream = &m_Stream_s[sound_id];
+        if (!stream->is_playing) {
+            continue;
+        }
+
+        while ((SDL_AudioStreamAvailable(stream->sdl.stream) < (int32_t)len)
+               && !stream->is_read_done) {
+            if (Audio_Stream_DecodeFrame(stream)) {
+                Audio_Stream_EnqueueFrame(stream);
+            } else {
+                stream->is_read_done = true;
+            }
+        }
+
+        // Each source frame becomes one stereo frame in dst_buffer, so read
+        // at most dst_frames of them. The previous fixed READ_BUFFER_SIZE
+        // read wrote past the end of dst_buffer for mono streams.
+        const int32_t channels = stream->av.codec_ctx->channels;
+        const size_t frame_size = channels * sizeof(AUDIO_WORKING_FORMAT);
+        size_t want = dst_frames * frame_size;
+        want = want < READ_BUFFER_SIZE ? want : READ_BUFFER_SIZE;
+
+        memset(m_DecodeBuffer, 0, READ_BUFFER_SIZE);
+        int32_t bytes_gotten = SDL_AudioStreamGet(
+            stream->sdl.stream, m_DecodeBuffer, (int32_t)want);
+        if (bytes_gotten <= 0) {
+            // 0: end of stream (looping is done in DecodeFrame); < 0: error
+            if (bytes_gotten < 0) {
+                LOG_ERROR("Error reading from sdl.stream: %s", SDL_GetError());
+            }
+            stream->is_playing = false;
+            stream->is_used = false;
+            stream->is_read_done = true;
+        } else {
+            const int32_t samples_gotten = bytes_gotten / frame_size;
+            const float *src_ptr = &m_DecodeBuffer[0];
+            float *dst_ptr = dst_buffer;
+            if (channels == 2) {
+                for (int32_t s = 0; s < samples_gotten; s++) {
+                    *dst_ptr++ += *src_ptr++ * stream->volume;
+                    *dst_ptr++ += *src_ptr++ * stream->volume;
+                }
+            } else {
+                for (int32_t s = 0; s < samples_gotten; s++) {
+                    // downmix to mono (leaves mono input as it is)
+                    float src_sample = 0.0f;
+                    for (int32_t i = 0; i < channels; i++) {
+                        src_sample += *src_ptr++;
+                    }
+                    src_sample /= (float)channels;
+                    *dst_ptr++ += src_sample * stream->volume;
+                    *dst_ptr++ += src_sample * stream->volume;
+                }
+            }
+        }
+
+        if (!stream->is_used) {
+            Audio_Stream_Close(sound_id);
+        }
+    }
+}
+
+double Audio_Stream_GetTimestamp(int32_t sound_id)
+{
+    // Position in seconds of the most recently enqueued frame, or -1.0 when
+    // the id is invalid, no device is open or the slot has no duration.
+    const bool id_ok = sound_id >= 0 && sound_id < AUDIO_MAX_ACTIVE_STREAMS;
+    if (!g_AudioDeviceID || !id_ok) {
+        return -1.0;
+    }
+
+    const AUDIO_STREAM_SOUND *const slot = &m_Stream_s[sound_id];
+    if (!(slot->duration > 0.0)) {
+        return -1.0;
+    }
+    SDL_LockAudioDevice(g_AudioDeviceID);
+    const double position = slot->timestamp;
+    SDL_UnlockAudioDevice(g_AudioDeviceID);
+    return position;
+}
+
+double Audio_Stream_GetDuration(int32_t sound_id)
+{
+    // Duration in seconds stored for the slot, read under the device lock;
+    // -1.0 when the id is out of range or g_AudioDeviceID is 0.
+    const bool id_ok = sound_id >= 0 && sound_id < AUDIO_MAX_ACTIVE_STREAMS;
+    if (!g_AudioDeviceID || !id_ok) {
+        return -1.0;
+    }
+    SDL_LockAudioDevice(g_AudioDeviceID);
+    const double seconds = m_Stream_s[sound_id].duration;
+    SDL_UnlockAudioDevice(g_AudioDeviceID);
+    return seconds;
+}
+
+bool Audio_Stream_SeekTimestamp(int32_t sound_id, double timestamp)
+{
+    // Seeks a playing stream to `timestamp` seconds. Returns false for a bad
+    // id, a stream that is not playing, or when FFmpeg rejects the seek.
+    if (!g_AudioDeviceID || sound_id < 0
+        || sound_id >= AUDIO_MAX_ACTIVE_STREAMS
+        || !m_Stream_s[sound_id].is_playing) {
+        return false;
+    }
+    SDL_LockAudioDevice(g_AudioDeviceID);
+    AUDIO_STREAM_SOUND *stream = &m_Stream_s[sound_id];
+    // The target is expressed in the audio stream's own time base, so the
+    // seek has to name that stream rather than assume it is stream 0.
+    const double time_base_sec = av_q2d(stream->av.stream->time_base);
+    const int32_t error_code = av_seek_frame(
+        stream->av.format_ctx, stream->av.stream->index,
+        timestamp / time_base_sec, AVSEEK_FLAG_ANY);
+    SDL_UnlockAudioDevice(g_AudioDeviceID);
+    return error_code >= 0;
+}
diff --git a/tools/ffmpeg_flags.txt b/tools/ffmpeg_flags.txt
new file mode 100644
index 0000000..ec069f3
--- /dev/null
+++ b/tools/ffmpeg_flags.txt
@@ -0,0 +1,27 @@
+--enable-gpl
+--enable-decoder=pcx
+--enable-decoder=png
+--enable-decoder=gif
+--enable-decoder=mjpeg
+--enable-decoder=mpeg4
+--enable-decoder=mdec
+--enable-decoder=h264
+--enable-decoder=h264_qsv
+--enable-decoder=libopenh264
+--enable-demuxer=mov
+--enable-demuxer=avi
+--enable-demuxer=h264
+--enable-demuxer=str
+--enable-demuxer=image2
+--enable-zlib
+--enable-small
+--disable-debug
+--disable-ffplay
+--disable-ffprobe
+--disable-doc
+--disable-network
+--disable-htmlpages
+--disable-manpages
+--disable-podpages
+--disable-txtpages
+--disable-asm