first draft of audio fft spectrum visualizer (#5348)

3 месяцев назад · 4dca02daa5
--- a/examples/Makefile
+++ b/examples/Makefile
@ -703,6 +703,7 @@ SHADERS = \
    shaders/shaders_vertex_displacement

 AUDIO = \
 	audio/audio_fft_spectrum_visualizer \
    audio/audio_mixed_processor \
    audio/audio_module_playing \
    audio/audio_music_stream \
--- a/examples/audio/audio_fft_spectrum_visualizer.c
+++ b/examples/audio/audio_fft_spectrum_visualizer.c
@ -0,0 +1,279 @@
 /*******************************************************************************************
 *
 *   raylib [audio] example - fft spectrum visualizer
 *
 *   Example complexity rating: [★★★☆] 3/4
 *
 *   Example originally created with raylib 6.0
 *
 *   Inspired by Inigo Quilez's https://www.shadertoy.com/
 *   Resources/specification: https://gist.github.com/soulthreads/2efe50da4be1fb5f7ab60ff14ca434b8
 *
 *   Example created by created by IANN (@meisei4) reviewed by Ramon Santamaria (@raysan5)
 *
 *   Example licensed under an unmodified zlib/libpng license, which is an OSI-certified,
 *   BSD-like license that allows static linking with closed source software
 *
 *   Copyright (c) 2025 IANN (@meisei4)
 *
 ********************************************************************************************/

 #include "raylib.h"
 #include "raymath.h"
 #include <math.h>
 #include <stdlib.h>
 #include <string.h>

 #define MONO                           1
 #define SAMPLE_RATE                    44100
 #define SAMPLE_RATE_F                  44100.0f
 #define FFT_WINDOW_SIZE                1024
 #define BUFFER_SIZE                    512
 #define PER_SAMPLE_BIT_DEPTH           16
 #define AUDIO_STREAM_RING_BUFFER_SIZE  (FFT_WINDOW_SIZE*2)
 #define EFFECTIVE_SAMPLE_RATE          (SAMPLE_RATE_F*0.5f)
 #define WINDOW_TIME                    ((double)FFT_WINDOW_SIZE/(double)EFFECTIVE_SAMPLE_RATE)
 #define FFT_HISTORICAL_SMOOTHING_DUR   2.0f
 #define MIN_DECIBELS                   (-100.0f) // https://developer.mozilla.org/en-US/docs/Web/API/AnalyserNode/minDecibels
 #define MAX_DECIBELS                   (-30.0f)  // https://developer.mozilla.org/en-US/docs/Web/API/AnalyserNode/maxDecibels
 #define INVERSE_DECIBEL_RANGE          (1.0f/(MAX_DECIBELS - MIN_DECIBELS))
 #define DB_TO_LINEAR_SCALE             (20.0f/2.302585092994046f)
 #define SMOOTHING_TIME_CONSTANT        0.8f // https://developer.mozilla.org/en-US/docs/Web/API/AnalyserNode/smoothingTimeConstant
 #define TEXTURE_HEIGHT                 1
 #define FFT_ROW                        0
 #define UNUSED_CHANNEL                 0.0f

 typedef struct FFTComplex { float real, imaginary; } FFTComplex;

 typedef struct FFTData {
    FFTComplex *spectrum;
    FFTComplex *workBuffer;
    float *prevMagnitudes;
    float (*fftHistory)[BUFFER_SIZE];
    int fftHistoryLen;
    int historyPos;
    double lastFftTime;
    float tapbackPos;
 } FFTData;

 static void CaptureFrame(FFTData *fftData, const float *audioSamples);
 static void RenderFrame(const FFTData *fftData, Image *fftImage);
 static void CooleyTukeyFFTSlow(FFTComplex *spectrum, int n);

 //------------------------------------------------------------------------------------
 // Program main entry point
 //------------------------------------------------------------------------------------
 int main(void)
 {
    // Initialization
    //-----------------------------------------------------------------------------------     ---
    const int screenWidth = 800;
    const int screenHeight = 450;

    InitWindow(screenWidth, screenHeight, "raylib [audio] example - fft spectrum visualizer");

    Image fftImage = GenImageColor(BUFFER_SIZE, TEXTURE_HEIGHT, WHITE);
    Texture2D fftTexture = LoadTextureFromImage(fftImage);
    RenderTexture2D bufferA = LoadRenderTexture(screenWidth, screenHeight);
    Vector2 iResolution = { (float)screenWidth, (float)screenHeight };

    Shader shader = LoadShader(NULL, "resources/fft.glsl");
    int iResolutionLocation = GetShaderLocation(shader, "iResolution");
    int iChannel0Location = GetShaderLocation(shader, "iChannel0");
    SetShaderValue(shader, iResolutionLocation, &iResolution, SHADER_UNIFORM_VEC2);
    SetShaderValueTexture(shader, iChannel0Location, fftTexture);

    InitAudioDevice();
    SetAudioStreamBufferSizeDefault(AUDIO_STREAM_RING_BUFFER_SIZE);

    Wave wav = LoadWave("resources/country.mp3");
    WaveFormat(&wav, SAMPLE_RATE, PER_SAMPLE_BIT_DEPTH, MONO);

    AudioStream audioStream = LoadAudioStream(SAMPLE_RATE, PER_SAMPLE_BIT_DEPTH, MONO);
    PlayAudioStream(audioStream);

    int fftHistoryLen = (int)ceilf(FFT_HISTORICAL_SMOOTHING_DUR/WINDOW_TIME) + 1;

    FFTData fft = {
        .spectrum = malloc(sizeof(FFTComplex)*FFT_WINDOW_SIZE),
        .workBuffer = malloc(sizeof(FFTComplex)*FFT_WINDOW_SIZE),
        .prevMagnitudes = calloc(BUFFER_SIZE, sizeof(float)),
        .fftHistory = calloc(fftHistoryLen, sizeof(float[BUFFER_SIZE])),
        .fftHistoryLen = fftHistoryLen,
        .historyPos = 0,
        .lastFftTime = 0.0,
        .tapbackPos = 0.01f
    };

    size_t wavCursor = 0;
    const short *wavPCM16 = wav.data;

    short chunkSamples[AUDIO_STREAM_RING_BUFFER_SIZE] = { 0 };
    float audioSamples[FFT_WINDOW_SIZE] = { 0 };

    SetTargetFPS(60);
    //----------------------------------------------------------------------------------

    // Main game loop
    while (!WindowShouldClose())    // Detect window close button or ESC key
    {
        // Update
        //----------------------------------------------------------------------------------
        while (IsAudioStreamProcessed(audioStream))
        {
            for (int i = 0; i < AUDIO_STREAM_RING_BUFFER_SIZE; i++)
            {
                int left = (wav.channels == 2)? wavPCM16[wavCursor*2 + 0] : wavPCM16[wavCursor];
                int right = (wav.channels == 2)? wavPCM16[wavCursor*2 + 1] : left;
                chunkSamples[i] = (short)((left + right)/2);

                if (++wavCursor >= wav.frameCount)
                    wavCursor = 0;

            }

            UpdateAudioStream(audioStream, chunkSamples, AUDIO_STREAM_RING_BUFFER_SIZE);

            for (int i = 0; i < FFT_WINDOW_SIZE; i++)
                audioSamples[i] = (chunkSamples[i*2] + chunkSamples[i*2 + 1])*0.5f/32767.0f;
        }

        CaptureFrame(&fft, audioSamples);
        RenderFrame(&fft, &fftImage);
        UpdateTexture(fftTexture, fftImage.data);
        //------------------------------------------------------------------------------

        // Draw
        //----------------------------------------------------------------------------------
        BeginDrawing();
            ClearBackground(BLACK);
            BeginShaderMode(shader);
                SetShaderValueTexture(shader, iChannel0Location, fftTexture);
                DrawTextureRec(bufferA.texture,
                    (Rectangle){ 0, 0, (float)screenWidth, (float)-screenHeight },
                    (Vector2){ 0, 0 },
                    WHITE);
            EndShaderMode();
        EndDrawing();
        //------------------------------------------------------------------------------
    }

    // De-Initialization
    //--------------------------------------------------------------------------------------
    UnloadShader(shader);
    UnloadRenderTexture(bufferA);
    UnloadTexture(fftTexture);
    UnloadImage(fftImage);
    UnloadAudioStream(audioStream);
    UnloadWave(wav);
    CloseAudioDevice();

    free(fft.spectrum);
    free(fft.workBuffer);
    free(fft.prevMagnitudes);
    free(fft.fftHistory);

    CloseWindow();        // Close window and OpenGL context
    //----------------------------------------------------------------------------------

    return 0;
 }

 // Cooley–Tukey FFT https://en.wikipedia.org/wiki/Cooley%E2%80%93Tukey_FFT_algorithm#Data_reordering,_bit_reversal,_and_in-place_algorithms
 static void CooleyTukeyFFTSlow(FFTComplex *spectrum, int n)
 {
    int j = 0;
    for (int i = 1; i < n - 1; i++)
    {
        int bit = n >> 1;
        while (j >= bit)
        {
            j -= bit;
            bit >>= 1;
        }
        j += bit;
        if (i < j)
        {
            FFTComplex temp = spectrum[i];
            spectrum[i] = spectrum[j];
            spectrum[j] = temp;
        }
    }

    for (int len = 2; len <= n; len <<= 1)
    {
        float angle = -2.0f*PI/len;
        FFTComplex twiddleUnit = { cosf(angle), sinf(angle) };
        for (int i = 0; i < n; i += len)
        {
            FFTComplex twiddleCurrent = { 1.0f, 0.0f };
            for (int j = 0; j < len/2; j++)
            {
                FFTComplex even = spectrum[i + j];
                FFTComplex odd = spectrum[i + j + len/2];
                FFTComplex twiddledOdd = {
                    odd.real*twiddleCurrent.real - odd.imaginary*twiddleCurrent.imaginary,
                    odd.real*twiddleCurrent.imaginary + odd.imaginary*twiddleCurrent.real
                };

                spectrum[i + j].real = even.real + twiddledOdd.real;
                spectrum[i + j].imaginary = even.imaginary + twiddledOdd.imaginary;
                spectrum[i + j + len/2].real = even.real - twiddledOdd.real;
                spectrum[i + j + len/2].imaginary = even.imaginary - twiddledOdd.imaginary;

                float twiddleRealNext = twiddleCurrent.real*twiddleUnit.real - twiddleCurrent.imaginary*twiddleUnit.imaginary;
                twiddleCurrent.imaginary = twiddleCurrent.real*twiddleUnit.imaginary + twiddleCurrent.imaginary*twiddleUnit.real;
                twiddleCurrent.real = twiddleRealNext;
            }
        }
    }
 }

 static void CaptureFrame(FFTData *fftData, const float *audioSamples)
 {
    for (int i = 0; i < FFT_WINDOW_SIZE; i++)
    {
        float x = (2.0f*PI*i)/(FFT_WINDOW_SIZE - 1.0f);
        float blackmanWeight  = 0.42f - 0.5f*cosf(x) + 0.08f*cosf(2.0f*x); // https://en.wikipedia.org/wiki/Window_function#Blackman_window
        fftData->workBuffer[i].real = audioSamples[i]*blackmanWeight;
        fftData->workBuffer[i].imaginary = 0.0f;
    }

    CooleyTukeyFFTSlow(fftData->workBuffer, FFT_WINDOW_SIZE);
    memcpy(fftData->spectrum, fftData->workBuffer, sizeof(FFTComplex)*FFT_WINDOW_SIZE);

    float smoothedSpectrum[BUFFER_SIZE];

    for (int bin = 0; bin < BUFFER_SIZE; bin++)
    {
        float re = fftData->workBuffer[bin].real;
        float im = fftData->workBuffer[bin].imaginary;
        float linearMagnitude = sqrtf(re*re + im*im)/FFT_WINDOW_SIZE;

        float smoothedMagnitude = SMOOTHING_TIME_CONSTANT*fftData->prevMagnitudes[bin] + (1.0f - SMOOTHING_TIME_CONSTANT)*linearMagnitude;
        fftData->prevMagnitudes[bin] = smoothedMagnitude;

        float db = logf(fmaxf(smoothedMagnitude, 1e-40f))*DB_TO_LINEAR_SCALE;
        float normalized = (db - MIN_DECIBELS)*INVERSE_DECIBEL_RANGE;
        smoothedSpectrum[bin] = Clamp(normalized, 0.0f, 1.0f);
    }

    fftData->lastFftTime = GetTime();
    memcpy(fftData->fftHistory[fftData->historyPos], smoothedSpectrum, sizeof(smoothedSpectrum));
    fftData->historyPos = (fftData->historyPos + 1) % fftData->fftHistoryLen;
 }

 static void RenderFrame(const FFTData *fftData, Image *fftImage)
 {
    double framesSinceTapback = floor(fftData->tapbackPos/WINDOW_TIME);
    framesSinceTapback = Clamp(framesSinceTapback, 0.0, fftData->fftHistoryLen - 1);

    int historyPosition = (fftData->historyPos - 1 - (int)framesSinceTapback) % fftData->fftHistoryLen;
    if (historyPosition < 0)
        historyPosition += fftData->fftHistoryLen;

    const float *amplitude = fftData->fftHistory[historyPosition];
    for (int bin = 0; bin < BUFFER_SIZE; bin++) {
        ImageDrawPixel(fftImage, bin, FFT_ROW, ColorFromNormalized((Vector4){ amplitude[bin], UNUSED_CHANNEL, UNUSED_CHANNEL, UNUSED_CHANNEL }));
    }
 }
--- a/examples/audio/audio_fft_spectrum_visualizer.png
+++ b/examples/audio/audio_fft_spectrum_visualizer.png
--- a/examples/audio/resources/fft.glsl
+++ b/examples/audio/resources/fft.glsl
@ -0,0 +1,32 @@
 #version 330

 in vec2 fragTexCoord;
 in vec4 fragColor;

 out vec4 finalColor;

 uniform vec2 iResolution;
 uniform sampler2D iChannel0;

 const vec4  BLACK = vec4(0.0, 0.0, 0.0, 1.0);
 const vec4  WHITE = vec4(1.0, 1.0, 1.0, 1.0);
 const float FFT_ROW = 0.0;
 const float NUM_OF_BINS = 512.0;

 void main() {
    vec2  fragCoord = fragTexCoord*iResolution;
    float cell_width = iResolution.x/NUM_OF_BINS;
    float bin_index = floor(fragCoord.x/cell_width);
    float local_x = mod(fragCoord.x, cell_width);
    float bar_width  = cell_width - 1.0;
    vec4  color = BLACK;
    if (local_x <= bar_width) {
        float sample_x = (bin_index + 0.5)/NUM_OF_BINS;
        vec2  sample_coord = vec2(sample_x, FFT_ROW);
        float amplitude = texture(iChannel0, sample_coord).r; // only filled the red channel, all channels left open for alternative use
        if (fragTexCoord.y < amplitude) {
            color = WHITE;
        }
    }
    finalColor = color;
 }