From bce7a7b5765defcfb5692dc73e7251c8dd3e9fe2 Mon Sep 17 00:00:00 2001
From: David Reid <mackron@gmail.com>
Date: Sun, 1 Jul 2018 20:51:10 +1000
Subject: [PATCH] Update mini_al.

---
 src/external/mini_al.h | 4106 +++++++++++++++++++++++++++++++++++-----
 1 file changed, 3655 insertions(+), 451 deletions(-)

diff --git a/src/external/mini_al.h b/src/external/mini_al.h
index 4d05b218f..e8d412399 100644
--- a/src/external/mini_al.h
+++ b/src/external/mini_al.h
@@ -16,6 +16,7 @@
 //   - WASAPI
 //   - DirectSound
 //   - WinMM
+//   - Core Audio (macOS, iOS)
 //   - ALSA
 //   - PulseAudio
 //   - JACK
@@ -24,8 +25,6 @@
 //   - OpenAL
 //   - SDL
 //   - Null (Silence)
-//   - ... and more in the future.
-//     - Core Audio (OSX, iOS)
 //
 // Supported Formats:
 //   - Unsigned 8-bit PCM
@@ -43,11 +42,9 @@
 //
 // You can then #include this file in other parts of the program as you would with any other header file.
 //
-// The implementation of this library will try #include-ing necessary headers for some backends. If you do not have
-// the development packages for any particular backend you can disable it by #define-ing the appropriate MAL_NO_*
-// option before the implementation.
+// If you want to disable a specific backend, #define the appropriate MAL_NO_* option before the implementation.
 //
-// Note that GCC and Clang requires "-msse2", "-mavx", etc. for SIMD optimizations.
+// Note that GCC and Clang requires "-msse2", "-mavx2", etc. for SIMD optimizations.
 //
 //
 // Building for Windows
@@ -55,6 +52,11 @@
 // The Windows build should compile clean on all popular compilers without the need to configure any include paths
 // nor link to any libraries.
 //
+// Building for macOS
+// ------------------
+// The macOS build should compile clean without the need to download any dependencies or link to any libraries or
+// frameworks.
+//
 // Building for Linux
 // ------------------
 // The Linux build only requires linking to -ldl, -lpthread and -lm. You do not need any development packages for any
@@ -174,6 +176,9 @@
 // #define MAL_NO_JACK
 //   Disables the JACK backend.
 //
+// #define MAL_NO_COREAUDIO
+//   Disables the Core Audio backend.
+//
 // #define MAL_NO_OSS
 //   Disables the OSS backend.
 //
@@ -207,8 +212,8 @@
 // #define MAL_NO_SSE2
 //   Disables SSE2 optimizations.
 //
-// #define MAL_NO_AVX
-//   Disables AVX optimizations.
+// #define MAL_NO_AVX2
+//   Disables AVX2 optimizations.
 //
 // #define MAL_NO_AVX512
 //   Disables AVX-512 optimizations.
@@ -278,9 +283,6 @@ extern "C" {
         #define MAL_SUPPORT_PULSEAUDIO
         #define MAL_SUPPORT_JACK
     #endif
-    #if defined(MAL_APPLE)
-        #define MAL_SUPPORT_COREAUDIO
-    #endif
     #if defined(MAL_ANDROID)
         #define MAL_SUPPORT_OPENSL
     #endif
@@ -288,6 +290,9 @@ extern "C" {
         #define MAL_SUPPORT_OSS
     #endif
 #endif
+#if defined(MAL_APPLE)
+    #define MAL_SUPPORT_COREAUDIO
+#endif
 
 #define MAL_SUPPORT_SDL     // All platforms support SDL.
 
@@ -399,7 +404,7 @@ typedef mal_uint32                  mal_bool32;
 
 typedef void* mal_handle;
 typedef void* mal_ptr;
-typedef void (* mal_proc)();
+typedef void (* mal_proc)(void);
 
 typedef struct mal_context mal_context;
 typedef struct mal_device mal_device;
@@ -625,27 +630,29 @@ typedef int mal_result;
 #define MAL_API_NOT_FOUND                               -8
 #define MAL_DEVICE_BUSY                                 -9
 #define MAL_DEVICE_NOT_INITIALIZED                      -10
-#define MAL_DEVICE_ALREADY_STARTED                      -11
-#define MAL_DEVICE_ALREADY_STARTING                     -12
-#define MAL_DEVICE_ALREADY_STOPPED                      -13
-#define MAL_DEVICE_ALREADY_STOPPING                     -14
-#define MAL_FAILED_TO_MAP_DEVICE_BUFFER                 -15
-#define MAL_FAILED_TO_UNMAP_DEVICE_BUFFER               -16
-#define MAL_FAILED_TO_INIT_BACKEND                      -17
-#define MAL_FAILED_TO_READ_DATA_FROM_CLIENT             -18
-#define MAL_FAILED_TO_READ_DATA_FROM_DEVICE             -19
-#define MAL_FAILED_TO_SEND_DATA_TO_CLIENT               -20
-#define MAL_FAILED_TO_SEND_DATA_TO_DEVICE               -21
-#define MAL_FAILED_TO_OPEN_BACKEND_DEVICE               -22
-#define MAL_FAILED_TO_START_BACKEND_DEVICE              -23
-#define MAL_FAILED_TO_STOP_BACKEND_DEVICE               -24
-#define MAL_FAILED_TO_CONFIGURE_BACKEND_DEVICE          -25
-#define MAL_FAILED_TO_CREATE_MUTEX                      -26
-#define MAL_FAILED_TO_CREATE_EVENT                      -27
-#define MAL_FAILED_TO_CREATE_THREAD                     -28
-#define MAL_INVALID_DEVICE_CONFIG                       -29
-#define MAL_ACCESS_DENIED                               -30
-#define MAL_TOO_LARGE                                   -31
+#define MAL_DEVICE_NOT_STARTED                          -11
+#define MAL_DEVICE_NOT_STOPPED                          -12
+#define MAL_DEVICE_ALREADY_STARTED                      -13
+#define MAL_DEVICE_ALREADY_STARTING                     -14
+#define MAL_DEVICE_ALREADY_STOPPED                      -15
+#define MAL_DEVICE_ALREADY_STOPPING                     -16
+#define MAL_FAILED_TO_MAP_DEVICE_BUFFER                 -17
+#define MAL_FAILED_TO_UNMAP_DEVICE_BUFFER               -18
+#define MAL_FAILED_TO_INIT_BACKEND                      -19
+#define MAL_FAILED_TO_READ_DATA_FROM_CLIENT             -20
+#define MAL_FAILED_TO_READ_DATA_FROM_DEVICE             -21
+#define MAL_FAILED_TO_SEND_DATA_TO_CLIENT               -22
+#define MAL_FAILED_TO_SEND_DATA_TO_DEVICE               -23
+#define MAL_FAILED_TO_OPEN_BACKEND_DEVICE               -24
+#define MAL_FAILED_TO_START_BACKEND_DEVICE              -25
+#define MAL_FAILED_TO_STOP_BACKEND_DEVICE               -26
+#define MAL_FAILED_TO_CONFIGURE_BACKEND_DEVICE          -27
+#define MAL_FAILED_TO_CREATE_MUTEX                      -28
+#define MAL_FAILED_TO_CREATE_EVENT                      -29
+#define MAL_FAILED_TO_CREATE_THREAD                     -30
+#define MAL_INVALID_DEVICE_CONFIG                       -31
+#define MAL_ACCESS_DENIED                               -32
+#define MAL_TOO_LARGE                                   -33
 
 typedef void       (* mal_log_proc) (mal_context* pContext, mal_device* pDevice, const char* message);
 typedef void       (* mal_recv_proc)(mal_device* pDevice, mal_uint32 frameCount, const void* pSamples);
@@ -661,6 +668,7 @@ typedef enum
     mal_backend_alsa,
     mal_backend_pulseaudio,
     mal_backend_jack,
+    mal_backend_coreaudio,
     mal_backend_oss,
     mal_backend_opensl,
     mal_backend_openal,
@@ -754,7 +762,7 @@ typedef union
     int jack;                       // JACK always uses default devices.
 #endif
 #ifdef MAL_SUPPORT_COREAUDIO
-    // TODO: Implement me.
+    char coreaudio[256];            // Core Audio uses a string for identification.
 #endif
 #ifdef MAL_SUPPORT_OSS
     char oss[64];                   // "dev/dsp0", etc. "dev/dsp" for the default device.
@@ -812,6 +820,10 @@ typedef struct
     mal_stream_format streamFormatIn;
     mal_stream_format streamFormatOut;
     mal_dither_mode ditherMode;
+    mal_bool32 noSSE2   : 1;
+    mal_bool32 noAVX2   : 1;
+    mal_bool32 noAVX512 : 1;
+    mal_bool32 noNEON   : 1;
     mal_format_converter_read_proc onRead;
     mal_format_converter_read_deinterleaved_proc onReadDeinterleaved;
     void* pUserData;
@@ -820,6 +832,10 @@ typedef struct
 struct mal_format_converter
 {
     mal_format_converter_config config;
+    mal_bool32 useSSE2   : 1;
+    mal_bool32 useAVX2   : 1;
+    mal_bool32 useAVX512 : 1;
+    mal_bool32 useNEON   : 1;
     void (* onConvertPCM)(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode);
     void (* onInterleavePCM)(void* dst, const void** src, mal_uint64 frameCount, mal_uint32 channels);
     void (* onDeinterleavePCM)(void** dst, const void* src, mal_uint64 frameCount, mal_uint32 channels);
@@ -838,7 +854,7 @@ typedef struct
     mal_channel channelMapOut[MAL_MAX_CHANNELS];
     mal_channel_mix_mode mixingMode;
     mal_bool32 noSSE2   : 1;
-    mal_bool32 noAVX    : 1;
+    mal_bool32 noAVX2   : 1;
     mal_bool32 noAVX512 : 1;
     mal_bool32 noNEON   : 1;
     mal_channel_router_read_deinterleaved_proc onReadDeinterleaved;
@@ -851,7 +867,7 @@ struct mal_channel_router
     mal_bool32 isPassthrough   : 1;
     mal_bool32 isSimpleShuffle : 1;
     mal_bool32 useSSE2         : 1;
-    mal_bool32 useAVX          : 1;
+    mal_bool32 useAVX2         : 1;
     mal_bool32 useAVX512       : 1;
     mal_bool32 useNEON         : 1;
     mal_uint8 shuffleTable[MAL_MAX_CHANNELS];
@@ -885,6 +901,10 @@ typedef struct
     mal_uint32 sampleRateOut;
     mal_uint32 channels;
     mal_src_algorithm algorithm;
+    mal_bool32 noSSE2   : 1;
+    mal_bool32 noAVX2   : 1;
+    mal_bool32 noAVX512 : 1;
+    mal_bool32 noNEON   : 1;
     mal_src_read_deinterleaved_proc onReadDeinterleaved;
     void* pUserData;
     union
@@ -914,11 +934,15 @@ MAL_ALIGNED_STRUCT(MAL_SIMD_ALIGNMENT) mal_src
             float timeIn;
             mal_uint32 inputFrameCount;     // The number of frames sitting in the input buffer, not including the first half of the window.
             mal_uint32 windowPosInSamples;  // An offset of <input>.
-            float table[MAL_SRC_SINC_MAX_WINDOW_WIDTH * MAL_SRC_SINC_LOOKUP_TABLE_RESOLUTION]; // Precomputed lookup table.
+            float table[MAL_SRC_SINC_MAX_WINDOW_WIDTH*1 * MAL_SRC_SINC_LOOKUP_TABLE_RESOLUTION]; // Precomputed lookup table. The +1 is used to avoid the need for an overflow check.
         } sinc;
     };
 
     mal_src_config config;
+    mal_bool32 useSSE2   : 1;
+    mal_bool32 useAVX2   : 1;
+    mal_bool32 useAVX512 : 1;
+    mal_bool32 useNEON   : 1;
 };
 
 typedef struct mal_dsp mal_dsp;
@@ -938,6 +962,10 @@ typedef struct
     mal_dither_mode ditherMode;
     mal_src_algorithm srcAlgorithm;
     mal_bool32 allowDynamicSampleRate;
+    mal_bool32 noSSE2   : 1;
+    mal_bool32 noAVX2   : 1;
+    mal_bool32 noAVX512 : 1;
+    mal_bool32 noNEON   : 1;
     mal_dsp_read_proc onRead;
     void* pUserData;
     union
@@ -1205,7 +1233,25 @@ struct mal_context
 #ifdef MAL_SUPPORT_COREAUDIO
         struct
         {
-            int _unused;
+            mal_handle hCoreFoundation;
+            mal_proc CFStringGetCString;
+            
+            mal_handle hCoreAudio;
+            mal_proc AudioObjectGetPropertyData;
+            mal_proc AudioObjectGetPropertyDataSize;
+            mal_proc AudioObjectSetPropertyData;
+            
+            mal_handle hAudioToolbox;
+            mal_proc AudioComponentFindNext;
+            mal_proc AudioComponentInstanceDispose;
+            mal_proc AudioComponentInstanceNew;
+            mal_proc AudioOutputUnitStart;
+            mal_proc AudioOutputUnitStop;
+            mal_proc AudioUnitAddPropertyListener;
+            mal_proc AudioUnitGetProperty;
+            mal_proc AudioUnitSetProperty;
+            mal_proc AudioUnitInitialize;
+            mal_proc AudioUnitRender;
         } coreaudio;
 #endif
 #ifdef MAL_SUPPORT_OSS
@@ -1494,7 +1540,10 @@ MAL_ALIGNED_STRUCT(MAL_SIMD_ALIGNMENT) mal_device
 #ifdef MAL_SUPPORT_COREAUDIO
         struct
         {
-            int _unused;
+            mal_uint32 deviceObjectID;
+            /*AudioComponent*/ mal_ptr component;   // <-- Can this be per-context?
+            /*AudioUnit*/ mal_ptr audioUnit;
+            /*AudioBufferList**/ mal_ptr pAudioBufferList;  // Only used for input devices.
         } coreaudio;
 #endif
 #ifdef MAL_SUPPORT_OSS
@@ -1569,6 +1618,7 @@ MAL_ALIGNED_STRUCT(MAL_SIMD_ALIGNMENT) mal_device
 //   - WASAPI
 //   - DirectSound
 //   - WinMM
+//   - Core Audio (macOS, iOS)
 //   - OSS
 //   - PulseAudio
 //   - ALSA
@@ -1868,7 +1918,7 @@ mal_context_config mal_context_config_init(mal_log_proc onLog);
 //
 // mal_device_config_init(), mal_device_config_init_playback(), etc. will allow you to explicitly set the sample format,
 // channel count, etc.
-mal_device_config mal_device_config_init_default();
+mal_device_config mal_device_config_init_default(void);
 mal_device_config mal_device_config_init_default_capture(mal_recv_proc onRecvCallback);
 mal_device_config mal_device_config_init_default_playback(mal_send_proc onSendCallback);
 
@@ -2028,7 +2078,7 @@ mal_uint64 mal_format_converter_read_deinterleaved(mal_format_converter* pConver
 
 
 // Helper for initializing a format converter config.
-mal_format_converter_config mal_format_converter_config_init_new();
+mal_format_converter_config mal_format_converter_config_init_new(void);
 mal_format_converter_config mal_format_converter_config_init(mal_format formatIn, mal_format formatOut, mal_uint32 channels, mal_format_converter_read_proc onRead, void* pUserData);
 mal_format_converter_config mal_format_converter_config_init_deinterleaved(mal_format formatIn, mal_format formatOut, mal_uint32 channels, mal_format_converter_read_deinterleaved_proc onReadDeinterleaved, void* pUserData);
 
@@ -2134,7 +2184,7 @@ mal_uint64 mal_src_read_deinterleaved(mal_src* pSRC, mal_uint64 frameCount, void
 
 
 // Helper for creating a sample rate conversion config.
-mal_src_config mal_src_config_init_new();
+mal_src_config mal_src_config_init_new(void);
 mal_src_config mal_src_config_init(mal_uint32 sampleRateIn, mal_uint32 sampleRateOut, mal_uint32 channels, mal_src_read_deinterleaved_proc onReadDeinterleaved, void* pUserData);
 
 
@@ -2164,7 +2214,7 @@ mal_result mal_dsp_set_output_sample_rate(mal_dsp* pDSP, mal_uint32 sampleRateOu
 mal_uint64 mal_dsp_read(mal_dsp* pDSP, mal_uint64 frameCount, void* pFramesOut, void* pUserData);
 
 // Helper for initializing a mal_dsp_config object.
-mal_dsp_config mal_dsp_config_init_new();
+mal_dsp_config mal_dsp_config_init_new(void);
 mal_dsp_config mal_dsp_config_init(mal_format formatIn, mal_uint32 channelsIn, mal_uint32 sampleRateIn, mal_format formatOut, mal_uint32 channelsOut, mal_uint32 sampleRateOut, mal_dsp_read_proc onRead, void* pUserData);
 mal_dsp_config mal_dsp_config_init_ex(mal_format formatIn, mal_uint32 channelsIn, mal_uint32 sampleRateIn, mal_channel channelMapIn[MAL_MAX_CHANNELS], mal_format formatOut, mal_uint32 channelsOut, mal_uint32 sampleRateOut,  mal_channel channelMapOut[MAL_MAX_CHANNELS], mal_dsp_read_proc onRead, void* pUserData);
 
@@ -2237,7 +2287,7 @@ void mal_blend_f32(float* pOut, float* pInA, float* pInB, float factor, mal_uint
 // This could be useful for dynamically determining the size of a device's internal buffer based on the speed of the system.
 //
 // This is a slow API because it performs a profiling test.
-float mal_calculate_cpu_speed_factor();
+float mal_calculate_cpu_speed_factor(void);
 
 // Adjust buffer size based on a scaling factor.
 //
@@ -2465,8 +2515,11 @@ mal_uint64 mal_sine_wave_read(mal_sine_wave* pSignWave, mal_uint64 count, float*
         #if !defined(MAL_NO_SSE2)   // Assume all MSVC compilers support SSE2 intrinsics.
             #define MAL_SUPPORT_SSE2
         #endif
-        #if _MSC_VER >= 1600 && !defined(MAL_NO_AVX)    // 2010
-            #define MAL_SUPPORT_AVX
+        //#if _MSC_VER >= 1600 && !defined(MAL_NO_AVX)    // 2010
+        //    #define MAL_SUPPORT_AVX
+        //#endif
+        #if _MSC_VER >= 1700 && !defined(MAL_NO_AVX2)   // 2012
+            #define MAL_SUPPORT_AVX2
         #endif
         #if _MSC_VER >= 1910 && !defined(MAL_NO_AVX512) // 2017
             #define MAL_SUPPORT_AVX512
@@ -2476,8 +2529,11 @@ mal_uint64 mal_sine_wave_read(mal_sine_wave* pSignWave, mal_uint64 count, float*
         #if defined(__SSE2__) && !defined(MAL_NO_SSE2)
             #define MAL_SUPPORT_SSE2
         #endif
-        #if defined(__AVX__) && !defined(MAL_NO_AVX)
-            #define MAL_SUPPORT_AVX
+        //#if defined(__AVX__) && !defined(MAL_NO_AVX)
+        //    #define MAL_SUPPORT_AVX
+        //#endif
+        #if defined(__AVX2__) && !defined(MAL_NO_AVX2)
+            #define MAL_SUPPORT_AVX2
         #endif
         #if defined(__AVX512F__) && !defined(MAL_NO_AVX512)
             #define MAL_SUPPORT_AVX512
@@ -2489,8 +2545,11 @@ mal_uint64 mal_sine_wave_read(mal_sine_wave* pSignWave, mal_uint64 count, float*
         #if !defined(MAL_SUPPORT_SSE2)   && !defined(MAL_NO_SSE2)   && __has_include(<emmintrin.h>)
             #define MAL_SUPPORT_SSE2
         #endif
-        #if !defined(MAL_SUPPORT_AVX)    && !defined(MAL_NO_AVX)    && __has_include(<immintrin.h>)
-            #define MAL_SUPPORT_AVX
+        //#if !defined(MAL_SUPPORT_AVX)    && !defined(MAL_NO_AVX)    && __has_include(<immintrin.h>)
+        //    #define MAL_SUPPORT_AVX
+        //#endif
+        #if !defined(MAL_SUPPORT_AVX2)   && !defined(MAL_NO_AVX2)   && __has_include(<immintrin.h>)
+            #define MAL_SUPPORT_AVX2
         #endif
         #if !defined(MAL_SUPPORT_AVX512) && !defined(MAL_NO_AVX512) && __has_include(<zmmintrin.h>)
             #define MAL_SUPPORT_AVX512
@@ -2499,7 +2558,7 @@ mal_uint64 mal_sine_wave_read(mal_sine_wave* pSignWave, mal_uint64 count, float*
 
     #if defined(MAL_SUPPORT_AVX512)
         #include <immintrin.h>  // Not a mistake. Intentionally including <immintrin.h> instead of <zmmintrin.h> because otherwise the compiler will complain.
-    #elif defined(MAL_SUPPORT_AVX)
+    #elif defined(MAL_SUPPORT_AVX2) || defined(MAL_SUPPORT_AVX)
         #include <immintrin.h>
     #elif defined(MAL_SUPPORT_SSE2)
         #include <emmintrin.h>
@@ -2597,6 +2656,7 @@ static MAL_INLINE mal_bool32 mal_has_sse2()
 #endif
 }
 
+#if 0
 static MAL_INLINE mal_bool32 mal_has_avx()
 {
 #if defined(MAL_SUPPORT_AVX)
@@ -2629,6 +2689,42 @@ static MAL_INLINE mal_bool32 mal_has_avx()
     return MAL_FALSE;           // No compiler support.
 #endif
 }
+#endif
+
+static MAL_INLINE mal_bool32 mal_has_avx2()
+{
+#if defined(MAL_SUPPORT_AVX2)
+    #if (defined(MAL_X64) || defined(MAL_X86)) && !defined(MAL_NO_AVX2)
+        #if defined(_AVX2_) || defined(__AVX2__)
+            return MAL_TRUE;    // If the compiler is allowed to freely generate AVX2 code we can assume support.
+        #else
+            // AVX requires both CPU and OS support.
+            #if defined(MAL_NO_CPUID) || defined(MAL_NO_XGETBV)
+                return MAL_FALSE;
+            #else
+                int info1[4];
+                int info7[4];
+                mal_cpuid(info1, 1);
+                mal_cpuid(info7, 7);
+                if (((info1[2] & (1 << 27)) != 0) && ((info7[1] & (1 << 5)) != 0)) {
+                    mal_uint64 xrc = mal_xgetbv(0);
+                    if ((xrc & 0x06) == 0x06) {
+                        return MAL_TRUE;
+                    } else {
+                        return MAL_FALSE;
+                    }
+                } else {
+                    return MAL_FALSE;
+                }
+            #endif
+        #endif
+    #else
+        return MAL_FALSE;       // AVX is only supported on x86 and x64 architectures.
+    #endif
+#else
+    return MAL_FALSE;           // No compiler support.
+#endif
+}
 
 static MAL_INLINE mal_bool32 mal_has_avx512f()
 {
@@ -2641,9 +2737,11 @@ static MAL_INLINE mal_bool32 mal_has_avx512f()
             #if defined(MAL_NO_CPUID) || defined(MAL_NO_XGETBV)
                 return MAL_FALSE;
             #else
-                int info[4];
-                mal_cpuid(info, 1);
-                if (((info[2] & (1 << 27)) != 0) && ((info[1] & (1 << 16)) != 0)) {
+                int info1[4];
+                int info7[4];
+                mal_cpuid(info1, 1);
+                mal_cpuid(info7, 7);
+                if (((info1[2] & (1 << 27)) != 0) && ((info7[1] & (1 << 16)) != 0)) {
                     mal_uint64 xrc = mal_xgetbv(0);
                     if ((xrc & 0xE6) == 0xE6) {
                         return MAL_TRUE;
@@ -2688,6 +2786,12 @@ static MAL_INLINE mal_bool32 mal_has_neon()
 #ifndef MAL_PI_D
 #define MAL_PI_D    3.14159265358979323846264
 #endif
+#ifndef MAL_TAU
+#define MAL_TAU     6.28318530717958647693f
+#endif
+#ifndef MAL_TAU_D
+#define MAL_TAU_D   6.28318530717958647693
+#endif
 
 // Unfortunately using runtime linking for pthreads causes problems. This has occurred for me when testing on FreeBSD. When
 // using runtime linking, deadlocks can occur (for me it happens when loading data from fread()). It turns out that doing
@@ -2873,6 +2977,18 @@ mal_uint32 g_malStandardSampleRatePriorities[] = {
     MAL_SAMPLE_RATE_384000
 };
 
+mal_format g_malFormatPriorities[] = {
+    mal_format_f32,         // Most common
+    mal_format_s16,
+    
+    //mal_format_s24_32,    // Clean alignment
+    mal_format_s32,
+    
+    mal_format_s24,         // Unclean alignment
+    
+    mal_format_u8           // Low quality
+};
+
 #define MAL_DEFAULT_PLAYBACK_DEVICE_NAME    "Default Playback Device"
 #define MAL_DEFAULT_CAPTURE_DEVICE_NAME     "Default Capture Device"
 
@@ -3169,6 +3285,20 @@ static MAL_INLINE unsigned int mal_round_to_power_of_2(unsigned int x)
     }
 }
 
+static MAL_INLINE unsigned int mal_count_set_bits(unsigned int x)
+{
+    unsigned int count = 0;
+    while (x != 0) {
+        if (x & 1) {
+            count += 1;
+        }
+        
+        x = x >> 1;
+    }
+    
+    return count;
+}
+
 
 
 // Clamps an f32 sample to -1..1
@@ -3185,8 +3315,37 @@ static MAL_INLINE float mal_mix_f32(float x, float y, float a)
 }
 static MAL_INLINE float mal_mix_f32_fast(float x, float y, float a)
 {
-    return x + (y - x)*a;
+    float r0 = (y - x);
+    float r1 = r0*a;
+    return x + r1;
+    //return x + (y - x)*a;
+}
+
+#if defined(MAL_SUPPORT_SSE2)
+static MAL_INLINE __m128 mal_mix_f32_fast__sse2(__m128 x, __m128 y, __m128 a)
+{
+    return _mm_add_ps(x, _mm_mul_ps(_mm_sub_ps(y, x), a));
+}
+#endif
+#if defined(MAL_SUPPORT_AVX2)
+static MAL_INLINE __m256 mal_mix_f32_fast__avx2(__m256 x, __m256 y, __m256 a)
+{
+    return _mm256_add_ps(x, _mm256_mul_ps(_mm256_sub_ps(y, x), a));
+}
+#endif
+#if defined(MAL_SUPPORT_AVX512)
+static MAL_INLINE __m512 mal_mix_f32_fast__avx512(__m512 x, __m512 y, __m512 a)
+{
+    return _mm512_add_ps(x, _mm512_mul_ps(_mm512_sub_ps(y, x), a));
+}
+#endif
+#if defined(MAL_SUPPORT_NEON)
+static MAL_INLINE float32x4_t mal_mix_f32_fast__neon(float32x4_t x, float32x4_t y, float32x4_t a)
+{
+    return vaddq_f32(x, vmulq_f32(vsubq_f32(y, x), a));
 }
+#endif
+
 
 static MAL_INLINE double mal_mix_f64(double x, double y, double a)
 {
@@ -3251,16 +3410,25 @@ static MAL_INLINE mal_int32 mal_rand_range_s32(mal_int32 lo, mal_int32 hi)
 }
 
 
+static MAL_INLINE float mal_dither_f32_rectangle(float ditherMin, float ditherMax)
+{
+    return mal_rand_range_f32(ditherMin, ditherMax);
+}
+
+static MAL_INLINE float mal_dither_f32_triangle(float ditherMin, float ditherMax)
+{
+    float a = mal_rand_range_f32(ditherMin, 0);
+    float b = mal_rand_range_f32(0, ditherMax);
+    return a + b;
+}
+
 static MAL_INLINE float mal_dither_f32(mal_dither_mode ditherMode, float ditherMin, float ditherMax)
 {
     if (ditherMode == mal_dither_mode_rectangle) {
-        float a = mal_rand_range_f32(ditherMin, ditherMax);
-        return a;
+        return mal_dither_f32_rectangle(ditherMin, ditherMax);
     }
     if (ditherMode == mal_dither_mode_triangle) {
-        float a = mal_rand_range_f32(ditherMin, 0);
-        float b = mal_rand_range_f32(0, ditherMax);
-        return a + b;
+        return mal_dither_f32_triangle(ditherMin, ditherMax);
     }
 
     return 0;
@@ -3286,6 +3454,10 @@ static MAL_INLINE mal_int32 mal_dither_s32(mal_dither_mode ditherMode, mal_int32
 // multiple of the alignment. The alignment must be a power of 2.
 void mal_split_buffer(void* pBuffer, size_t bufferSize, size_t splitCount, size_t alignment, void** ppBuffersOut, size_t* pSplitSizeOut)
 {
+    if (pSplitSizeOut) {
+        *pSplitSizeOut = 0;
+    }
+
     if (pBuffer == NULL || bufferSize == 0 || splitCount == 0) {
         return;
     }
@@ -3358,7 +3530,7 @@ void mal_timer_init(mal_timer* pTimer)
 
     LARGE_INTEGER counter;
     QueryPerformanceCounter(&counter);
-    pTimer->counter = (mal_uint64)counter.QuadPart;
+    pTimer->counter = counter.QuadPart;
 }
 
 double mal_timer_get_time_in_seconds(mal_timer* pTimer)
@@ -3368,10 +3540,10 @@ double mal_timer_get_time_in_seconds(mal_timer* pTimer)
         return 0;
     }
 
-    return (counter.QuadPart - pTimer->counter) / (double)g_mal_TimerFrequency.QuadPart;
+    return (double)(counter.QuadPart - pTimer->counter) / g_mal_TimerFrequency.QuadPart;
 }
 #elif defined(MAL_APPLE) && (__MAC_OS_X_VERSION_MIN_REQUIRED < 101200)
-uint64_t g_mal_TimerFrequency = 0;
+mal_uint64 g_mal_TimerFrequency = 0;
 void mal_timer_init(mal_timer* pTimer)
 {
     mach_timebase_info_data_t baseTime;
@@ -3383,16 +3555,22 @@ void mal_timer_init(mal_timer* pTimer)
 
 double mal_timer_get_time_in_seconds(mal_timer* pTimer)
 {
-    uint64_t newTimeCounter = mach_absolute_time();
-    uint64_t oldTimeCounter = pTimer->counter;
+    mal_uint64 newTimeCounter = mach_absolute_time();
+    mal_uint64 oldTimeCounter = pTimer->counter;
 
     return (newTimeCounter - oldTimeCounter) / g_mal_TimerFrequency;
 }
 #else
+#if defined(CLOCK_MONOTONIC)
+    #define MAL_CLOCK_ID CLOCK_MONOTONIC
+#else
+    #define MAL_CLOCK_ID CLOCK_REALTIME
+#endif
+
 void mal_timer_init(mal_timer* pTimer)
 {
     struct timespec newTime;
-    clock_gettime(CLOCK_MONOTONIC, &newTime);
+    clock_gettime(MAL_CLOCK_ID, &newTime);
 
     pTimer->counter = (newTime.tv_sec * 1000000000) + newTime.tv_nsec;
 }
@@ -3400,7 +3578,7 @@ void mal_timer_init(mal_timer* pTimer)
 double mal_timer_get_time_in_seconds(mal_timer* pTimer)
 {
     struct timespec newTime;
-    clock_gettime(CLOCK_MONOTONIC, &newTime);
+    clock_gettime(MAL_CLOCK_ID, &newTime);
 
     uint64_t newTimeCounter = (newTime.tv_sec * 1000000000) + newTime.tv_nsec;
     uint64_t oldTimeCounter = pTimer->counter;
@@ -3593,8 +3771,10 @@ mal_bool32 mal_thread_create__posix(mal_context* pContext, mal_thread* pThread,
                 scheduler = SCHED_FIFO;
             }
 #endif
+#ifdef MAL_LINUX
         } else {
             scheduler = sched_getscheduler(0);
+#endif
         }
 
         if (scheduler != -1) {
@@ -4135,6 +4315,19 @@ mal_result mal_context__try_get_device_name_by_id(mal_context* pContext, mal_dev
 }
 
 
+mal_uint32 mal_get_format_priority_index(mal_format format) // Lower = better.
+{
+    for (mal_uint32 i = 0; i < mal_countof(g_malFormatPriorities); ++i) {
+        if (g_malFormatPriorities[i] == format) {
+            return i;
+        }
+    }
+
+    // Getting here means the format could not be found or is equal to mal_format_unknown.
+    return (mal_uint32)-1;
+}
+
+
 ///////////////////////////////////////////////////////////////////////////////
 //
 // Null Backend
@@ -4591,7 +4784,10 @@ void mal_channel_mask_to_channel_map__win32(DWORD dwChannelMask, mal_uint32 chan
 }
 
 #ifdef __cplusplus
-#define mal_is_guid_equal(a, b) IsEqualGUID(*((const GUID*)a), *((const GUID*)b))
+mal_bool32 mal_is_guid_equal(const void* a, const void* b)
+{
+    return IsEqualGUID(*(const GUID*)a, *(const GUID*)b);
+}
 #else
 #define mal_is_guid_equal(a, b) IsEqualGUID((const GUID*)a, (const GUID*)b)
 #endif
@@ -12343,137 +12539,1753 @@ mal_result mal_device__stop_backend__jack(mal_device* pDevice)
     if (((mal_jack_deactivate_proc)pContext->jack.jack_deactivate)((mal_jack_client_t*)pDevice->jack.pClient) != 0) {
         return mal_post_error(pDevice, "[JACK] An error occurred when deactivating the JACK client.", MAL_ERROR);
     }
+    
+    mal_device__set_state(pDevice, MAL_STATE_STOPPED);
+    mal_stop_proc onStop = pDevice->onStop;
+    if (onStop) {
+        onStop(pDevice);
+    }
 
     return MAL_SUCCESS;
 }
-#endif
+#endif  // JACK
 
 
 
 ///////////////////////////////////////////////////////////////////////////////
 //
-// OSS Backend
+// Core Audio Backend
 //
 ///////////////////////////////////////////////////////////////////////////////
-#ifdef MAL_HAS_OSS
-#include <sys/ioctl.h>
-#include <unistd.h>
-#include <fcntl.h>
-#include <sys/soundcard.h>
+#ifdef MAL_HAS_COREAUDIO
+#include <CoreAudio/CoreAudio.h>
+#include <AudioToolbox/AudioToolbox.h>
 
-#ifndef SNDCTL_DSP_HALT
-#define SNDCTL_DSP_HALT SNDCTL_DSP_RESET
+#include <TargetConditionals.h>
+#if defined(TARGET_OS_OSX)
+    #define MAL_APPLE_DESKTOP
+#elif defined(TARGET_OS_IPHONE)
+    #define MAL_APPLE_MOBILE
 #endif
 
-int mal_open_temp_device__oss()
+// CoreFoundation
+typedef Boolean (* mal_CFStringGetCString_proc)(CFStringRef theString, char* buffer, CFIndex bufferSize, CFStringEncoding encoding);
+
+// CoreAudio
+typedef OSStatus (* mal_AudioObjectGetPropertyData_proc)(AudioObjectID inObjectID, const AudioObjectPropertyAddress* inAddress, UInt32 inQualifierDataSize, const void* inQualifierData, UInt32* ioDataSize, void* outData);
+typedef OSStatus (* mal_AudioObjectGetPropertyDataSize_proc)(AudioObjectID inObjectID, const AudioObjectPropertyAddress* inAddress, UInt32 inQualifierDataSize, const void* inQualifierData, UInt32* outDataSize);
+typedef OSStatus (* mal_AudioObjectSetPropertyData_proc)(AudioObjectID inObjectID, const AudioObjectPropertyAddress* inAddress, UInt32 inQualifierDataSize, const void* inQualifierData, UInt32 inDataSize, const void* inData);
+
+// AudioToolbox
+typedef AudioComponent (* mal_AudioComponentFindNext_proc)(AudioComponent inComponent, const AudioComponentDescription* inDesc);
+typedef OSStatus (* mal_AudioComponentInstanceDispose_proc)(AudioComponentInstance inInstance);
+typedef OSStatus (* mal_AudioComponentInstanceNew_proc)(AudioComponent inComponent, AudioComponentInstance* outInstance);
+typedef OSStatus (* mal_AudioOutputUnitStart_proc)(AudioUnit inUnit);
+typedef OSStatus (* mal_AudioOutputUnitStop_proc)(AudioUnit inUnit);
+typedef OSStatus (* mal_AudioUnitAddPropertyListener_proc)(AudioUnit inUnit, AudioUnitPropertyID inID, AudioUnitPropertyListenerProc inProc, void* inProcUserData);
+typedef OSStatus (* mal_AudioUnitGetProperty_proc)(AudioUnit inUnit, AudioUnitPropertyID inID, AudioUnitScope inScope, AudioUnitElement inElement, void* outData, UInt32* ioDataSize);
+typedef OSStatus (* mal_AudioUnitSetProperty_proc)(AudioUnit inUnit, AudioUnitPropertyID inID, AudioUnitScope inScope, AudioUnitElement inElement, const void* inData, UInt32 inDataSize);
+typedef OSStatus (* mal_AudioUnitInitialize_proc)(AudioUnit inUnit);
+typedef OSStatus (* mal_AudioUnitRender_proc)(AudioUnit inUnit, AudioUnitRenderActionFlags* ioActionFlags, const AudioTimeStamp* inTimeStamp, UInt32 inOutputBusNumber, UInt32 inNumberFrames, AudioBufferList* ioData);
+
+
+#define MAL_COREAUDIO_OUTPUT_BUS    0
+#define MAL_COREAUDIO_INPUT_BUS     1
+
+
+// Core Audio
+//
+// So far, Core Audio has been the worst backend to work with due to being both unintuitive and having almost no documentation
+// apart from comments in the headers (which admittedly are quite good). For my own purposes, and for anybody out there whose
+// needing to figure out how this darn thing works, I'm going to outline a few things here.
+//
+// Since mini_al is a fairly low-level API, one of the things it needs is control over specific devices, and it needs to be
+// able to identify whether or not it can be used as playback and/or capture. The AudioObject API is the only one I've seen
+// that supports this level of detail. There was some public domain sample code I stumbled across that used the AudioComponent
+// and AudioUnit APIs, but I couldn't see anything that gave low-level control over device selection and capabilities (the
+// distinction between playback and capture in particular). Therefore, mini_al is using the AudioObject API.
+//
+// Most (all?) functions in the AudioObject API take a AudioObjectID as it's input. This is the device identifier. When
+// retrieving global information, such as the device list, you use kAudioObjectSystemObject. When retrieving device-specific
+// data, you pass in the ID for that device. In order to retrieve device-specific IDs you need to enumerate over each of the
+// devices. This is done using the AudioObjectGetPropertyDataSize() and AudioObjectGetPropertyData() APIs which seem to be
+// the central APIs for retrieving information about the system and specific devices.
+//
+// To use the AudioObjectGetPropertyData() API you need to use the notion of a property address. A property address is a
+// structure with three variables and is used to identify which property you are getting or setting. The first is the "selector"
+// which is basically the specific property that you're wanting to retrieve or set. The second is the "scope", which is
+// typically set to kAudioObjectPropertyScopeGlobal, kAudioObjectPropertyScopeInput for input-specific properties and
+// kAudioObjectPropertyScopeOutput for output-specific properties. The last is the "element" which is always set to
+// kAudioObjectPropertyElementMaster in mini_al's case. I don't know of any cases where this would be set to anything different.
+//
+// Back to the earlier issue of device retrieval, you first use the AudioObjectGetPropertyDataSize() API to retrieve the size
+// of the raw data which is just a list of AudioDeviceID's. You use the kAudioObjectSystemObject AudioObjectID, and a property
+// address with the kAudioHardwarePropertyDevices selector and the kAudioObjectPropertyScopeGlobal scope. Once you have the
+// size, allocate a block of memory of that size and then call AudioObjectGetPropertyData(). The data is just a list of
+// AudioDeviceID's so just do "dataSize/sizeof(AudioDeviceID)" to know the device count.
+
+mal_result mal_result_from_OSStatus(OSStatus status)
 {
-    // The OSS sample code uses "/dev/mixer" as the device for getting system properties so I'm going to do the same.
-    int fd = open("/dev/mixer", O_RDONLY, 0);
-    if (fd >= 0) {
-        return fd;
+    switch (status)
+    {
+        case kAudioHardwareNoError:                   return MAL_SUCCESS;
+        case kAudioHardwareNotRunningError:           return MAL_DEVICE_NOT_STARTED;
+        case kAudioHardwareUnspecifiedError:          return MAL_ERROR;
+        case kAudioHardwareUnknownPropertyError:      return MAL_INVALID_ARGS;
+        case kAudioHardwareBadPropertySizeError:      return MAL_INVALID_OPERATION;
+        case kAudioHardwareIllegalOperationError:     return MAL_INVALID_OPERATION;
+        case kAudioHardwareBadObjectError:            return MAL_INVALID_ARGS;
+        case kAudioHardwareBadDeviceError:            return MAL_INVALID_ARGS;
+        case kAudioHardwareBadStreamError:            return MAL_INVALID_ARGS;
+        case kAudioHardwareUnsupportedOperationError: return MAL_INVALID_OPERATION;
+        case kAudioDeviceUnsupportedFormatError:      return MAL_FORMAT_NOT_SUPPORTED;
+        case kAudioDevicePermissionsError:            return MAL_ACCESS_DENIED;
+        default:                                      return MAL_ERROR;
     }
+}
 
-    return -1;
+mal_channel mal_channel_from_AudioChannelBit(AudioChannelBitmap bit)
+{
+    switch (bit)
+    {
+        case kAudioChannelBit_Left:                 return MAL_CHANNEL_LEFT;
+        case kAudioChannelBit_Right:                return MAL_CHANNEL_RIGHT;
+        case kAudioChannelBit_Center:               return MAL_CHANNEL_FRONT_CENTER;
+        case kAudioChannelBit_LFEScreen:            return MAL_CHANNEL_LFE;
+        case kAudioChannelBit_LeftSurround:         return MAL_CHANNEL_BACK_LEFT;
+        case kAudioChannelBit_RightSurround:        return MAL_CHANNEL_BACK_RIGHT;
+        case kAudioChannelBit_LeftCenter:           return MAL_CHANNEL_FRONT_LEFT_CENTER;
+        case kAudioChannelBit_RightCenter:          return MAL_CHANNEL_FRONT_RIGHT_CENTER;
+        case kAudioChannelBit_CenterSurround:       return MAL_CHANNEL_BACK_CENTER;
+        case kAudioChannelBit_LeftSurroundDirect:   return MAL_CHANNEL_SIDE_LEFT;
+        case kAudioChannelBit_RightSurroundDirect:  return MAL_CHANNEL_SIDE_RIGHT;
+        case kAudioChannelBit_TopCenterSurround:    return MAL_CHANNEL_TOP_CENTER;
+        case kAudioChannelBit_VerticalHeightLeft:   return MAL_CHANNEL_TOP_FRONT_LEFT;
+        case kAudioChannelBit_VerticalHeightCenter: return MAL_CHANNEL_TOP_FRONT_CENTER;
+        case kAudioChannelBit_VerticalHeightRight:  return MAL_CHANNEL_TOP_FRONT_RIGHT;
+        case kAudioChannelBit_TopBackLeft:          return MAL_CHANNEL_TOP_BACK_LEFT;
+        case kAudioChannelBit_TopBackCenter:        return MAL_CHANNEL_TOP_BACK_CENTER;
+        case kAudioChannelBit_TopBackRight:         return MAL_CHANNEL_TOP_BACK_RIGHT;
+        default:                                    return MAL_CHANNEL_NONE;
+    }
+}
+
+mal_channel mal_channel_from_AudioChannelLabel(AudioChannelLabel label)
+{
+    switch (label)
+    {
+        case kAudioChannelLabel_Unknown:              return MAL_CHANNEL_NONE;
+        case kAudioChannelLabel_Unused:               return MAL_CHANNEL_NONE;
+        case kAudioChannelLabel_UseCoordinates:       return MAL_CHANNEL_NONE;
+        case kAudioChannelLabel_Left:                 return MAL_CHANNEL_LEFT;
+        case kAudioChannelLabel_Right:                return MAL_CHANNEL_RIGHT;
+        case kAudioChannelLabel_Center:               return MAL_CHANNEL_FRONT_CENTER;
+        case kAudioChannelLabel_LFEScreen:            return MAL_CHANNEL_LFE;
+        case kAudioChannelLabel_LeftSurround:         return MAL_CHANNEL_BACK_LEFT;
+        case kAudioChannelLabel_RightSurround:        return MAL_CHANNEL_BACK_RIGHT;
+        case kAudioChannelLabel_LeftCenter:           return MAL_CHANNEL_FRONT_LEFT_CENTER;
+        case kAudioChannelLabel_RightCenter:          return MAL_CHANNEL_FRONT_RIGHT_CENTER;
+        case kAudioChannelLabel_CenterSurround:       return MAL_CHANNEL_BACK_CENTER;
+        case kAudioChannelLabel_LeftSurroundDirect:   return MAL_CHANNEL_SIDE_LEFT;
+        case kAudioChannelLabel_RightSurroundDirect:  return MAL_CHANNEL_SIDE_RIGHT;
+        case kAudioChannelLabel_TopCenterSurround:    return MAL_CHANNEL_TOP_CENTER;
+        case kAudioChannelLabel_VerticalHeightLeft:   return MAL_CHANNEL_TOP_FRONT_LEFT;
+        case kAudioChannelLabel_VerticalHeightCenter: return MAL_CHANNEL_TOP_FRONT_CENTER;
+        case kAudioChannelLabel_VerticalHeightRight:  return MAL_CHANNEL_TOP_FRONT_RIGHT;
+        case kAudioChannelLabel_TopBackLeft:          return MAL_CHANNEL_TOP_BACK_LEFT;
+        case kAudioChannelLabel_TopBackCenter:        return MAL_CHANNEL_TOP_BACK_CENTER;
+        case kAudioChannelLabel_TopBackRight:         return MAL_CHANNEL_TOP_BACK_RIGHT;
+        case kAudioChannelLabel_RearSurroundLeft:     return MAL_CHANNEL_BACK_LEFT;
+        case kAudioChannelLabel_RearSurroundRight:    return MAL_CHANNEL_BACK_RIGHT;
+        case kAudioChannelLabel_LeftWide:             return MAL_CHANNEL_SIDE_LEFT;
+        case kAudioChannelLabel_RightWide:            return MAL_CHANNEL_SIDE_RIGHT;
+        case kAudioChannelLabel_LFE2:                 return MAL_CHANNEL_LFE;
+        case kAudioChannelLabel_LeftTotal:            return MAL_CHANNEL_LEFT;
+        case kAudioChannelLabel_RightTotal:           return MAL_CHANNEL_RIGHT;
+        case kAudioChannelLabel_HearingImpaired:      return MAL_CHANNEL_NONE;
+        case kAudioChannelLabel_Narration:            return MAL_CHANNEL_MONO;
+        case kAudioChannelLabel_Mono:                 return MAL_CHANNEL_MONO;
+        case kAudioChannelLabel_DialogCentricMix:     return MAL_CHANNEL_MONO;
+        case kAudioChannelLabel_CenterSurroundDirect: return MAL_CHANNEL_BACK_CENTER;
+        case kAudioChannelLabel_Haptic:               return MAL_CHANNEL_NONE;
+        case kAudioChannelLabel_Ambisonic_W:          return MAL_CHANNEL_NONE;
+        case kAudioChannelLabel_Ambisonic_X:          return MAL_CHANNEL_NONE;
+        case kAudioChannelLabel_Ambisonic_Y:          return MAL_CHANNEL_NONE;
+        case kAudioChannelLabel_Ambisonic_Z:          return MAL_CHANNEL_NONE;
+        case kAudioChannelLabel_MS_Mid:               return MAL_CHANNEL_LEFT;
+        case kAudioChannelLabel_MS_Side:              return MAL_CHANNEL_RIGHT;
+        case kAudioChannelLabel_XY_X:                 return MAL_CHANNEL_LEFT;
+        case kAudioChannelLabel_XY_Y:                 return MAL_CHANNEL_RIGHT;
+        case kAudioChannelLabel_HeadphonesLeft:       return MAL_CHANNEL_LEFT;
+        case kAudioChannelLabel_HeadphonesRight:      return MAL_CHANNEL_RIGHT;
+        case kAudioChannelLabel_ClickTrack:           return MAL_CHANNEL_NONE;
+        case kAudioChannelLabel_ForeignLanguage:      return MAL_CHANNEL_NONE;
+        case kAudioChannelLabel_Discrete:             return MAL_CHANNEL_NONE;
+        case kAudioChannelLabel_Discrete_0:           return MAL_CHANNEL_AUX_0;
+        case kAudioChannelLabel_Discrete_1:           return MAL_CHANNEL_AUX_1;
+        case kAudioChannelLabel_Discrete_2:           return MAL_CHANNEL_AUX_2;
+        case kAudioChannelLabel_Discrete_3:           return MAL_CHANNEL_AUX_3;
+        case kAudioChannelLabel_Discrete_4:           return MAL_CHANNEL_AUX_4;
+        case kAudioChannelLabel_Discrete_5:           return MAL_CHANNEL_AUX_5;
+        case kAudioChannelLabel_Discrete_6:           return MAL_CHANNEL_AUX_6;
+        case kAudioChannelLabel_Discrete_7:           return MAL_CHANNEL_AUX_7;
+        case kAudioChannelLabel_Discrete_8:           return MAL_CHANNEL_AUX_8;
+        case kAudioChannelLabel_Discrete_9:           return MAL_CHANNEL_AUX_9;
+        case kAudioChannelLabel_Discrete_10:          return MAL_CHANNEL_AUX_10;
+        case kAudioChannelLabel_Discrete_11:          return MAL_CHANNEL_AUX_11;
+        case kAudioChannelLabel_Discrete_12:          return MAL_CHANNEL_AUX_12;
+        case kAudioChannelLabel_Discrete_13:          return MAL_CHANNEL_AUX_13;
+        case kAudioChannelLabel_Discrete_14:          return MAL_CHANNEL_AUX_14;
+        case kAudioChannelLabel_Discrete_15:          return MAL_CHANNEL_AUX_15;
+        case kAudioChannelLabel_Discrete_65535:       return MAL_CHANNEL_NONE;
+        case kAudioChannelLabel_HOA_ACN:              return MAL_CHANNEL_NONE;
+        case kAudioChannelLabel_HOA_ACN_0:            return MAL_CHANNEL_AUX_0;
+        case kAudioChannelLabel_HOA_ACN_1:            return MAL_CHANNEL_AUX_1;
+        case kAudioChannelLabel_HOA_ACN_2:            return MAL_CHANNEL_AUX_2;
+        case kAudioChannelLabel_HOA_ACN_3:            return MAL_CHANNEL_AUX_3;
+        case kAudioChannelLabel_HOA_ACN_4:            return MAL_CHANNEL_AUX_4;
+        case kAudioChannelLabel_HOA_ACN_5:            return MAL_CHANNEL_AUX_5;
+        case kAudioChannelLabel_HOA_ACN_6:            return MAL_CHANNEL_AUX_6;
+        case kAudioChannelLabel_HOA_ACN_7:            return MAL_CHANNEL_AUX_7;
+        case kAudioChannelLabel_HOA_ACN_8:            return MAL_CHANNEL_AUX_8;
+        case kAudioChannelLabel_HOA_ACN_9:            return MAL_CHANNEL_AUX_9;
+        case kAudioChannelLabel_HOA_ACN_10:           return MAL_CHANNEL_AUX_10;
+        case kAudioChannelLabel_HOA_ACN_11:           return MAL_CHANNEL_AUX_11;
+        case kAudioChannelLabel_HOA_ACN_12:           return MAL_CHANNEL_AUX_12;
+        case kAudioChannelLabel_HOA_ACN_13:           return MAL_CHANNEL_AUX_13;
+        case kAudioChannelLabel_HOA_ACN_14:           return MAL_CHANNEL_AUX_14;
+        case kAudioChannelLabel_HOA_ACN_15:           return MAL_CHANNEL_AUX_15;
+        case kAudioChannelLabel_HOA_ACN_65024:        return MAL_CHANNEL_NONE;
+        default:                                      return MAL_CHANNEL_NONE;
+    }
+}
+
+mal_result mal_format_from_AudioStreamBasicDescription(const AudioStreamBasicDescription* pDescription, mal_format* pFormatOut)
+{
+    mal_assert(pDescription != NULL);
+    mal_assert(pFormatOut != NULL);
+    
+    *pFormatOut = mal_format_unknown;   // Safety.
+    
+    // There's a few things mini_al doesn't support.
+    if (pDescription->mFormatID != kAudioFormatLinearPCM) {
+        return MAL_FORMAT_NOT_SUPPORTED;
+    }
+    
+    // We don't support any non-packed formats that are aligned high.
+    if ((pDescription->mFormatFlags & kLinearPCMFormatFlagIsAlignedHigh) != 0) {
+        return MAL_FORMAT_NOT_SUPPORTED;
+    }
+    
+    // Big-endian formats are not currently supported, but will be added in a future version of mini_al.
+    if ((pDescription->mFormatFlags & kLinearPCMFormatFlagIsAlignedHigh) != 0) {
+        return MAL_FORMAT_NOT_SUPPORTED;
+    }
+    
+    // We are not currently supporting non-interleaved formats (this will be added in a future version of mini_al).
+    if ((pDescription->mFormatFlags & kAudioFormatFlagIsNonInterleaved) != 0) {
+        return MAL_FORMAT_NOT_SUPPORTED;
+    }
+
+    if ((pDescription->mFormatFlags & kLinearPCMFormatFlagIsFloat) != 0) {
+        if (pDescription->mBitsPerChannel == 32) {
+            *pFormatOut = mal_format_f32;
+            return MAL_SUCCESS;
+        }
+    } else {
+        if ((pDescription->mFormatFlags & kLinearPCMFormatFlagIsSignedInteger) != 0) {
+            if (pDescription->mBitsPerChannel == 16) {
+                *pFormatOut = mal_format_s16;
+                return MAL_SUCCESS;
+            } else if (pDescription->mBitsPerChannel == 24) {
+                if (pDescription->mBytesPerFrame == (pDescription->mBitsPerChannel/8 * pDescription->mChannelsPerFrame)) {
+                    *pFormatOut = mal_format_s24;
+                    return MAL_SUCCESS;
+                } else {
+                    if (pDescription->mBytesPerFrame/pDescription->mChannelsPerFrame == sizeof(mal_int32)) {
+                        // TODO: Implement mal_format_s24_32.
+                        //*pFormatOut = mal_format_s24_32;
+                        //return MAL_SUCCESS;
+                        return MAL_FORMAT_NOT_SUPPORTED;
+                    }
+                }
+            } else if (pDescription->mBitsPerChannel == 32) {
+                *pFormatOut = mal_format_s32;
+                return MAL_SUCCESS;
+            }
+        } else {
+            if (pDescription->mBitsPerChannel == 8) {
+                *pFormatOut = mal_format_u8;
+                return MAL_SUCCESS;
+            }
+        }
+    }
+    
+    // Getting here means the format is not supported.
+    return MAL_FORMAT_NOT_SUPPORTED;
 }
 
-mal_result mal_context_open_device__oss(mal_context* pContext, mal_device_type type, const mal_device_id* pDeviceID, int* pfd)
+mal_result mal_get_device_object_ids__coreaudio(mal_context* pContext, UInt32* pDeviceCount, AudioObjectID** ppDeviceObjectIDs) // NOTE: Free the returned buffer with mal_free().
 {
     mal_assert(pContext != NULL);
-    mal_assert(pfd != NULL);
+    mal_assert(pDeviceCount != NULL);
+    mal_assert(ppDeviceObjectIDs != NULL);
     (void)pContext;
 
-    *pfd = -1;
+    // Safety.
+    *pDeviceCount = 0;
+    *ppDeviceObjectIDs = NULL;
+    
+    AudioObjectPropertyAddress propAddressDevices;
+    propAddressDevices.mSelector = kAudioHardwarePropertyDevices;
+    propAddressDevices.mScope    = kAudioObjectPropertyScopeGlobal;
+    propAddressDevices.mElement  = kAudioObjectPropertyElementMaster;
 
-    char deviceName[64];
-    if (pDeviceID != NULL) {
-        mal_strncpy_s(deviceName, sizeof(deviceName), pDeviceID->oss, (size_t)-1);
-    } else {
-        mal_strncpy_s(deviceName, sizeof(deviceName), "/dev/dsp", (size_t)-1);
+    UInt32 deviceObjectsDataSize;
+    OSStatus status = ((mal_AudioObjectGetPropertyDataSize_proc)pContext->coreaudio.AudioObjectGetPropertyDataSize)(kAudioObjectSystemObject, &propAddressDevices, 0, NULL, &deviceObjectsDataSize);
+    if (status != noErr) {
+        return mal_result_from_OSStatus(status);
     }
-
-    *pfd = open(deviceName, (type == mal_device_type_playback) ? O_WRONLY : O_RDONLY, 0);
-    if (*pfd == -1) {
-        return MAL_FAILED_TO_OPEN_BACKEND_DEVICE;
+    
+    AudioObjectID* pDeviceObjectIDs = (AudioObjectID*)mal_malloc(deviceObjectsDataSize);
+    if (pDeviceObjectIDs == NULL) {
+        return MAL_OUT_OF_MEMORY;
     }
-
+    
+    status = ((mal_AudioObjectGetPropertyData_proc)pContext->coreaudio.AudioObjectGetPropertyData)(kAudioObjectSystemObject, &propAddressDevices, 0, NULL, &deviceObjectsDataSize, pDeviceObjectIDs);
+    if (status != noErr) {
+        mal_free(pDeviceObjectIDs);
+        return mal_result_from_OSStatus(status);
+    }
+    
+    *pDeviceCount = deviceObjectsDataSize / sizeof(AudioObjectID);
+    *ppDeviceObjectIDs = pDeviceObjectIDs;
     return MAL_SUCCESS;
 }
 
-mal_bool32 mal_context_is_device_id_equal__oss(mal_context* pContext, const mal_device_id* pID0, const mal_device_id* pID1)
+mal_result mal_get_AudioObject_uid_as_CFStringRef(mal_context* pContext, AudioObjectID objectID, CFStringRef* pUID)
 {
     mal_assert(pContext != NULL);
-    mal_assert(pID0 != NULL);
-    mal_assert(pID1 != NULL);
-    (void)pContext;
 
-    return mal_strcmp(pID0->oss, pID1->oss) == 0;
+    AudioObjectPropertyAddress propAddress;
+    propAddress.mSelector = kAudioDevicePropertyDeviceUID;
+    propAddress.mScope    = kAudioObjectPropertyScopeGlobal;
+    propAddress.mElement  = kAudioObjectPropertyElementMaster;
+
+    UInt32 dataSize = sizeof(*pUID);
+    OSStatus status = ((mal_AudioObjectGetPropertyData_proc)pContext->coreaudio.AudioObjectGetPropertyData)(objectID, &propAddress, 0, NULL, &dataSize, pUID);
+    if (status != noErr) {
+        return mal_result_from_OSStatus(status);
+    }
+    
+    return MAL_SUCCESS;
 }
 
-mal_result mal_context_enumerate_devices__oss(mal_context* pContext, mal_enum_devices_callback_proc callback, void* pUserData)
+mal_result mal_get_AudioObject_uid(mal_context* pContext, AudioObjectID objectID, size_t bufferSize, char* bufferOut)
 {
     mal_assert(pContext != NULL);
-    mal_assert(callback != NULL);
 
-    int fd = mal_open_temp_device__oss();
-    if (fd == -1) {
-        return mal_context_post_error(pContext, NULL, "[OSS] Failed to open a temporary device for retrieving system information used for device enumeration.", MAL_NO_BACKEND);
+    CFStringRef uid;
+    mal_result result = mal_get_AudioObject_uid_as_CFStringRef(pContext, objectID, &uid);
+    if (result != MAL_SUCCESS) {
+        return result;
     }
-
-    oss_sysinfo si;
-    int result = ioctl(fd, SNDCTL_SYSINFO, &si);
-    if (result != -1) {
-        for (int iAudioDevice = 0; iAudioDevice < si.numaudios; ++iAudioDevice) {
-            oss_audioinfo ai;
-            ai.dev = iAudioDevice;
-            result = ioctl(fd, SNDCTL_AUDIOINFO, &ai);
-            if (result != -1) {
-                if (ai.devnode[0] != '\0') {    // <-- Can be blank, according to documentation.
-                    mal_device_info deviceInfo;
-                    mal_zero_object(&deviceInfo);
-
-                    // ID
-                    mal_strncpy_s(deviceInfo.id.oss, sizeof(deviceInfo.id.oss), ai.devnode, (size_t)-1);
-
-                    // The human readable device name should be in the "ai.handle" variable, but it can
-                    // sometimes be empty in which case we just fall back to "ai.name" which is less user
-                    // friendly, but usually has a value.
-                    if (ai.handle[0] != '\0') {
-                        mal_strncpy_s(deviceInfo.name, sizeof(deviceInfo.name), ai.handle, (size_t)-1);
-                    } else {
-                        mal_strncpy_s(deviceInfo.name, sizeof(deviceInfo.name), ai.name, (size_t)-1);
-                    }
-
-                    // The device can be both playback and capture.
-                    mal_bool32 isTerminating = MAL_FALSE;
-                    if (!isTerminating && (ai.caps & PCM_CAP_OUTPUT) != 0) {
-                        isTerminating = !callback(pContext, mal_device_type_playback, &deviceInfo, pUserData);
-                    }
-                    if (!isTerminating && (ai.caps & PCM_CAP_INPUT) != 0) {
-                        isTerminating = !callback(pContext, mal_device_type_capture, &deviceInfo, pUserData);
-                    }
-
-                    if (isTerminating) {
-                        break;
-                    }
-                }
-            }
-        }
-    } else {
-        close(fd);
-        return mal_context_post_error(pContext, NULL, "[OSS] Failed to retrieve system information for device enumeration.", MAL_NO_BACKEND);
+    
+    if (!((mal_CFStringGetCString_proc)pContext->coreaudio.CFStringGetCString)(uid, bufferOut, bufferSize, kCFStringEncodingUTF8)) {
+        return MAL_ERROR;
     }
-
-    close(fd);
+    
     return MAL_SUCCESS;
 }
 
-mal_result mal_context_get_device_info__oss(mal_context* pContext, mal_device_type deviceType, const mal_device_id* pDeviceID, mal_share_mode shareMode, mal_device_info* pDeviceInfo)
+mal_result mal_get_AudioObject_name(mal_context* pContext, AudioObjectID objectID, size_t bufferSize, char* bufferOut)
 {
     mal_assert(pContext != NULL);
-    (void)shareMode;
 
-    // Handle the default device a little differently.
-    if (pDeviceID == NULL) {
+    AudioObjectPropertyAddress propAddress;
+    propAddress.mSelector = kAudioDevicePropertyDeviceNameCFString;
+    propAddress.mScope    = kAudioObjectPropertyScopeGlobal;
+    propAddress.mElement  = kAudioObjectPropertyElementMaster;
+
+    CFStringRef deviceName = NULL;
+    UInt32 dataSize = sizeof(deviceName);
+    OSStatus status = ((mal_AudioObjectGetPropertyData_proc)pContext->coreaudio.AudioObjectGetPropertyData)(objectID, &propAddress, 0, NULL, &dataSize, &deviceName);
+    if (status != noErr) {
+        return mal_result_from_OSStatus(status);
+    }
+    
+    if (!((mal_CFStringGetCString_proc)pContext->coreaudio.CFStringGetCString)(deviceName, bufferOut, bufferSize, kCFStringEncodingUTF8)) {
+        return MAL_ERROR;
+    }
+    
+    return MAL_SUCCESS;
+}
+
+mal_bool32 mal_does_AudioObject_support_scope(mal_context* pContext, AudioObjectID deviceObjectID, AudioObjectPropertyScope scope)
+{
+    mal_assert(pContext != NULL);
+
+    // To know whether or not a device is an input device we need ot look at the stream configuration. If it has an output channel it's a
+    // playback device.
+    AudioObjectPropertyAddress propAddress;
+    propAddress.mSelector = kAudioDevicePropertyStreamConfiguration;
+    propAddress.mScope    = scope;
+    propAddress.mElement  = kAudioObjectPropertyElementMaster;
+    
+    UInt32 dataSize;
+    OSStatus status = ((mal_AudioObjectGetPropertyDataSize_proc)pContext->coreaudio.AudioObjectGetPropertyDataSize)(deviceObjectID, &propAddress, 0, NULL, &dataSize);
+    if (status != noErr) {
+        return MAL_FALSE;
+    }
+    
+    AudioBufferList* pBufferList = (AudioBufferList*)mal_malloc(dataSize);
+    if (pBufferList == NULL) {
+        return MAL_FALSE;   // Out of memory.
+    }
+    
+    status = ((mal_AudioObjectGetPropertyData_proc)pContext->coreaudio.AudioObjectGetPropertyData)(deviceObjectID, &propAddress, 0, NULL, &dataSize, pBufferList);
+    if (status != noErr) {
+        mal_free(pBufferList);
+        return MAL_FALSE;
+    }
+
+    mal_bool32 isSupported = MAL_FALSE;
+    if (pBufferList->mNumberBuffers > 0) {
+        isSupported = MAL_TRUE;
+    }
+    
+    mal_free(pBufferList);
+    return isSupported;
+}
+
+mal_bool32 mal_does_AudioObject_support_playback(mal_context* pContext, AudioObjectID deviceObjectID)
+{
+    return mal_does_AudioObject_support_scope(pContext, deviceObjectID, kAudioObjectPropertyScopeOutput);
+}
+
+mal_bool32 mal_does_AudioObject_support_capture(mal_context* pContext, AudioObjectID deviceObjectID)
+{
+    return mal_does_AudioObject_support_scope(pContext, deviceObjectID, kAudioObjectPropertyScopeInput);
+}
+
+
+mal_result mal_get_AudioObject_stream_descriptions(mal_context* pContext, AudioObjectID deviceObjectID, mal_device_type deviceType, UInt32* pDescriptionCount, AudioStreamRangedDescription** ppDescriptions)    // NOTE: Free the returned pointer with mal_free().
+{
+    mal_assert(pContext != NULL);
+    mal_assert(pDescriptionCount != NULL);
+    mal_assert(ppDescriptions != NULL);
+    
+    // TODO: Experiment with kAudioStreamPropertyAvailablePhysicalFormats instead of (or in addition to) kAudioStreamPropertyAvailableVirtualFormats. My
+    //       MacBook Pro uses s24/32 format, however, which mini_al does not currently support.
+    AudioObjectPropertyAddress propAddress;
+    propAddress.mSelector = kAudioStreamPropertyAvailableVirtualFormats; //kAudioStreamPropertyAvailablePhysicalFormats;
+    propAddress.mScope    = (deviceType == mal_device_type_playback) ? kAudioObjectPropertyScopeOutput : kAudioObjectPropertyScopeInput;
+    propAddress.mElement  = kAudioObjectPropertyElementMaster;
+    
+    UInt32 dataSize;
+    OSStatus status = ((mal_AudioObjectGetPropertyDataSize_proc)pContext->coreaudio.AudioObjectGetPropertyDataSize)(deviceObjectID, &propAddress, 0, NULL, &dataSize);
+    if (status != noErr) {
+        return mal_result_from_OSStatus(status);
+    }
+    
+    AudioStreamRangedDescription* pDescriptions = (AudioStreamRangedDescription*)mal_malloc(dataSize);
+    if (pDescriptions == NULL) {
+        return MAL_OUT_OF_MEMORY;
+    }
+    
+    status = ((mal_AudioObjectGetPropertyData_proc)pContext->coreaudio.AudioObjectGetPropertyData)(deviceObjectID, &propAddress, 0, NULL, &dataSize, pDescriptions);
+    if (status != noErr) {
+        mal_free(pDescriptions);
+        return mal_result_from_OSStatus(status);
+    }
+    
+    *pDescriptionCount = dataSize / sizeof(*pDescriptions);
+    *ppDescriptions = pDescriptions;
+    return MAL_SUCCESS;
+}
+
+
+
+mal_result mal_get_AudioObject_channel_layout(mal_context* pContext, AudioObjectID deviceObjectID, mal_device_type deviceType, AudioChannelLayout** ppChannelLayout)   // NOTE: Free the returned pointer with mal_free().
+{
+    mal_assert(pContext != NULL);
+    mal_assert(ppChannelLayout != NULL);
+    
+    *ppChannelLayout = NULL;    // Safety.
+    
+    AudioObjectPropertyAddress propAddress;
+    propAddress.mSelector = kAudioDevicePropertyPreferredChannelLayout;
+    propAddress.mScope    = (deviceType == mal_device_type_playback) ? kAudioObjectPropertyScopeOutput : kAudioObjectPropertyScopeInput;
+    propAddress.mElement  = kAudioObjectPropertyElementMaster;
+    
+    UInt32 dataSize;
+    OSStatus status = ((mal_AudioObjectGetPropertyDataSize_proc)pContext->coreaudio.AudioObjectGetPropertyDataSize)(deviceObjectID, &propAddress, 0, NULL, &dataSize);
+    if (status != noErr) {
+        return mal_result_from_OSStatus(status);
+    }
+    
+    AudioChannelLayout* pChannelLayout = (AudioChannelLayout*)mal_malloc(dataSize);
+    if (pChannelLayout == NULL) {
+        return MAL_OUT_OF_MEMORY;
+    }
+    
+    status = ((mal_AudioObjectGetPropertyData_proc)pContext->coreaudio.AudioObjectGetPropertyData)(deviceObjectID, &propAddress, 0, NULL, &dataSize, pChannelLayout);
+    if (status != noErr) {
+        mal_free(pChannelLayout);
+        return mal_result_from_OSStatus(status);
+    }
+    
+    *ppChannelLayout = pChannelLayout;
+    return MAL_SUCCESS;
+}
+
+mal_result mal_get_AudioObject_channel_count(mal_context* pContext, AudioObjectID deviceObjectID, mal_device_type deviceType, mal_uint32* pChannelCount)
+{
+    mal_assert(pContext != NULL);
+    mal_assert(pChannelCount != NULL);
+    
+    *pChannelCount = 0; // Safety.
+
+    AudioChannelLayout* pChannelLayout;
+    mal_result result = mal_get_AudioObject_channel_layout(pContext, deviceObjectID, deviceType, &pChannelLayout);
+    if (result != MAL_SUCCESS) {
+        return result;
+    }
+    
+    if (pChannelLayout->mChannelLayoutTag == kAudioChannelLayoutTag_UseChannelDescriptions) {
+        *pChannelCount = pChannelLayout->mNumberChannelDescriptions;
+    } else if (pChannelLayout->mChannelLayoutTag == kAudioChannelLayoutTag_UseChannelBitmap) {
+        *pChannelCount = mal_count_set_bits(pChannelLayout->mChannelBitmap);
+    } else {
+        *pChannelCount = AudioChannelLayoutTag_GetNumberOfChannels(pChannelLayout->mChannelLayoutTag);
+    }
+    
+    mal_free(pChannelLayout);
+    return MAL_SUCCESS;
+}
+
+mal_result mal_get_channel_map_from_AudioChannelLayout(AudioChannelLayout* pChannelLayout, mal_channel channelMap[MAL_MAX_CHANNELS])
+{
+    mal_assert(pChannelLayout != NULL);
+    
+    if (pChannelLayout->mChannelLayoutTag == kAudioChannelLayoutTag_UseChannelDescriptions) {
+        for (UInt32 iChannel = 0; iChannel < pChannelLayout->mNumberChannelDescriptions; ++iChannel) {
+            channelMap[iChannel] = mal_channel_from_AudioChannelLabel(pChannelLayout->mChannelDescriptions[iChannel].mChannelLabel);
+        }
+    } else if (pChannelLayout->mChannelLayoutTag == kAudioChannelLayoutTag_UseChannelBitmap) {
+        // This is the same kind of system that's used by Windows audio APIs.
+        UInt32 iChannel = 0;
+        AudioChannelBitmap bitmap = pChannelLayout->mChannelBitmap;
+        for (UInt32 iBit = 0; iBit < 32; ++iBit) {
+            AudioChannelBitmap bit = bitmap & (1 << iBit);
+            if (bit != 0) {
+                channelMap[iChannel++] = mal_channel_from_AudioChannelBit(bit);
+            }
+        }
+    } else {
+        // Need to use the tag to determine the channel map. For now I'm just assuming a default channel map, but later on this should
+        // be updated to determine the mapping based on the tag.
+        UInt32 channelCount = AudioChannelLayoutTag_GetNumberOfChannels(pChannelLayout->mChannelLayoutTag);
+        switch (pChannelLayout->mChannelLayoutTag)
+        {
+            case kAudioChannelLayoutTag_Mono:
+            case kAudioChannelLayoutTag_Stereo:
+            case kAudioChannelLayoutTag_StereoHeadphones:
+            case kAudioChannelLayoutTag_MatrixStereo:
+            case kAudioChannelLayoutTag_MidSide:
+            case kAudioChannelLayoutTag_XY:
+            case kAudioChannelLayoutTag_Binaural:
+            case kAudioChannelLayoutTag_Ambisonic_B_Format:
+            {
+                mal_get_standard_channel_map(mal_standard_channel_map_default, channelCount, channelMap);
+            } break;
+            
+            case kAudioChannelLayoutTag_Octagonal:
+            {
+                channelMap[7] = MAL_CHANNEL_SIDE_RIGHT;
+                channelMap[6] = MAL_CHANNEL_SIDE_LEFT;
+            } // Intentional fallthrough.
+            case kAudioChannelLayoutTag_Hexagonal:
+            {
+                channelMap[5] = MAL_CHANNEL_BACK_CENTER;
+            } // Intentional fallthrough.
+            case kAudioChannelLayoutTag_Pentagonal:
+            {
+                channelMap[4] = MAL_CHANNEL_FRONT_CENTER;
+            } // Intentional fallghrough.
+            case kAudioChannelLayoutTag_Quadraphonic:
+            {
+                channelMap[3] = MAL_CHANNEL_BACK_RIGHT;
+                channelMap[2] = MAL_CHANNEL_BACK_LEFT;
+                channelMap[1] = MAL_CHANNEL_RIGHT;
+                channelMap[0] = MAL_CHANNEL_LEFT;
+            } break;
+            
+            // TODO: Add support for more tags here.
+        
+            default:
+            {
+                mal_get_standard_channel_map(mal_standard_channel_map_default, channelCount, channelMap);
+            } break;
+        }
+    }
+    
+    return MAL_SUCCESS;
+}
+
+mal_result mal_get_AudioObject_channel_map(mal_context* pContext, AudioObjectID deviceObjectID, mal_device_type deviceType, mal_channel channelMap[MAL_MAX_CHANNELS])
+{
+    mal_assert(pContext != NULL);
+    
+    AudioChannelLayout* pChannelLayout;
+    mal_result result = mal_get_AudioObject_channel_layout(pContext, deviceObjectID, deviceType, &pChannelLayout);
+    if (result != MAL_SUCCESS) {
+        return result;  // Rather than always failing here, would it be more robust to simply assume a default?
+    }
+    
+    result = mal_get_channel_map_from_AudioChannelLayout(pChannelLayout, channelMap);
+    if (result != MAL_SUCCESS) {
+        return result;
+    }
+    
+    return result;
+}
+
+mal_result mal_get_AudioObject_sample_rates(mal_context* pContext, AudioObjectID deviceObjectID, mal_device_type deviceType, UInt32* pSampleRateRangesCount, AudioValueRange** ppSampleRateRanges)   // NOTE: Free the returned pointer with mal_free().
+{
+    mal_assert(pContext != NULL);
+    mal_assert(pSampleRateRangesCount != NULL);
+    mal_assert(ppSampleRateRanges != NULL);
+  
+    // Safety.
+    *pSampleRateRangesCount = 0;
+    *ppSampleRateRanges = NULL;
+    
+    AudioObjectPropertyAddress propAddress;
+    propAddress.mSelector = kAudioDevicePropertyAvailableNominalSampleRates;
+    propAddress.mScope    = (deviceType == mal_device_type_playback) ? kAudioObjectPropertyScopeOutput : kAudioObjectPropertyScopeInput;
+    propAddress.mElement  = kAudioObjectPropertyElementMaster;
+    
+    UInt32 dataSize;
+    OSStatus status = ((mal_AudioObjectGetPropertyDataSize_proc)pContext->coreaudio.AudioObjectGetPropertyDataSize)(deviceObjectID, &propAddress, 0, NULL, &dataSize);
+    if (status != noErr) {
+        return mal_result_from_OSStatus(status);
+    }
+    
+    AudioValueRange* pSampleRateRanges = (AudioValueRange*)mal_malloc(dataSize);
+    if (pSampleRateRanges == NULL) {
+        return MAL_OUT_OF_MEMORY;
+    }
+    
+    status = ((mal_AudioObjectGetPropertyData_proc)pContext->coreaudio.AudioObjectGetPropertyData)(deviceObjectID, &propAddress, 0, NULL, &dataSize, pSampleRateRanges);
+    if (status != noErr) {
+        mal_free(pSampleRateRanges);
+        return mal_result_from_OSStatus(status);
+    }
+    
+    *pSampleRateRangesCount = dataSize / sizeof(*pSampleRateRanges);
+    *ppSampleRateRanges = pSampleRateRanges;
+    return MAL_SUCCESS;
+}
+
+mal_result mal_get_AudioObject_get_closest_sample_rate(mal_context* pContext, AudioObjectID deviceObjectID, mal_device_type deviceType, mal_uint32 sampleRateIn, mal_uint32* pSampleRateOut)
+{
+    mal_assert(pContext != NULL);
+    mal_assert(pSampleRateOut != NULL);
+    
+    *pSampleRateOut = 0;    // Safety.
+    
+    UInt32 sampleRateRangeCount;
+    AudioValueRange* pSampleRateRanges;
+    mal_result result = mal_get_AudioObject_sample_rates(pContext, deviceObjectID, deviceType, &sampleRateRangeCount, &pSampleRateRanges);
+    if (result != MAL_SUCCESS) {
+        return result;
+    }
+    
+    if (sampleRateRangeCount == 0) {
+        mal_free(pSampleRateRanges);
+        return MAL_ERROR;   // Should never hit this case should we?
+    }
+    
+    if (sampleRateIn == 0) {
+        // Search in order of mini_al's preferred priority.
+        for (UInt32 iMALSampleRate = 0; iMALSampleRate < mal_countof(g_malStandardSampleRatePriorities); ++iMALSampleRate) {
+            mal_uint32 malSampleRate = g_malStandardSampleRatePriorities[iMALSampleRate];
+            for (UInt32 iCASampleRate = 0; iCASampleRate < sampleRateRangeCount; ++iCASampleRate) {
+                AudioValueRange caSampleRate = pSampleRateRanges[iCASampleRate];
+                if (caSampleRate.mMinimum <= malSampleRate && caSampleRate.mMaximum >= malSampleRate) {
+                    *pSampleRateOut = malSampleRate;
+                    mal_free(pSampleRateRanges);
+                    return MAL_SUCCESS;
+                }
+            }
+        }
+        
+        // If we get here it means none of mini_al's standard sample rates matched any of the supported sample rates from the device. In this
+        // case we just fall back to the first one reported by Core Audio.
+        mal_assert(sampleRateRangeCount > 0);
+        
+        *pSampleRateOut = pSampleRateRanges[0].mMinimum;
+        mal_free(pSampleRateRanges);
+        return MAL_SUCCESS;
+    } else {
+        // Find the closest match to this sample rate.
+        UInt32 currentAbsoluteDifference = INT32_MAX;
+        UInt32 iCurrentClosestRange = (UInt32)-1;
+        for (UInt32 iRange = 0; iRange < sampleRateRangeCount; ++iRange) {
+            if (pSampleRateRanges[iRange].mMinimum <= sampleRateIn && pSampleRateRanges[iRange].mMaximum >= sampleRateIn) {
+                *pSampleRateOut = sampleRateIn;
+                mal_free(pSampleRateRanges);
+                return MAL_SUCCESS;
+            } else {
+                UInt32 absoluteDifference;
+                if (pSampleRateRanges[iRange].mMinimum > sampleRateIn) {
+                    absoluteDifference = pSampleRateRanges[iRange].mMinimum - sampleRateIn;
+                } else {
+                    absoluteDifference = sampleRateIn - pSampleRateRanges[iRange].mMaximum;
+                }
+                
+                if (currentAbsoluteDifference > absoluteDifference) {
+                    currentAbsoluteDifference = absoluteDifference;
+                    iCurrentClosestRange = iRange;
+                }
+            }
+        }
+        
+        mal_assert(iCurrentClosestRange != (UInt32)-1);
+        
+        *pSampleRateOut = pSampleRateRanges[iCurrentClosestRange].mMinimum;
+        mal_free(pSampleRateRanges);
+        return MAL_SUCCESS;
+    }
+    
+    // Should never get here, but it would mean we weren't able to find any suitable sample rates.
+    //mal_free(pSampleRateRanges);
+    //return MAL_ERROR;
+}
+
+
+mal_result mal_get_AudioObject_closest_buffer_size_in_frames(mal_context* pContext, AudioObjectID deviceObjectID, mal_device_type deviceType, mal_uint32 bufferSizeInFramesIn, mal_uint32* pBufferSizeInFramesOut)
+{
+    mal_assert(pContext != NULL);
+    mal_assert(pBufferSizeInFramesOut != NULL);
+    
+    *pBufferSizeInFramesOut = 0;    // Safety.
+    
+    AudioObjectPropertyAddress propAddress;
+    propAddress.mSelector = kAudioDevicePropertyBufferFrameSizeRange;
+    propAddress.mScope    = (deviceType == mal_device_type_playback) ? kAudioObjectPropertyScopeOutput : kAudioObjectPropertyScopeInput;
+    propAddress.mElement  = kAudioObjectPropertyElementMaster;
+
+    AudioValueRange bufferSizeRange;
+    UInt32 dataSize = sizeof(bufferSizeRange);
+    OSStatus status = ((mal_AudioObjectGetPropertyData_proc)pContext->coreaudio.AudioObjectGetPropertyData)(deviceObjectID, &propAddress, 0, NULL, &dataSize, &bufferSizeRange);
+    if (status != noErr) {
+        return mal_result_from_OSStatus(status);
+    }
+    
+    // This is just a clamp.
+    if (bufferSizeInFramesIn < bufferSizeRange.mMinimum) {
+        *pBufferSizeInFramesOut = (mal_uint32)bufferSizeRange.mMinimum;
+    } else if (bufferSizeInFramesIn > bufferSizeRange.mMaximum) {
+        *pBufferSizeInFramesOut = (mal_uint32)bufferSizeRange.mMaximum;
+    } else {
+        *pBufferSizeInFramesOut = bufferSizeInFramesIn;
+    }
+
+    return MAL_SUCCESS;
+}
+
+mal_result mal_set_AudioObject_buffer_size_in_frames(mal_context* pContext, AudioObjectID deviceObjectID, mal_device_type deviceType, mal_uint32* pBufferSizeInOut)
+{
+    mal_assert(pContext != NULL);
+
+    mal_uint32 chosenBufferSizeInFrames;
+    mal_result result = mal_get_AudioObject_closest_buffer_size_in_frames(pContext, deviceObjectID, deviceType, *pBufferSizeInOut, &chosenBufferSizeInFrames);
+    if (result != MAL_SUCCESS) {
+        return result;
+    }
+
+    // Try setting the size of the buffer... If this fails we just use whatever is currently set.
+    AudioObjectPropertyAddress propAddress;
+    propAddress.mSelector = kAudioDevicePropertyBufferFrameSize;
+    propAddress.mScope    = (deviceType == mal_device_type_playback) ? kAudioObjectPropertyScopeOutput : kAudioObjectPropertyScopeInput;
+    propAddress.mElement  = kAudioObjectPropertyElementMaster;
+    
+    OSStatus status = ((mal_AudioObjectSetPropertyData_proc)pContext->coreaudio.AudioObjectSetPropertyData)(deviceObjectID, &propAddress, 0, NULL, sizeof(chosenBufferSizeInFrames), &chosenBufferSizeInFrames);
+    if (status != noErr) {
+        // Getting here means we were unable to set the buffer size. In this case just use whatever is currently selected.
+        UInt32 dataSize = sizeof(*pBufferSizeInOut);
+        OSStatus status = ((mal_AudioObjectGetPropertyData_proc)pContext->coreaudio.AudioObjectGetPropertyData)(deviceObjectID, &propAddress, 0, NULL, &dataSize, pBufferSizeInOut);
+        if (status != noErr) {
+            return mal_result_from_OSStatus(status);
+        }
+    }
+    
+    return MAL_SUCCESS;
+}
+
+
+mal_result mal_find_AudioObjectID(mal_context* pContext, mal_device_type type, const mal_device_id* pDeviceID, AudioObjectID* pDeviceObjectID)
+{
+    mal_assert(pContext != NULL);
+    mal_assert(pDeviceObjectID != NULL);
+
+    // Safety.
+    *pDeviceObjectID = 0;
+    
+    if (pDeviceID == NULL) {
+        // Default device.
+        AudioObjectPropertyAddress propAddressDefaultDevice;
+        propAddressDefaultDevice.mScope = kAudioObjectPropertyScopeGlobal;
+        propAddressDefaultDevice.mElement = kAudioObjectPropertyElementMaster;
+        if (type == mal_device_type_playback) {
+            propAddressDefaultDevice.mSelector = kAudioHardwarePropertyDefaultOutputDevice;
+        } else {
+            propAddressDefaultDevice.mSelector = kAudioHardwarePropertyDefaultInputDevice;
+        }
+        
+        UInt32 defaultDeviceObjectIDSize = sizeof(AudioObjectID);
+        AudioObjectID defaultDeviceObjectID;
+        OSStatus status = ((mal_AudioObjectGetPropertyData_proc)pContext->coreaudio.AudioObjectGetPropertyData)(kAudioObjectSystemObject, &propAddressDefaultDevice, 0, NULL, &defaultDeviceObjectIDSize, &defaultDeviceObjectID);
+        if (status == noErr) {
+            *pDeviceObjectID = defaultDeviceObjectID;
+            return MAL_SUCCESS;
+        }
+    } else {
+        // Explicit device.
+        UInt32 deviceCount;
+        AudioObjectID* pDeviceObjectIDs;
+        mal_result result = mal_get_device_object_ids__coreaudio(pContext, &deviceCount, &pDeviceObjectIDs);
+        if (result != MAL_SUCCESS) {
+            return result;
+        }
+        
+        for (UInt32 iDevice = 0; iDevice < deviceCount; ++iDevice) {
+            AudioObjectID deviceObjectID = pDeviceObjectIDs[iDevice];
+            
+            char uid[256];
+            if (mal_get_AudioObject_uid(pContext, deviceObjectID, sizeof(uid), uid) != MAL_SUCCESS) {
+                continue;
+            }
+            
+            if (type == mal_device_type_playback) {
+                if (mal_does_AudioObject_support_playback(pContext, deviceObjectID)) {
+                    if (strcmp(uid, pDeviceID->coreaudio) == 0) {
+                        *pDeviceObjectID = deviceObjectID;
+                        return MAL_SUCCESS;
+                    }
+                }
+            } else {
+                if (mal_does_AudioObject_support_capture(pContext, deviceObjectID)) {
+                    if (strcmp(uid, pDeviceID->coreaudio) == 0) {
+                        *pDeviceObjectID = deviceObjectID;
+                        return MAL_SUCCESS;
+                    }
+                }
+            }
+        }
+    }
+    
+    // If we get here it means we couldn't find the device.
+    return MAL_NO_DEVICE;
+}
+
+
+mal_result mal_device_find_best_format__coreaudio(const mal_device* pDevice, AudioStreamBasicDescription* pFormat)
+{
+    mal_assert(pDevice != NULL);
+    
+    AudioObjectID deviceObjectID = (AudioObjectID)pDevice->coreaudio.deviceObjectID;
+    
+    UInt32 deviceFormatDescriptionCount;
+    AudioStreamRangedDescription* pDeviceFormatDescriptions;
+    mal_result result = mal_get_AudioObject_stream_descriptions(pDevice->pContext, deviceObjectID, pDevice->type, &deviceFormatDescriptionCount, &pDeviceFormatDescriptions);
+    if (result != MAL_SUCCESS) {
+        return result;
+    }
+    
+    mal_uint32 desiredSampleRate = pDevice->sampleRate;
+    if (pDevice->usingDefaultSampleRate) {
+        // When using the device's default sample rate, we get the highest priority standard rate supported by the device. Otherwise
+        // we just use the pre-set rate.
+        for (mal_uint32 iStandardRate = 0; iStandardRate < mal_countof(g_malStandardSampleRatePriorities); ++iStandardRate) {
+            mal_uint32 standardRate = g_malStandardSampleRatePriorities[iStandardRate];
+            
+            mal_bool32 foundRate = MAL_FALSE;
+            for (UInt32 iDeviceRate = 0; iDeviceRate < deviceFormatDescriptionCount; ++iDeviceRate) {
+                mal_uint32 deviceRate = (mal_uint32)pDeviceFormatDescriptions[iDeviceRate].mFormat.mSampleRate;
+                
+                if (deviceRate == standardRate) {
+                    desiredSampleRate = standardRate;
+                    foundRate = MAL_TRUE;
+                    break;
+                }
+            }
+            
+            if (foundRate) {
+                break;
+            }
+        }
+    }
+    
+    mal_uint32 desiredChannelCount = pDevice->channels;
+    if (pDevice->usingDefaultChannels) {
+        mal_get_AudioObject_channel_count(pDevice->pContext, deviceObjectID, pDevice->type, &desiredChannelCount);    // <-- Not critical if this fails.
+    }
+    
+    mal_format desiredFormat = pDevice->format;
+    if (pDevice->usingDefaultFormat) {
+        desiredFormat = g_malFormatPriorities[0];
+    }
+    
+    // If we get here it means we don't have an exact match to what the client is asking for. We'll need to find the closest one. The next
+    // loop will check for formats that have the same sample rate to what we're asking for. If there is, we prefer that one in all cases.
+    AudioStreamBasicDescription bestDeviceFormatSoFar;
+    mal_zero_object(&bestDeviceFormatSoFar);
+    
+    mal_bool32 hasSupportedFormat = MAL_FALSE;
+    for (UInt32 iFormat = 0; iFormat < deviceFormatDescriptionCount; ++iFormat) {
+        mal_format format;
+        mal_result formatResult = mal_format_from_AudioStreamBasicDescription(&pDeviceFormatDescriptions[iFormat].mFormat, &format);
+        if (formatResult == MAL_SUCCESS && format != mal_format_unknown) {
+            hasSupportedFormat = MAL_TRUE;
+            bestDeviceFormatSoFar = pDeviceFormatDescriptions[iFormat].mFormat;
+            break;
+        }
+    }
+    
+    if (!hasSupportedFormat) {
+        return MAL_FORMAT_NOT_SUPPORTED;
+    }
+    
+    
+    for (UInt32 iFormat = 0; iFormat < deviceFormatDescriptionCount; ++iFormat) {
+        AudioStreamBasicDescription thisDeviceFormat = pDeviceFormatDescriptions[iFormat].mFormat;
+    
+        // If the format is not supported by mini_al we need to skip this one entirely.
+        mal_format thisSampleFormat;
+        mal_result formatResult = mal_format_from_AudioStreamBasicDescription(&pDeviceFormatDescriptions[iFormat].mFormat, &thisSampleFormat);
+        if (formatResult != MAL_SUCCESS || thisSampleFormat == mal_format_unknown) {
+            continue;   // The format is not supported by mini_al. Skip.
+        }
+        
+        mal_format bestSampleFormatSoFar;
+        mal_format_from_AudioStreamBasicDescription(&bestDeviceFormatSoFar, &bestSampleFormatSoFar);
+        
+    
+        // Getting here means the format is supported by mini_al which makes this format a candidate.
+        if (thisDeviceFormat.mSampleRate != desiredSampleRate) {
+            // The sample rate does not match, but this format could still be usable, although it's a very low priority. If the best format
+            // so far has an equal sample rate we can just ignore this one.
+            if (bestDeviceFormatSoFar.mSampleRate == desiredSampleRate) {
+                continue;   // The best sample rate so far has the same sample rate as what we requested which means it's still the best so far. Skip this format.
+            } else {
+                // In this case, neither the best format so far nor this one have the same sample rate. Check the channel count next.
+                if (thisDeviceFormat.mChannelsPerFrame != desiredChannelCount) {
+                    // This format has a different sample rate _and_ a different channel count.
+                    if (bestDeviceFormatSoFar.mChannelsPerFrame == desiredChannelCount) {
+                        continue;   // No change to the best format.
+                    } else {
+                        // Both this format and the best so far have different sample rates and different channel counts. Whichever has the
+                        // best format is the new best.
+                        if (mal_get_format_priority_index(thisSampleFormat) < mal_get_format_priority_index(bestSampleFormatSoFar)) {
+                            bestDeviceFormatSoFar = thisDeviceFormat;
+                            continue;
+                        } else {
+                            continue;   // No change to the best format.
+                        }
+                    }
+                } else {
+                    // This format has a different sample rate but the desired channel count.
+                    if (bestDeviceFormatSoFar.mChannelsPerFrame == desiredChannelCount) {
+                        // Both this format and the best so far have the desired channel count. Whichever has the best format is the new best.
+                        if (mal_get_format_priority_index(thisSampleFormat) < mal_get_format_priority_index(bestSampleFormatSoFar)) {
+                            bestDeviceFormatSoFar = thisDeviceFormat;
+                            continue;
+                        } else {
+                            continue;   // No change to the best format for now.
+                        }
+                    } else {
+                        // This format has the desired channel count, but the best so far does not. We have a new best.
+                        bestDeviceFormatSoFar = thisDeviceFormat;
+                        continue;
+                    }
+                }
+            }
+        } else {
+            // The sample rates match which makes this format a very high priority contender. If the best format so far has a different
+            // sample rate it needs to be replaced with this one.
+            if (bestDeviceFormatSoFar.mSampleRate != desiredSampleRate) {
+                bestDeviceFormatSoFar = thisDeviceFormat;
+                continue;
+            } else {
+                // In this case both this format and the best format so far have the same sample rate. Check the channel count next.
+                if (thisDeviceFormat.mChannelsPerFrame == desiredChannelCount) {
+                    // In this case this format has the same channel count as what the client is requesting. If the best format so far has
+                    // a different count, this one becomes the new best.
+                    if (bestDeviceFormatSoFar.mChannelsPerFrame != desiredChannelCount) {
+                        bestDeviceFormatSoFar = thisDeviceFormat;
+                        continue;
+                    } else {
+                        // In this case both this format and the best so far have the ideal sample rate and channel count. Check the format.
+                        if (thisSampleFormat == desiredFormat) {
+                            bestDeviceFormatSoFar = thisDeviceFormat;
+                            break;  // Found the exact match.
+                        } else {
+                            // The formats are different. The new best format is the one with the highest priority format according to mini_al.
+                            if (mal_get_format_priority_index(thisSampleFormat) < mal_get_format_priority_index(bestSampleFormatSoFar)) {
+                                bestDeviceFormatSoFar = thisDeviceFormat;
+                                continue;
+                            } else {
+                                continue;   // No change to the best format for now.
+                            }
+                        }
+                    }
+                } else {
+                    // In this case the channel count is different to what the client has requested. If the best so far has the same channel
+                    // count as the requested count then it remains the best.
+                    if (bestDeviceFormatSoFar.mChannelsPerFrame == desiredChannelCount) {
+                        continue;
+                    } else {
+                        // This is the case where both have the same sample rate (good) but different channel counts. Right now both have about
+                        // the same priority, but we need to compare the format now.
+                        if (thisSampleFormat == bestSampleFormatSoFar) {
+                            if (mal_get_format_priority_index(thisSampleFormat) < mal_get_format_priority_index(bestSampleFormatSoFar)) {
+                                bestDeviceFormatSoFar = thisDeviceFormat;
+                                continue;
+                            } else {
+                                continue;   // No change to the best format for now.
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+    
+    *pFormat = bestDeviceFormatSoFar;
+    return MAL_SUCCESS;
+}
+
+
+
+
+mal_bool32 mal_context_is_device_id_equal__coreaudio(mal_context* pContext, const mal_device_id* pID0, const mal_device_id* pID1)
+{
+    mal_assert(pContext != NULL);
+    mal_assert(pID0 != NULL);
+    mal_assert(pID1 != NULL);
+    (void)pContext;
+
+    return strcmp(pID0->coreaudio, pID1->coreaudio) == 0;
+}
+
+mal_result mal_context_enumerate_devices__coreaudio(mal_context* pContext, mal_enum_devices_callback_proc callback, void* pUserData)
+{
+    mal_assert(pContext != NULL);
+    mal_assert(callback != NULL);
+    
+    UInt32 deviceCount;
+    AudioObjectID* pDeviceObjectIDs;
+    mal_result result = mal_get_device_object_ids__coreaudio(pContext, &deviceCount, &pDeviceObjectIDs);
+    if (result != MAL_SUCCESS) {
+        return result;
+    }
+  
+    for (UInt32 iDevice = 0; iDevice < deviceCount; ++iDevice) {
+        AudioObjectID deviceObjectID = pDeviceObjectIDs[iDevice];
+
+        mal_device_info info;
+        mal_zero_object(&info);
+        if (mal_get_AudioObject_uid(pContext, deviceObjectID, sizeof(info.id.coreaudio), info.id.coreaudio) != MAL_SUCCESS) {
+            continue;
+        }
+        if (mal_get_AudioObject_name(pContext, deviceObjectID, sizeof(info.name), info.name) != MAL_SUCCESS) {
+            continue;
+        }
+
+        if (mal_does_AudioObject_support_playback(pContext, deviceObjectID)) {
+            if (!callback(pContext, mal_device_type_playback, &info, pUserData)) {
+                break;
+            }
+        }
+        if (mal_does_AudioObject_support_capture(pContext, deviceObjectID)) {
+            if (!callback(pContext, mal_device_type_capture, &info, pUserData)) {
+                break;
+            }
+        }
+    }
+    
+    mal_free(pDeviceObjectIDs);
+    return MAL_SUCCESS;
+}
+
+mal_result mal_context_get_device_info__coreaudio(mal_context* pContext, mal_device_type deviceType, const mal_device_id* pDeviceID, mal_share_mode shareMode, mal_device_info* pDeviceInfo)
+{
+    mal_assert(pContext != NULL);
+    (void)shareMode;
+    (void)pDeviceInfo;
+    
+    AudioObjectID deviceObjectID;
+    mal_result result = mal_find_AudioObjectID(pContext, deviceType, pDeviceID, &deviceObjectID);
+    if (result != MAL_SUCCESS) {
+        return result;
+    }
+    
+    result = mal_get_AudioObject_uid(pContext, deviceObjectID, sizeof(pDeviceInfo->id.coreaudio), pDeviceInfo->id.coreaudio);
+    if (result != MAL_SUCCESS) {
+        return result;
+    }
+    
+    result = mal_get_AudioObject_name(pContext, deviceObjectID, sizeof(pDeviceInfo->name), pDeviceInfo->name);
+    if (result != MAL_SUCCESS) {
+        return result;
+    }
+    
+    // Formats.
+    UInt32 streamDescriptionCount;
+    AudioStreamRangedDescription* pStreamDescriptions;
+    result = mal_get_AudioObject_stream_descriptions(pContext, deviceObjectID, deviceType, &streamDescriptionCount, &pStreamDescriptions);
+    if (result != MAL_SUCCESS) {
+        return result;
+    }
+    
+    for (UInt32 iStreamDescription = 0; iStreamDescription < streamDescriptionCount; ++iStreamDescription) {
+        mal_format format;
+        result = mal_format_from_AudioStreamBasicDescription(&pStreamDescriptions[iStreamDescription].mFormat, &format);
+        if (result != MAL_SUCCESS) {
+            continue;
+        }
+        
+        mal_assert(format != mal_format_unknown);
+        
+        // Make sure the format isn't already in the output list.
+        mal_bool32 exists = MAL_FALSE;
+        for (mal_uint32 iOutputFormat = 0; iOutputFormat < pDeviceInfo->formatCount; ++iOutputFormat) {
+            if (pDeviceInfo->formats[iOutputFormat] == format) {
+                exists = MAL_TRUE;
+                break;
+            }
+        }
+        
+        if (!exists) {
+            pDeviceInfo->formats[pDeviceInfo->formatCount++] = format;
+        }
+    }
+    
+    mal_free(pStreamDescriptions);
+    
+    
+    // Channels.
+    result = mal_get_AudioObject_channel_count(pContext, deviceObjectID, deviceType, &pDeviceInfo->minChannels);
+    if (result != MAL_SUCCESS) {
+        return result;
+    }
+    pDeviceInfo->maxChannels = pDeviceInfo->minChannels;
+    
+    
+    // Sample rates.
+    UInt32 sampleRateRangeCount;
+    AudioValueRange* pSampleRateRanges;
+    result = mal_get_AudioObject_sample_rates(pContext, deviceObjectID, deviceType, &sampleRateRangeCount, &pSampleRateRanges);
+    if (result != MAL_SUCCESS) {
+        return result;
+    }
+    
+    if (sampleRateRangeCount > 0) {
+        pDeviceInfo->minSampleRate = UINT32_MAX;
+        pDeviceInfo->maxSampleRate = 0;
+        for (UInt32 iSampleRate = 0; iSampleRate < sampleRateRangeCount; ++iSampleRate) {
+            if (pDeviceInfo->minSampleRate > pSampleRateRanges[iSampleRate].mMinimum) {
+                pDeviceInfo->minSampleRate = pSampleRateRanges[iSampleRate].mMinimum;
+            }
+            if (pDeviceInfo->maxSampleRate < pSampleRateRanges[iSampleRate].mMaximum) {
+                pDeviceInfo->maxSampleRate = pSampleRateRanges[iSampleRate].mMaximum;
+            }
+        }
+    }
+    
+    return MAL_SUCCESS;
+}
+
+mal_result mal_context_init__coreaudio(mal_context* pContext)
+{
+    mal_assert(pContext != NULL);
+    
+#ifndef MAL_NO_RUNTIME_LINKING
+    pContext->coreaudio.hCoreFoundation = mal_dlopen("CoreFoundation.framework/CoreFoundation");
+    if (pContext->coreaudio.hCoreFoundation == NULL) {
+        return MAL_API_NOT_FOUND;
+    }
+    
+    pContext->coreaudio.CFStringGetCString             = mal_dlsym(pContext->coreaudio.hCoreFoundation, "CFStringGetCString");
+    
+    
+    pContext->coreaudio.hCoreAudio = mal_dlopen("CoreAudio.framework/CoreAudio");
+    if (pContext->coreaudio.hCoreAudio == NULL) {
+        mal_dlclose(pContext->coreaudio.hCoreFoundation);
+        return MAL_API_NOT_FOUND;
+    }
+    
+    pContext->coreaudio.AudioObjectGetPropertyData     = mal_dlsym(pContext->coreaudio.hCoreAudio, "AudioObjectGetPropertyData");
+    pContext->coreaudio.AudioObjectGetPropertyDataSize = mal_dlsym(pContext->coreaudio.hCoreAudio, "AudioObjectGetPropertyDataSize");
+    pContext->coreaudio.AudioObjectSetPropertyData     = mal_dlsym(pContext->coreaudio.hCoreAudio, "AudioObjectSetPropertyData");
+    
+    
+    pContext->coreaudio.hAudioToolbox = mal_dlopen("AudioToolbox.framework/AudioToolbox");
+    if (pContext->coreaudio.hAudioToolbox == NULL) {
+        mal_dlclose(pContext->coreaudio.hCoreAudio);
+        mal_dlclose(pContext->coreaudio.hCoreFoundation);
+        return MAL_API_NOT_FOUND;
+    }
+    
+    pContext->coreaudio.AudioComponentFindNext         = mal_dlsym(pContext->coreaudio.hAudioToolbox, "AudioComponentFindNext");
+    pContext->coreaudio.AudioComponentInstanceDispose  = mal_dlsym(pContext->coreaudio.hAudioToolbox, "AudioComponentInstanceDispose");
+    pContext->coreaudio.AudioComponentInstanceNew      = mal_dlsym(pContext->coreaudio.hAudioToolbox, "AudioComponentInstanceNew");
+    pContext->coreaudio.AudioOutputUnitStart           = mal_dlsym(pContext->coreaudio.hAudioToolbox, "AudioOutputUnitStart");
+    pContext->coreaudio.AudioOutputUnitStop            = mal_dlsym(pContext->coreaudio.hAudioToolbox, "AudioOutputUnitStop");
+    pContext->coreaudio.AudioUnitAddPropertyListener   = mal_dlsym(pContext->coreaudio.hAudioToolbox, "AudioUnitAddPropertyListener");
+    pContext->coreaudio.AudioUnitGetProperty           = mal_dlsym(pContext->coreaudio.hAudioToolbox, "AudioUnitGetProperty");
+    pContext->coreaudio.AudioUnitSetProperty           = mal_dlsym(pContext->coreaudio.hAudioToolbox, "AudioUnitSetProperty");
+    pContext->coreaudio.AudioUnitInitialize            = mal_dlsym(pContext->coreaudio.hAudioToolbox, "AudioUnitInitialize");
+    pContext->coreaudio.AudioUnitRender                = mal_dlsym(pContext->coreaudio.hAudioToolbox, "AudioUnitRender");
+#else
+    pContext->coreaudio.CFStringGetCString             = CFStringGetCString;
+    
+    pContext->coreaudio.AudioObjectGetPropertyData     = AudioObjectGetPropertyData;
+    pContext->coreaudio.AudioObjectGetPropertyDataSize = AudioObjectGetPropertyDataSize;
+    pContext->coreaudio.AudioObjectSetPropertyData     = AudioObjectSetPropertyData;
+    
+    pContext->coreaudio.AudioComponentFindNext         = AudioComponentFindNext;
+    pContext->coreaudio.AudioComponentInstanceDispose  = AudioComponentInstanceDispose;
+    pContext->coreaudio.AudioComponentInstanceNew      = AudioComponentInstanceNew;
+    pContext->coreaudio.AudioOutputUnitStart           = AudioOutputUnitStart;
+    pContext->coreaudio.AudioOutputUnitStop            = AudioOutputUnitStop;
+    pContext->coreaudio.AudioUnitAddPropertyListener   = AudioUnitAddPropertyListener;
+    pContext->coreaudio.AudioUnitGetProperty           = AudioUnitGetProperty;
+    pContext->coreaudio.AudioUnitSetProperty           = AudioUnitSetProperty;
+    pContext->coreaudio.AudioUnitInitialize            = AudioUnitInitialize;
+    pContext->coreaudio.AudioUnitRender                = AudioUnitRender;
+#endif
+    
+    pContext->onDeviceIDEqual = mal_context_is_device_id_equal__coreaudio;
+    pContext->onEnumDevices   = mal_context_enumerate_devices__coreaudio;
+    pContext->onGetDeviceInfo = mal_context_get_device_info__coreaudio;
+
+    return MAL_SUCCESS;
+}
+
+mal_result mal_context_uninit__coreaudio(mal_context* pContext)
+{
+    mal_assert(pContext != NULL);
+    mal_assert(pContext->backend == mal_backend_coreaudio);
+    
+#ifndef MAL_NO_RUNTIME_LINKING
+    mal_dlclose(pContext->coreaudio.hAudioToolbox);
+    mal_dlclose(pContext->coreaudio.hCoreAudio);
+    mal_dlclose(pContext->coreaudio.hCoreFoundation);
+#endif
+
+    (void)pContext;
+    return MAL_SUCCESS;
+}
+
+void mal_device_uninit__coreaudio(mal_device* pDevice)
+{
+    mal_assert(pDevice != NULL);
+    mal_assert(mal_device__get_state(pDevice) == MAL_STATE_UNINITIALIZED);
+    
+    ((mal_AudioComponentInstanceDispose_proc)pDevice->pContext->coreaudio.AudioComponentInstanceDispose)((AudioUnit)pDevice->coreaudio.audioUnit);
+    
+    if (pDevice->coreaudio.pAudioBufferList) {
+        mal_free(pDevice->coreaudio.pAudioBufferList);
+    }
+}
+
+
+OSStatus mal_on_output__coreaudio(void* pUserData, AudioUnitRenderActionFlags* pActionFlags, const AudioTimeStamp* pTimeStamp, UInt32 busNumber, UInt32 frameCount, AudioBufferList* pBufferList)
+{
+    (void)pActionFlags;
+    (void)pTimeStamp;
+    (void)busNumber;
+
+    mal_device* pDevice = (mal_device*)pUserData;
+    mal_assert(pDevice != NULL);
+    
+    // For now we can assume everything is interleaved.
+    for (UInt32 iBuffer = 0; iBuffer < pBufferList->mNumberBuffers; ++iBuffer) {
+        if (pBufferList->mBuffers[iBuffer].mNumberChannels == pDevice->internalChannels) {
+            mal_uint32 frameCountForThisBuffer = pBufferList->mBuffers[iBuffer].mDataByteSize / mal_get_bytes_per_frame(pDevice->internalFormat, pDevice->internalChannels);
+            if (frameCountForThisBuffer > 0) {
+                mal_device__read_frames_from_client(pDevice, frameCountForThisBuffer, pBufferList->mBuffers[iBuffer].mData);
+            }
+        } else {
+            // This case is where the number of channels in the output buffer do not match our internal channels. It could mean that it's
+            // not interleaved, in which case we can't handle right now since mini_al does not yet support non-interleaved streams. We just
+            // output silence here.
+            mal_zero_memory(pBufferList->mBuffers[iBuffer].mData, pBufferList->mBuffers[iBuffer].mDataByteSize);
+        }
+    }
+    
+    return noErr;
+}
+
+OSStatus mal_on_input__coreaudio(void* pUserData, AudioUnitRenderActionFlags* pActionFlags, const AudioTimeStamp* pTimeStamp, UInt32 busNumber, UInt32 frameCount, AudioBufferList* pUnusedBufferList)
+{
+    (void)pActionFlags;
+    (void)pTimeStamp;
+    (void)busNumber;
+    (void)frameCount;
+    (void)pUnusedBufferList;
+    
+    mal_device* pDevice = (mal_device*)pUserData;
+    mal_assert(pDevice != NULL);
+    
+    // I'm not going to trust the input frame count. I'm instead going to base this off the size of the first buffer.
+    UInt32 actualFrameCount = ((AudioBufferList*)pDevice->coreaudio.pAudioBufferList)->mBuffers[0].mDataByteSize / mal_get_bytes_per_sample(pDevice->internalFormat) / ((AudioBufferList*)pDevice->coreaudio.pAudioBufferList)->mBuffers[0].mNumberChannels;
+    if (actualFrameCount == 0) {
+        return noErr;
+    }
+    
+    OSStatus status = ((mal_AudioUnitRender_proc)pDevice->pContext->coreaudio.AudioUnitRender)((AudioUnit)pDevice->coreaudio.audioUnit, pActionFlags, pTimeStamp, busNumber, actualFrameCount, (AudioBufferList*)pDevice->coreaudio.pAudioBufferList);
+    if (status != noErr) {
+        return status;
+    }
+    
+    AudioBufferList* pRenderedBufferList = (AudioBufferList*)pDevice->coreaudio.pAudioBufferList;
+    mal_assert(pRenderedBufferList);
+    
+    // For now we can assume everything is interleaved.
+    for (UInt32 iBuffer = 0; iBuffer < pRenderedBufferList->mNumberBuffers; ++iBuffer) {
+        if (pRenderedBufferList->mBuffers[iBuffer].mNumberChannels == pDevice->internalChannels) {
+            mal_uint32 frameCountForThisBuffer = pRenderedBufferList->mBuffers[iBuffer].mDataByteSize / mal_get_bytes_per_frame(pDevice->internalFormat, pDevice->internalChannels);
+            if (frameCountForThisBuffer > 0) {
+                mal_device__send_frames_to_client(pDevice, frameCountForThisBuffer, pRenderedBufferList->mBuffers[iBuffer].mData);
+            }
+        } else {
+            // This case is where the number of channels in the output buffer do not match our internal channels. It could mean that it's
+            // not interleaved, in which case we can't handle right now since mini_al does not yet support non-interleaved streams.
+        }
+    }
+
+    return noErr;
+}
+
+void on_start_stop__coreaudio(void* pUserData, AudioUnit audioUnit, AudioUnitPropertyID propertyID, AudioUnitScope scope, AudioUnitElement element)
+{
+    (void)propertyID;
+    
+    mal_device* pDevice = (mal_device*)pUserData;
+    mal_assert(pDevice != NULL);
+    
+    UInt32 isRunning;
+    UInt32 isRunningSize = sizeof(isRunning);
+    OSStatus status = ((mal_AudioUnitGetProperty_proc)pDevice->pContext->coreaudio.AudioUnitGetProperty)(audioUnit, kAudioOutputUnitProperty_IsRunning, scope, element, &isRunning, &isRunningSize);
+    if (status != noErr) {
+        return; // Don't really know what to do in this case... just ignore it, I suppose...
+    }
+    
+    if (!isRunning) {
+        mal_stop_proc onStop = pDevice->onStop;
+        if (onStop) {
+            onStop(pDevice);
+        }
+    }
+}
+
+
+mal_result mal_device_init__coreaudio(mal_context* pContext, mal_device_type deviceType, const mal_device_id* pDeviceID, const mal_device_config* pConfig, mal_device* pDevice)
+{
+    mal_assert(pContext != NULL);
+    mal_assert(pConfig != NULL);
+    mal_assert(pDevice != NULL);
+    mal_assert(deviceType == mal_device_type_playback || deviceType == mal_device_type_capture);
+    
+    AudioObjectID deviceObjectID;
+    mal_result result = mal_find_AudioObjectID(pContext, deviceType, pDeviceID, &deviceObjectID);
+    if (result != MAL_SUCCESS) {
+        return result;
+    }
+    
+    pDevice->coreaudio.deviceObjectID = deviceObjectID;
+    
+    // Core audio doesn't really use the notion of a period so we can leave this unmodified, but not too over the top.
+    if (pDevice->periods < 1) {
+        pDevice->periods = 1;
+    }
+    if (pDevice->periods > 16) {
+        pDevice->periods = 16;
+    }
+    
+
+    // Audio component.
+    AudioComponentDescription desc;
+    desc.componentType = kAudioUnitType_Output;
+#if defined(TARGET_OS_OSX)
+    desc.componentSubType = kAudioUnitSubType_HALOutput;
+#else
+    desc.componentSubType = kAudioUnitSubType_RemoteIO;
+#endif
+    desc.componentManufacturer = kAudioUnitManufacturer_Apple;
+    desc.componentFlags = 0;
+    desc.componentFlagsMask = 0;
+    
+    pDevice->coreaudio.component = ((mal_AudioComponentFindNext_proc)pContext->coreaudio.AudioComponentFindNext)(NULL, &desc);
+    if (pDevice->coreaudio.component == NULL) {
+        return MAL_FAILED_TO_INIT_BACKEND;
+    }
+    
+    
+    // Audio unit.
+    OSStatus status = ((mal_AudioComponentInstanceNew_proc)pContext->coreaudio.AudioComponentInstanceNew)(pDevice->coreaudio.component, (AudioUnit*)&pDevice->coreaudio.audioUnit);
+    if (status != noErr) {
+        return mal_result_from_OSStatus(status);
+    }
+    
+    
+    // The input/output buses need to be explicitly enabled and disabled. We set the flag based on the output unit first, then we just swap it for input.
+    UInt32 enableIOFlag = 1;
+    if (deviceType == mal_device_type_capture) {
+        enableIOFlag = 0;
+    }
+    
+    status = ((mal_AudioUnitSetProperty_proc)pContext->coreaudio.AudioUnitSetProperty)((AudioUnit)pDevice->coreaudio.audioUnit, kAudioOutputUnitProperty_EnableIO, kAudioUnitScope_Output, MAL_COREAUDIO_OUTPUT_BUS, &enableIOFlag, sizeof(enableIOFlag));
+    if (status != noErr) {
+        ((mal_AudioComponentInstanceDispose_proc)pContext->coreaudio.AudioComponentInstanceDispose)((AudioUnit)pDevice->coreaudio.audioUnit);
+        return mal_result_from_OSStatus(status);
+    }
+    
+    enableIOFlag = (enableIOFlag == 0) ? 1 : 0;
+    status = ((mal_AudioUnitSetProperty_proc)pContext->coreaudio.AudioUnitSetProperty)((AudioUnit)pDevice->coreaudio.audioUnit, kAudioOutputUnitProperty_EnableIO, kAudioUnitScope_Input, MAL_COREAUDIO_INPUT_BUS, &enableIOFlag, sizeof(enableIOFlag));
+    if (status != noErr) {
+        ((mal_AudioComponentInstanceDispose_proc)pContext->coreaudio.AudioComponentInstanceDispose)((AudioUnit)pDevice->coreaudio.audioUnit);
+        return mal_result_from_OSStatus(status);
+    }
+    
+    
+    // Set the device to use with this audio unit.
+    status = ((mal_AudioUnitSetProperty_proc)pContext->coreaudio.AudioUnitSetProperty)((AudioUnit)pDevice->coreaudio.audioUnit, kAudioOutputUnitProperty_CurrentDevice, kAudioUnitScope_Global, (deviceType == mal_device_type_playback) ? MAL_COREAUDIO_OUTPUT_BUS : MAL_COREAUDIO_INPUT_BUS, &deviceObjectID, sizeof(AudioDeviceID));
+    if (status != noErr) {
+        ((mal_AudioComponentInstanceDispose_proc)pContext->coreaudio.AudioComponentInstanceDispose)((AudioUnit)pDevice->coreaudio.audioUnit);
+        return mal_result_from_OSStatus(result);
+    }
+    
+    
+    // Format. This is the hardest part of initialization because there's a few variables to take into account.
+    //   1) The format must be supported by the device.
+    //   2) The format must be supported mini_al.
+    //   3) There's a priority that mini_al prefers.
+    //
+    // Ideally we would like to use a format that's as close to the hardware as possible so we can get as close to a passthrough as possible. The
+    // most important property is the sample rate. mini_al can do format conversion for any sample rate and channel count, but cannot do the same
+    // for the sample data format. If the sample data format is not supported by mini_al it must be ignored completely.
+    {
+        AudioUnitScope   formatScope   = (deviceType == mal_device_type_playback) ? kAudioUnitScope_Input : kAudioUnitScope_Output;
+        AudioUnitElement formatElement = (deviceType == mal_device_type_playback) ? MAL_COREAUDIO_OUTPUT_BUS : MAL_COREAUDIO_INPUT_BUS;
+    
+        AudioStreamBasicDescription bestFormat;
+        result = mal_device_find_best_format__coreaudio(pDevice, &bestFormat);
+        if (result != MAL_SUCCESS) {
+            ((mal_AudioComponentInstanceDispose_proc)pContext->coreaudio.AudioComponentInstanceDispose)((AudioUnit)pDevice->coreaudio.audioUnit);
+            return result;
+        }
+        
+        status = ((mal_AudioUnitSetProperty_proc)pContext->coreaudio.AudioUnitSetProperty)((AudioUnit)pDevice->coreaudio.audioUnit, kAudioUnitProperty_StreamFormat, formatScope, formatElement, &bestFormat, sizeof(bestFormat));
+        if (status != noErr) {
+            // We failed to set the format, so fall back to the current format of the audio unit.
+            UInt32 propSize = sizeof(bestFormat);
+            status = ((mal_AudioUnitGetProperty_proc)pContext->coreaudio.AudioUnitGetProperty)((AudioUnit)pDevice->coreaudio.audioUnit, kAudioUnitProperty_StreamFormat, formatScope, formatElement, &bestFormat, &propSize);
+            if (status != noErr) {
+                ((mal_AudioComponentInstanceDispose_proc)pContext->coreaudio.AudioComponentInstanceDispose)((AudioUnit)pDevice->coreaudio.audioUnit);
+                return mal_result_from_OSStatus(status);
+            }
+        }
+        
+        result = mal_format_from_AudioStreamBasicDescription(&bestFormat, &pDevice->internalFormat);
+        if (result != MAL_SUCCESS || pDevice->internalFormat == mal_format_unknown) {
+            ((mal_AudioComponentInstanceDispose_proc)pContext->coreaudio.AudioComponentInstanceDispose)((AudioUnit)pDevice->coreaudio.audioUnit);
+            return result;
+        }
+        
+        pDevice->channels = bestFormat.mChannelsPerFrame;
+        pDevice->sampleRate = bestFormat.mSampleRate;
+    }
+    
+    // Internal channel map.
+    result = mal_get_AudioObject_channel_map(pContext, deviceObjectID, deviceType, pDevice->internalChannelMap);
+    if (result != MAL_SUCCESS) {
+        return result;
+    }
+    
+    
+    // Buffer size.
+    mal_uint32 actualBufferSizeInFrames = pDevice->bufferSizeInFrames;
+    if (actualBufferSizeInFrames < pDevice->periods) {
+        actualBufferSizeInFrames = pDevice->periods;
+    }
+    
+    if (pDevice->usingDefaultBufferSize) {
+        // CPU speed is a factor to consider when determine how large of a buffer we need.
+        float fCPUSpeed = mal_calculate_cpu_speed_factor();
+
+        // In my admittedly limited testing, capture latency seems to be about the same as playback with Core Audio, at least on my MacBook Pro. On other
+        // backends, however, this is often different. I am therefore leaving the logic below in place just in case I need to do some capture/playback
+        // specific tweaking.
+        float fDeviceType;
+        if (deviceType == mal_device_type_playback) {
+            fDeviceType = 1.0f;
+        } else {
+            fDeviceType = 1.0f;
+        }
+
+        // Backend tax. Need to fiddle with this.
+        float fBackend = 1.0f;
+
+        actualBufferSizeInFrames = mal_calculate_default_buffer_size_in_frames(pConfig->performanceProfile, pConfig->sampleRate, fCPUSpeed*fDeviceType*fBackend);
+        if (actualBufferSizeInFrames < pDevice->periods) {
+            actualBufferSizeInFrames = pDevice->periods;
+        }
+    }
+    
+    actualBufferSizeInFrames = actualBufferSizeInFrames / pDevice->periods;
+    result = mal_set_AudioObject_buffer_size_in_frames(pContext, deviceObjectID, deviceType, &actualBufferSizeInFrames);
+    if (result != MAL_SUCCESS) {
+        return result;
+    }
+    
+    pDevice->bufferSizeInFrames = actualBufferSizeInFrames * pDevice->periods;
+    
+
+    // Callbacks.
+    AURenderCallbackStruct callbackInfo;
+    callbackInfo.inputProcRefCon = pDevice;
+    if (deviceType == mal_device_type_playback) {
+        callbackInfo.inputProc = mal_on_output__coreaudio;
+        status = ((mal_AudioUnitSetProperty_proc)pContext->coreaudio.AudioUnitSetProperty)((AudioUnit)pDevice->coreaudio.audioUnit, kAudioUnitProperty_SetRenderCallback, kAudioUnitScope_Global, MAL_COREAUDIO_OUTPUT_BUS, &callbackInfo, sizeof(callbackInfo));
+        if (status != noErr) {
+            ((mal_AudioComponentInstanceDispose_proc)pContext->coreaudio.AudioComponentInstanceDispose)((AudioUnit)pDevice->coreaudio.audioUnit);
+            return mal_result_from_OSStatus(status);
+        }
+    } else {
+        callbackInfo.inputProc = mal_on_input__coreaudio;
+        status = ((mal_AudioUnitSetProperty_proc)pContext->coreaudio.AudioUnitSetProperty)((AudioUnit)pDevice->coreaudio.audioUnit, kAudioOutputUnitProperty_SetInputCallback, kAudioUnitScope_Global, MAL_COREAUDIO_INPUT_BUS, &callbackInfo, sizeof(callbackInfo));
+        if (status != noErr) {
+            ((mal_AudioComponentInstanceDispose_proc)pContext->coreaudio.AudioComponentInstanceDispose)((AudioUnit)pDevice->coreaudio.audioUnit);
+            return mal_result_from_OSStatus(status);
+        }
+    }
+    
+    // We need to listen for stop events.
+    status = ((mal_AudioUnitAddPropertyListener_proc)pContext->coreaudio.AudioUnitAddPropertyListener)((AudioUnit)pDevice->coreaudio.audioUnit, kAudioOutputUnitProperty_IsRunning, on_start_stop__coreaudio, pDevice);
+    if (status != noErr) {
+        ((mal_AudioComponentInstanceDispose_proc)pContext->coreaudio.AudioComponentInstanceDispose)((AudioUnit)pDevice->coreaudio.audioUnit);
+        return mal_result_from_OSStatus(status);
+    }
+    
+    
+    // We need a buffer list if this is an input device. We render into this in the input callback.
+    if (deviceType == mal_device_type_capture) {
+        mal_bool32 isInterleaved = MAL_TRUE;    // TODO: Add support for non-interleaved streams.
+        
+        size_t allocationSize = sizeof(AudioBufferList) - sizeof(AudioBuffer);  // Subtract sizeof(AudioBuffer) because that part is dynamically sized.
+        if (isInterleaved) {
+            // Interleaved case. This is the simple case because we just have one buffer.
+            allocationSize += sizeof(AudioBuffer) * 1;
+            allocationSize += actualBufferSizeInFrames * mal_get_bytes_per_frame(pDevice->internalFormat, pDevice->internalChannels);
+        } else {
+            // Non-interleaved case. This is the more complex case because there's more than one buffer.
+            allocationSize += sizeof(AudioBuffer) * pDevice->internalChannels;
+            allocationSize += actualBufferSizeInFrames * mal_get_bytes_per_sample(pDevice->internalFormat) * pDevice->internalChannels;
+        }
+        
+        AudioBufferList* pBufferList = (AudioBufferList*)mal_malloc(allocationSize);
+        if (pBufferList == NULL) {
+            ((mal_AudioComponentInstanceDispose_proc)pContext->coreaudio.AudioComponentInstanceDispose)((AudioUnit)pDevice->coreaudio.audioUnit);
+            return MAL_OUT_OF_MEMORY;
+        }
+        
+        if (isInterleaved) {
+            pBufferList->mNumberBuffers = 1;
+            pBufferList->mBuffers[0].mNumberChannels = pDevice->internalChannels;
+            pBufferList->mBuffers[0].mDataByteSize = actualBufferSizeInFrames * mal_get_bytes_per_frame(pDevice->internalFormat, pDevice->internalChannels);
+            pBufferList->mBuffers[0].mData = (mal_uint8*)pBufferList + sizeof(AudioBufferList);
+        } else {
+            pBufferList->mNumberBuffers = pDevice->internalChannels;
+            for (mal_uint32 iBuffer = 0; iBuffer < pBufferList->mNumberBuffers; ++iBuffer) {
+                pBufferList->mBuffers[iBuffer].mNumberChannels = 1;
+                pBufferList->mBuffers[iBuffer].mDataByteSize = actualBufferSizeInFrames * mal_get_bytes_per_sample(pDevice->internalFormat);
+                pBufferList->mBuffers[iBuffer].mData = (mal_uint8*)pBufferList + ((sizeof(AudioBufferList) - sizeof(AudioBuffer)) + (sizeof(AudioBuffer) * pDevice->internalChannels)) + (actualBufferSizeInFrames * mal_get_bytes_per_sample(pDevice->internalFormat) * iBuffer);
+            }
+        }
+        
+        pDevice->coreaudio.pAudioBufferList = pBufferList;
+    }
+    
+    
+    // Initialize the audio unit.
+    status = ((mal_AudioUnitInitialize_proc)pContext->coreaudio.AudioUnitInitialize)((AudioUnit)pDevice->coreaudio.audioUnit);
+    if (status != noErr) {
+        mal_free(pDevice->coreaudio.pAudioBufferList);
+        ((mal_AudioComponentInstanceDispose_proc)pContext->coreaudio.AudioComponentInstanceDispose)((AudioUnit)pDevice->coreaudio.audioUnit);
+        return mal_result_from_OSStatus(status);
+    }
+    
+
+    return MAL_SUCCESS;
+}
+
+mal_result mal_device__start_backend__coreaudio(mal_device* pDevice)
+{
+    mal_assert(pDevice != NULL);
+    
+    OSStatus status = ((mal_AudioOutputUnitStart_proc)pDevice->pContext->coreaudio.AudioOutputUnitStart)((AudioUnit)pDevice->coreaudio.audioUnit);
+    if (status != noErr) {
+        return mal_result_from_OSStatus(status);
+    }
+    
+    return MAL_SUCCESS;
+}
+
+mal_result mal_device__stop_backend__coreaudio(mal_device* pDevice)
+{
+    mal_assert(pDevice != NULL);
+    
+    OSStatus status = ((mal_AudioOutputUnitStop_proc)pDevice->pContext->coreaudio.AudioOutputUnitStop)((AudioUnit)pDevice->coreaudio.audioUnit);
+    if (status != noErr) {
+        return mal_result_from_OSStatus(status);
+    }
+    
+    return MAL_SUCCESS;
+}
+#endif  // Core Audio
+
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// OSS Backend
+//
+///////////////////////////////////////////////////////////////////////////////
+#ifdef MAL_HAS_OSS
+#include <sys/ioctl.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/soundcard.h>
+
+#ifndef SNDCTL_DSP_HALT
+#define SNDCTL_DSP_HALT SNDCTL_DSP_RESET
+#endif
+
+int mal_open_temp_device__oss()
+{
+    // The OSS sample code uses "/dev/mixer" as the device for getting system properties so I'm going to do the same.
+    int fd = open("/dev/mixer", O_RDONLY, 0);
+    if (fd >= 0) {
+        return fd;
+    }
+
+    return -1;
+}
+
+mal_result mal_context_open_device__oss(mal_context* pContext, mal_device_type type, const mal_device_id* pDeviceID, int* pfd)
+{
+    mal_assert(pContext != NULL);
+    mal_assert(pfd != NULL);
+    (void)pContext;
+
+    *pfd = -1;
+
+    char deviceName[64];
+    if (pDeviceID != NULL) {
+        mal_strncpy_s(deviceName, sizeof(deviceName), pDeviceID->oss, (size_t)-1);
+    } else {
+        mal_strncpy_s(deviceName, sizeof(deviceName), "/dev/dsp", (size_t)-1);
+    }
+
+    *pfd = open(deviceName, (type == mal_device_type_playback) ? O_WRONLY : O_RDONLY, 0);
+    if (*pfd == -1) {
+        return MAL_FAILED_TO_OPEN_BACKEND_DEVICE;
+    }
+
+    return MAL_SUCCESS;
+}
+
+mal_bool32 mal_context_is_device_id_equal__oss(mal_context* pContext, const mal_device_id* pID0, const mal_device_id* pID1)
+{
+    mal_assert(pContext != NULL);
+    mal_assert(pID0 != NULL);
+    mal_assert(pID1 != NULL);
+    (void)pContext;
+
+    return mal_strcmp(pID0->oss, pID1->oss) == 0;
+}
+
+mal_result mal_context_enumerate_devices__oss(mal_context* pContext, mal_enum_devices_callback_proc callback, void* pUserData)
+{
+    mal_assert(pContext != NULL);
+    mal_assert(callback != NULL);
+
+    int fd = mal_open_temp_device__oss();
+    if (fd == -1) {
+        return mal_context_post_error(pContext, NULL, "[OSS] Failed to open a temporary device for retrieving system information used for device enumeration.", MAL_NO_BACKEND);
+    }
+
+    oss_sysinfo si;
+    int result = ioctl(fd, SNDCTL_SYSINFO, &si);
+    if (result != -1) {
+        for (int iAudioDevice = 0; iAudioDevice < si.numaudios; ++iAudioDevice) {
+            oss_audioinfo ai;
+            ai.dev = iAudioDevice;
+            result = ioctl(fd, SNDCTL_AUDIOINFO, &ai);
+            if (result != -1) {
+                if (ai.devnode[0] != '\0') {    // <-- Can be blank, according to documentation.
+                    mal_device_info deviceInfo;
+                    mal_zero_object(&deviceInfo);
+
+                    // ID
+                    mal_strncpy_s(deviceInfo.id.oss, sizeof(deviceInfo.id.oss), ai.devnode, (size_t)-1);
+
+                    // The human readable device name should be in the "ai.handle" variable, but it can
+                    // sometimes be empty in which case we just fall back to "ai.name" which is less user
+                    // friendly, but usually has a value.
+                    if (ai.handle[0] != '\0') {
+                        mal_strncpy_s(deviceInfo.name, sizeof(deviceInfo.name), ai.handle, (size_t)-1);
+                    } else {
+                        mal_strncpy_s(deviceInfo.name, sizeof(deviceInfo.name), ai.name, (size_t)-1);
+                    }
+
+                    // The device can be both playback and capture.
+                    mal_bool32 isTerminating = MAL_FALSE;
+                    if (!isTerminating && (ai.caps & PCM_CAP_OUTPUT) != 0) {
+                        isTerminating = !callback(pContext, mal_device_type_playback, &deviceInfo, pUserData);
+                    }
+                    if (!isTerminating && (ai.caps & PCM_CAP_INPUT) != 0) {
+                        isTerminating = !callback(pContext, mal_device_type_capture, &deviceInfo, pUserData);
+                    }
+
+                    if (isTerminating) {
+                        break;
+                    }
+                }
+            }
+        }
+    } else {
+        close(fd);
+        return mal_context_post_error(pContext, NULL, "[OSS] Failed to retrieve system information for device enumeration.", MAL_NO_BACKEND);
+    }
+
+    close(fd);
+    return MAL_SUCCESS;
+}
+
+mal_result mal_context_get_device_info__oss(mal_context* pContext, mal_device_type deviceType, const mal_device_id* pDeviceID, mal_share_mode shareMode, mal_device_info* pDeviceInfo)
+{
+    mal_assert(pContext != NULL);
+    (void)shareMode;
+
+    // Handle the default device a little differently.
+    if (pDeviceID == NULL) {
         if (deviceType == mal_device_type_playback) {
             mal_strncpy_s(pDeviceInfo->name, sizeof(pDeviceInfo->name), MAL_DEFAULT_PLAYBACK_DEVICE_NAME, (size_t)-1);
         } else {
@@ -13632,8 +15444,9 @@ mal_result mal_device__stop_backend__opensl(mal_device* pDevice)
 
     // Make sure the client is aware that the device has stopped. There may be an OpenSL|ES callback for this, but I haven't found it.
     mal_device__set_state(pDevice, MAL_STATE_STOPPED);
-    if (pDevice->onStop) {
-        pDevice->onStop(pDevice);
+    mal_stop_proc onStop = pDevice->onStop;
+    if (onStop) {
+        onStop(pDevice);
     }
 
     return MAL_SUCCESS;
@@ -14912,8 +16725,8 @@ mal_result mal_context_init__sdl(mal_context* pContext)
         "SDL2.dll",
         "SDL.dll"
 #elif defined(MAL_APPLE)
-        "libSDL2-2.0.0.dylib",  // Can any Mac users out there comfirm these library names?
-        "libSDL-1.2.0.dylib"
+        "SDL2.framework/SDL2",
+        "SDL.framework/SDL"
 #else
         "libSDL2-2.0.so.0",
         "libSDL-1.2.so.0"
@@ -15042,11 +16855,12 @@ mal_result mal_device_init__sdl(mal_context* pContext, mal_device_type type, mal
             fType = 2.0f;
         }
 
-        // Backend tax. Need to fiddle with this. Special case for Emscripten.
+        // Backend tax. Need to fiddle with this. Keep in mind that SDL always rounds the buffer size up to the next
+        // power of two which should cover the natural API overhead. Special case for Emscripten.
     #if defined(__EMSCRIPTEN__)
-        float fBackend = 4.0f;
+        float fBackend = 1.0f;
     #else
-        float fBackend = 2.0f;
+        float fBackend = 1.0f;
     #endif
 
         bufferSize = mal_calculate_default_buffer_size_in_frames(pConfig->performanceProfile, pConfig->sampleRate, fCPUSpeed*fType*fBackend);
@@ -15156,6 +16970,12 @@ mal_result mal_device__stop_backend__sdl(mal_device* pDevice)
     {
         ((MAL_PFN_SDL_PauseAudio)pDevice->pContext->sdl.SDL_PauseAudio)(1);
     }
+    
+    mal_device__set_state(pDevice, MAL_STATE_STOPPED);
+    mal_stop_proc onStop = pDevice->onStop;
+    if (onStop) {
+        onStop(pDevice);
+    }
 
     return MAL_SUCCESS;
 }
@@ -15614,6 +17434,7 @@ const mal_backend g_malDefaultBackends[] = {
     mal_backend_wasapi,
     mal_backend_dsound,
     mal_backend_winmm,
+    mal_backend_coreaudio,
     mal_backend_oss,
     mal_backend_pulseaudio,
     mal_backend_alsa,
@@ -15627,8 +17448,9 @@ const mal_backend g_malDefaultBackends[] = {
 mal_bool32 mal_is_backend_asynchronous(mal_backend backend)
 {
     return
-        backend == mal_backend_jack   ||
-        backend == mal_backend_opensl ||
+        backend == mal_backend_jack      ||
+        backend == mal_backend_coreaudio ||
+        backend == mal_backend_opensl    ||
         backend == mal_backend_sdl;
 }
 
@@ -15703,6 +17525,12 @@ mal_result mal_context_init(const mal_backend backends[], mal_uint32 backendCoun
                 result = mal_context_init__jack(pContext);
             } break;
         #endif
+        #ifdef MAL_HAS_COREAUDIO
+            case mal_backend_coreaudio:
+            {
+                result = mal_context_init__coreaudio(pContext);
+            } break;
+        #endif
         #ifdef MAL_HAS_OSS
             case mal_backend_oss:
             {
@@ -15801,6 +17629,12 @@ mal_result mal_context_uninit(mal_context* pContext)
             mal_context_uninit__jack(pContext);
         } break;
     #endif
+    #ifdef MAL_HAS_COREAUDIO
+        case mal_backend_coreaudio:
+        {
+            mal_context_uninit__coreaudio(pContext);
+        } break;
+    #endif
     #ifdef MAL_HAS_OSS
         case mal_backend_oss:
         {
@@ -16141,6 +17975,12 @@ mal_result mal_device_init(mal_context* pContext, mal_device_type type, mal_devi
             result = mal_device_init__jack(pContext, type, pDeviceID, &config, pDevice);
         } break;
     #endif
+    #ifdef MAL_HAS_COREAUDIO
+        case mal_backend_coreaudio:
+        {
+            result = mal_device_init__coreaudio(pContext, type, pDeviceID, &config, pDevice);
+        } break;
+    #endif
     #ifdef MAL_HAS_OSS
         case mal_backend_oss:
         {
@@ -16321,11 +18161,6 @@ void mal_device_uninit(mal_device* pDevice)
         mal_thread_wait(&pDevice->thread);
     }
 
-    mal_event_uninit(&pDevice->stopEvent);
-    mal_event_uninit(&pDevice->startEvent);
-    mal_event_uninit(&pDevice->wakeupEvent);
-    mal_mutex_uninit(&pDevice->lock);
-
 #ifdef MAL_HAS_WASAPI
     if (pDevice->pContext->backend == mal_backend_wasapi) {
         mal_device_uninit__wasapi(pDevice);
@@ -16356,6 +18191,11 @@ void mal_device_uninit(mal_device* pDevice)
         mal_device_uninit__jack(pDevice);
     }
 #endif
+#ifdef MAL_HAS_COREAUDIO
+    if (pDevice->pContext->backend == mal_backend_coreaudio) {
+        mal_device_uninit__coreaudio(pDevice);
+    }
+#endif
 #ifdef MAL_HAS_OSS
     if (pDevice->pContext->backend == mal_backend_oss) {
         mal_device_uninit__oss(pDevice);
@@ -16382,6 +18222,10 @@ void mal_device_uninit(mal_device* pDevice)
     }
 #endif
 
+    mal_event_uninit(&pDevice->stopEvent);
+    mal_event_uninit(&pDevice->startEvent);
+    mal_event_uninit(&pDevice->wakeupEvent);
+    mal_mutex_uninit(&pDevice->lock);
 
     if (pDevice->isOwnerOfContext) {
         mal_context_uninit(pDevice->pContext);
@@ -16445,6 +18289,14 @@ mal_result mal_device_start(mal_device* pDevice)
             }
         } else
 #endif
+#ifdef MAL_HAS_COREAUDIO
+        if (pDevice->pContext->backend == mal_backend_coreaudio) {
+            result = mal_device__start_backend__coreaudio(pDevice);
+            if (result == MAL_SUCCESS) {
+                mal_device__set_state(pDevice, MAL_STATE_STARTED);
+            }
+        } else
+#endif
 #ifdef MAL_HAS_OPENSL
         if (pDevice->pContext->backend == mal_backend_opensl) {
             result = mal_device__start_backend__opensl(pDevice);
@@ -16511,6 +18363,11 @@ mal_result mal_device_stop(mal_device* pDevice)
             mal_device__stop_backend__jack(pDevice);
         } else
 #endif
+#ifdef MAL_HAS_COREAUDIO
+        if (pDevice->pContext->backend == mal_backend_coreaudio) {
+            mal_device__stop_backend__coreaudio(pDevice);
+        } else
+#endif
 #ifdef MAL_HAS_OPENSL
         if (pDevice->pContext->backend == mal_backend_opensl) {
             mal_device__stop_backend__opensl(pDevice);
@@ -17210,8 +19067,26 @@ void mal_pcm_u8_to_s16__optimized(void* dst, const void* src, mal_uint64 count,
     mal_pcm_u8_to_s16__reference(dst, src, count, ditherMode);
 }
 
-#ifdef MAL_USE_SSE
-void mal_pcm_u8_to_s16__sse(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
+#if defined(MAL_SUPPORT_SSE2)
+void mal_pcm_u8_to_s16__sse2(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
+{
+    mal_pcm_u8_to_s16__optimized(dst, src, count, ditherMode);
+}
+#endif
+#if defined(MAL_SUPPORT_AVX2)
+void mal_pcm_u8_to_s16__avx2(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
+{
+    mal_pcm_u8_to_s16__optimized(dst, src, count, ditherMode);
+}
+#endif
+#if defined(MAL_SUPPORT_AVX512)
+void mal_pcm_u8_to_s16__avx512(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
+{
+    mal_pcm_u8_to_s16__avx2(dst, src, count, ditherMode);
+}
+#endif
+#if defined(MAL_SUPPORT_NEON)
+void mal_pcm_u8_to_s16__neon(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
 {
     mal_pcm_u8_to_s16__optimized(dst, src, count, ditherMode);
 }
@@ -17221,13 +19096,9 @@ void mal_pcm_u8_to_s16(void* dst, const void* src, mal_uint64 count, mal_dither_
 {
 #ifdef MAL_USE_REFERENCE_CONVERSION_APIS
     mal_pcm_u8_to_s16__reference(dst, src, count, ditherMode);
-#else
-#ifdef MAL_USE_SSE
-    mal_pcm_u8_to_s16__sse(dst, src, count, ditherMode);
 #else
     mal_pcm_u8_to_s16__optimized(dst, src, count, ditherMode);
 #endif
-#endif
 }
 
 
@@ -17254,8 +19125,26 @@ void mal_pcm_u8_to_s24__optimized(void* dst, const void* src, mal_uint64 count,
     mal_pcm_u8_to_s24__reference(dst, src, count, ditherMode);
 }
 
-#ifdef MAL_USE_SSE
-void mal_pcm_u8_to_s24__sse(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
+#if defined(MAL_SUPPORT_SSE2)
+void mal_pcm_u8_to_s24__sse2(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
+{
+    mal_pcm_u8_to_s24__optimized(dst, src, count, ditherMode);
+}
+#endif
+#if defined(MAL_SUPPORT_AVX2)
+void mal_pcm_u8_to_s24__avx2(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
+{
+    mal_pcm_u8_to_s24__optimized(dst, src, count, ditherMode);
+}
+#endif
+#if defined(MAL_SUPPORT_AVX512)
+void mal_pcm_u8_to_s24__avx512(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
+{
+    mal_pcm_u8_to_s24__avx2(dst, src, count, ditherMode);
+}
+#endif
+#if defined(MAL_SUPPORT_NEON)
+void mal_pcm_u8_to_s24__neon(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
 {
     mal_pcm_u8_to_s24__optimized(dst, src, count, ditherMode);
 }
@@ -17265,13 +19154,9 @@ void mal_pcm_u8_to_s24(void* dst, const void* src, mal_uint64 count, mal_dither_
 {
 #ifdef MAL_USE_REFERENCE_CONVERSION_APIS
     mal_pcm_u8_to_s24__reference(dst, src, count, ditherMode);
-#else
-#ifdef MAL_USE_SSE
-    mal_pcm_u8_to_s24__sse(dst, src, count, ditherMode);
 #else
     mal_pcm_u8_to_s24__optimized(dst, src, count, ditherMode);
 #endif
-#endif
 }
 
 
@@ -17296,8 +19181,26 @@ void mal_pcm_u8_to_s32__optimized(void* dst, const void* src, mal_uint64 count,
     mal_pcm_u8_to_s32__reference(dst, src, count, ditherMode);
 }
 
-#ifdef MAL_USE_SSE
-void mal_pcm_u8_to_s32__sse(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
+#if defined(MAL_SUPPORT_SSE2)
+void mal_pcm_u8_to_s32__sse2(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
+{
+    mal_pcm_u8_to_s32__optimized(dst, src, count, ditherMode);
+}
+#endif
+#if defined(MAL_SUPPORT_AVX2)
+void mal_pcm_u8_to_s32__avx2(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
+{
+    mal_pcm_u8_to_s32__optimized(dst, src, count, ditherMode);
+}
+#endif
+#if defined(MAL_SUPPORT_AVX512)
+void mal_pcm_u8_to_s32__avx512(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
+{
+    mal_pcm_u8_to_s32__avx2(dst, src, count, ditherMode);
+}
+#endif
+#if defined(MAL_SUPPORT_NEON)
+void mal_pcm_u8_to_s32__neon(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
 {
     mal_pcm_u8_to_s32__optimized(dst, src, count, ditherMode);
 }
@@ -17307,13 +19210,9 @@ void mal_pcm_u8_to_s32(void* dst, const void* src, mal_uint64 count, mal_dither_
 {
 #ifdef MAL_USE_REFERENCE_CONVERSION_APIS
     mal_pcm_u8_to_s32__reference(dst, src, count, ditherMode);
-#else
-#ifdef MAL_USE_SSE
-    mal_pcm_u8_to_s32__sse(dst, src, count, ditherMode);
 #else
     mal_pcm_u8_to_s32__optimized(dst, src, count, ditherMode);
 #endif
-#endif
 }
 
 
@@ -17339,8 +19238,26 @@ void mal_pcm_u8_to_f32__optimized(void* dst, const void* src, mal_uint64 count,
     mal_pcm_u8_to_f32__reference(dst, src, count, ditherMode);
 }
 
-#ifdef MAL_USE_SSE
-void mal_pcm_u8_to_f32__sse(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
+#if defined(MAL_SUPPORT_SSE2)
+void mal_pcm_u8_to_f32__sse2(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
+{
+    mal_pcm_u8_to_f32__optimized(dst, src, count, ditherMode);
+}
+#endif
+#if defined(MAL_SUPPORT_AVX2)
+void mal_pcm_u8_to_f32__avx2(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
+{
+    mal_pcm_u8_to_f32__optimized(dst, src, count, ditherMode);
+}
+#endif
+#if defined(MAL_SUPPORT_AVX512)
+void mal_pcm_u8_to_f32__avx512(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
+{
+    mal_pcm_u8_to_f32__avx2(dst, src, count, ditherMode);
+}
+#endif
+#if defined(MAL_SUPPORT_NEON)
+void mal_pcm_u8_to_f32__neon(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
 {
     mal_pcm_u8_to_f32__optimized(dst, src, count, ditherMode);
 }
@@ -17350,13 +19267,9 @@ void mal_pcm_u8_to_f32(void* dst, const void* src, mal_uint64 count, mal_dither_
 {
 #ifdef MAL_USE_REFERENCE_CONVERSION_APIS
     mal_pcm_u8_to_f32__reference(dst, src, count, ditherMode);
-#else
-#ifdef MAL_USE_SSE
-    mal_pcm_u8_to_f32__sse(dst, src, count, ditherMode);
 #else
     mal_pcm_u8_to_f32__optimized(dst, src, count, ditherMode);
 #endif
-#endif
 }
 
 
@@ -17477,8 +19390,26 @@ void mal_pcm_s16_to_u8__optimized(void* dst, const void* src, mal_uint64 count,
     mal_pcm_s16_to_u8__reference(dst, src, count, ditherMode);
 }
 
-#ifdef MAL_USE_SSE
-void mal_pcm_s16_to_u8__sse(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
+#if defined(MAL_SUPPORT_SSE2)
+void mal_pcm_s16_to_u8__sse2(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
+{
+    mal_pcm_s16_to_u8__optimized(dst, src, count, ditherMode);
+}
+#endif
+#if defined(MAL_SUPPORT_AVX2)
+void mal_pcm_s16_to_u8__avx2(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
+{
+    mal_pcm_s16_to_u8__optimized(dst, src, count, ditherMode);
+}
+#endif
+#if defined(MAL_SUPPORT_AVX512)
+void mal_pcm_s16_to_u8__avx512(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
+{
+    mal_pcm_s16_to_u8__avx2(dst, src, count, ditherMode);
+}
+#endif
+#if defined(MAL_SUPPORT_NEON)
+void mal_pcm_s16_to_u8__neon(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
 {
     mal_pcm_s16_to_u8__optimized(dst, src, count, ditherMode);
 }
@@ -17488,13 +19419,9 @@ void mal_pcm_s16_to_u8(void* dst, const void* src, mal_uint64 count, mal_dither_
 {
 #ifdef MAL_USE_REFERENCE_CONVERSION_APIS
     mal_pcm_s16_to_u8__reference(dst, src, count, ditherMode);
-#else
-#ifdef MAL_USE_SSE
-    mal_pcm_s16_to_u8__sse(dst, src, count, ditherMode);
 #else
     mal_pcm_s16_to_u8__optimized(dst, src, count, ditherMode);
 #endif
-#endif
 }
 
 
@@ -17525,8 +19452,26 @@ void mal_pcm_s16_to_s24__optimized(void* dst, const void* src, mal_uint64 count,
     mal_pcm_s16_to_s24__reference(dst, src, count, ditherMode);
 }
 
-#ifdef MAL_USE_SSE
-void mal_pcm_s16_to_s24__sse(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
+#if defined(MAL_SUPPORT_SSE2)
+void mal_pcm_s16_to_s24__sse2(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
+{
+    mal_pcm_s16_to_s24__optimized(dst, src, count, ditherMode);
+}
+#endif
+#if defined(MAL_SUPPORT_AVX2)
+void mal_pcm_s16_to_s24__avx2(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
+{
+    mal_pcm_s16_to_s24__optimized(dst, src, count, ditherMode);
+}
+#endif
+#if defined(MAL_SUPPORT_AVX512)
+void mal_pcm_s16_to_s24__avx512(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
+{
+    mal_pcm_s16_to_s24__avx2(dst, src, count, ditherMode);
+}
+#endif
+#if defined(MAL_SUPPORT_NEON)
+void mal_pcm_s16_to_s24__neon(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
 {
     mal_pcm_s16_to_s24__optimized(dst, src, count, ditherMode);
 }
@@ -17536,13 +19481,9 @@ void mal_pcm_s16_to_s24(void* dst, const void* src, mal_uint64 count, mal_dither
 {
 #ifdef MAL_USE_REFERENCE_CONVERSION_APIS
     mal_pcm_s16_to_s24__reference(dst, src, count, ditherMode);
-#else
-#ifdef MAL_USE_SSE
-    mal_pcm_s16_to_s24__sse(dst, src, count, ditherMode);
 #else
     mal_pcm_s16_to_s24__optimized(dst, src, count, ditherMode);
 #endif
-#endif
 }
 
 
@@ -17564,8 +19505,26 @@ void mal_pcm_s16_to_s32__optimized(void* dst, const void* src, mal_uint64 count,
     mal_pcm_s16_to_s32__reference(dst, src, count, ditherMode);
 }
 
-#ifdef MAL_USE_SSE
-void mal_pcm_s16_to_s32__sse(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
+#if defined(MAL_SUPPORT_SSE2)
+void mal_pcm_s16_to_s32__sse2(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
+{
+    mal_pcm_s16_to_s32__optimized(dst, src, count, ditherMode);
+}
+#endif
+#if defined(MAL_SUPPORT_AVX2)
+void mal_pcm_s16_to_s32__avx2(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
+{
+    mal_pcm_s16_to_s32__optimized(dst, src, count, ditherMode);
+}
+#endif
+#if defined(MAL_SUPPORT_AVX512)
+void mal_pcm_s16_to_s32__avx512(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
+{
+    mal_pcm_s16_to_s32__avx2(dst, src, count, ditherMode);
+}
+#endif
+#if defined(MAL_SUPPORT_NEON)
+void mal_pcm_s16_to_s32__neon(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
 {
     mal_pcm_s16_to_s32__optimized(dst, src, count, ditherMode);
 }
@@ -17575,13 +19534,9 @@ void mal_pcm_s16_to_s32(void* dst, const void* src, mal_uint64 count, mal_dither
 {
 #ifdef MAL_USE_REFERENCE_CONVERSION_APIS
     mal_pcm_s16_to_s32__reference(dst, src, count, ditherMode);
-#else
-#ifdef MAL_USE_SSE
-    mal_pcm_s16_to_s32__sse(dst, src, count, ditherMode);
 #else
     mal_pcm_s16_to_s32__optimized(dst, src, count, ditherMode);
 #endif
-#endif
 }
 
 
@@ -17615,8 +19570,26 @@ void mal_pcm_s16_to_f32__optimized(void* dst, const void* src, mal_uint64 count,
     mal_pcm_s16_to_f32__reference(dst, src, count, ditherMode);
 }
 
-#ifdef MAL_USE_SSE
-void mal_pcm_s16_to_f32__sse(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
+#if defined(MAL_SUPPORT_SSE2)
+void mal_pcm_s16_to_f32__sse2(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
+{
+    mal_pcm_s16_to_f32__optimized(dst, src, count, ditherMode);
+}
+#endif
+#if defined(MAL_SUPPORT_AVX2)
+void mal_pcm_s16_to_f32__avx2(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
+{
+    mal_pcm_s16_to_f32__optimized(dst, src, count, ditherMode);
+}
+#endif
+#if defined(MAL_SUPPORT_AVX512)
+void mal_pcm_s16_to_f32__avx512(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
+{
+    mal_pcm_s16_to_f32__avx2(dst, src, count, ditherMode);
+}
+#endif
+#if defined(MAL_SUPPORT_NEON)
+void mal_pcm_s16_to_f32__neon(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
 {
     mal_pcm_s16_to_f32__optimized(dst, src, count, ditherMode);
 }
@@ -17626,13 +19599,9 @@ void mal_pcm_s16_to_f32(void* dst, const void* src, mal_uint64 count, mal_dither
 {
 #ifdef MAL_USE_REFERENCE_CONVERSION_APIS
     mal_pcm_s16_to_f32__reference(dst, src, count, ditherMode);
-#else
-#ifdef MAL_USE_SSE
-    mal_pcm_s16_to_f32__sse(dst, src, count, ditherMode);
 #else
     mal_pcm_s16_to_f32__optimized(dst, src, count, ditherMode);
 #endif
-#endif
 }
 
 
@@ -17731,8 +19700,26 @@ void mal_pcm_s24_to_u8__optimized(void* dst, const void* src, mal_uint64 count,
     mal_pcm_s24_to_u8__reference(dst, src, count, ditherMode);
 }
 
-#ifdef MAL_USE_SSE
-void mal_pcm_s24_to_u8__sse(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
+#if defined(MAL_SUPPORT_SSE2)
+void mal_pcm_s24_to_u8__sse2(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
+{
+    mal_pcm_s24_to_u8__optimized(dst, src, count, ditherMode);
+}
+#endif
+#if defined(MAL_SUPPORT_AVX2)
+void mal_pcm_s24_to_u8__avx2(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
+{
+    mal_pcm_s24_to_u8__optimized(dst, src, count, ditherMode);
+}
+#endif
+#if defined(MAL_SUPPORT_AVX512)
+void mal_pcm_s24_to_u8__avx512(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
+{
+    mal_pcm_s24_to_u8__avx2(dst, src, count, ditherMode);
+}
+#endif
+#if defined(MAL_SUPPORT_NEON)
+void mal_pcm_s24_to_u8__neon(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
 {
     mal_pcm_s24_to_u8__optimized(dst, src, count, ditherMode);
 }
@@ -17742,13 +19729,9 @@ void mal_pcm_s24_to_u8(void* dst, const void* src, mal_uint64 count, mal_dither_
 {
 #ifdef MAL_USE_REFERENCE_CONVERSION_APIS
     mal_pcm_s24_to_u8__reference(dst, src, count, ditherMode);
-#else
-#ifdef MAL_USE_SSE
-    mal_pcm_s24_to_u8__sse(dst, src, count, ditherMode);
 #else
     mal_pcm_s24_to_u8__optimized(dst, src, count, ditherMode);
 #endif
-#endif
 }
 
 
@@ -17788,8 +19771,26 @@ void mal_pcm_s24_to_s16__optimized(void* dst, const void* src, mal_uint64 count,
     mal_pcm_s24_to_s16__reference(dst, src, count, ditherMode);
 }
 
-#ifdef MAL_USE_SSE
-void mal_pcm_s24_to_s16__sse(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
+#if defined(MAL_SUPPORT_SSE2)
+void mal_pcm_s24_to_s16__sse2(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
+{
+    mal_pcm_s24_to_s16__optimized(dst, src, count, ditherMode);
+}
+#endif
+#if defined(MAL_SUPPORT_AVX2)
+void mal_pcm_s24_to_s16__avx2(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
+{
+    mal_pcm_s24_to_s16__optimized(dst, src, count, ditherMode);
+}
+#endif
+#if defined(MAL_SUPPORT_AVX512)
+void mal_pcm_s24_to_s16__avx512(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
+{
+    mal_pcm_s24_to_s16__avx2(dst, src, count, ditherMode);
+}
+#endif
+#if defined(MAL_SUPPORT_NEON)
+void mal_pcm_s24_to_s16__neon(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
 {
     mal_pcm_s24_to_s16__optimized(dst, src, count, ditherMode);
 }
@@ -17799,13 +19800,9 @@ void mal_pcm_s24_to_s16(void* dst, const void* src, mal_uint64 count, mal_dither
 {
 #ifdef MAL_USE_REFERENCE_CONVERSION_APIS
     mal_pcm_s24_to_s16__reference(dst, src, count, ditherMode);
-#else
-#ifdef MAL_USE_SSE
-    mal_pcm_s24_to_s16__sse(dst, src, count, ditherMode);
 #else
     mal_pcm_s24_to_s16__optimized(dst, src, count, ditherMode);
 #endif
-#endif
 }
 
 
@@ -17835,8 +19832,26 @@ void mal_pcm_s24_to_s32__optimized(void* dst, const void* src, mal_uint64 count,
     mal_pcm_s24_to_s32__reference(dst, src, count, ditherMode);
 }
 
-#ifdef MAL_USE_SSE
-void mal_pcm_s24_to_s32__sse(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
+#if defined(MAL_SUPPORT_SSE2)
+void mal_pcm_s24_to_s32__sse2(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
+{
+    mal_pcm_s24_to_s32__optimized(dst, src, count, ditherMode);
+}
+#endif
+#if defined(MAL_SUPPORT_AVX2)
+void mal_pcm_s24_to_s32__avx2(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
+{
+    mal_pcm_s24_to_s32__optimized(dst, src, count, ditherMode);
+}
+#endif
+#if defined(MAL_SUPPORT_AVX512)
+void mal_pcm_s24_to_s32__avx512(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
+{
+    mal_pcm_s24_to_s32__avx2(dst, src, count, ditherMode);
+}
+#endif
+#if defined(MAL_SUPPORT_NEON)
+void mal_pcm_s24_to_s32__neon(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
 {
     mal_pcm_s24_to_s32__optimized(dst, src, count, ditherMode);
 }
@@ -17846,13 +19861,9 @@ void mal_pcm_s24_to_s32(void* dst, const void* src, mal_uint64 count, mal_dither
 {
 #ifdef MAL_USE_REFERENCE_CONVERSION_APIS
     mal_pcm_s24_to_s32__reference(dst, src, count, ditherMode);
-#else
-#ifdef MAL_USE_SSE
-    mal_pcm_s24_to_s32__sse(dst, src, count, ditherMode);
 #else
     mal_pcm_s24_to_s32__optimized(dst, src, count, ditherMode);
 #endif
-#endif
 }
 
 
@@ -17886,8 +19897,26 @@ void mal_pcm_s24_to_f32__optimized(void* dst, const void* src, mal_uint64 count,
     mal_pcm_s24_to_f32__reference(dst, src, count, ditherMode);
 }
 
-#ifdef MAL_USE_SSE
-void mal_pcm_s24_to_f32__sse(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
+#if defined(MAL_SUPPORT_SSE2)
+void mal_pcm_s24_to_f32__sse2(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
+{
+    mal_pcm_s24_to_f32__optimized(dst, src, count, ditherMode);
+}
+#endif
+#if defined(MAL_SUPPORT_AVX2)
+void mal_pcm_s24_to_f32__avx2(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
+{
+    mal_pcm_s24_to_f32__optimized(dst, src, count, ditherMode);
+}
+#endif
+#if defined(MAL_SUPPORT_AVX512)
+void mal_pcm_s24_to_f32__avx512(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
+{
+    mal_pcm_s24_to_f32__avx2(dst, src, count, ditherMode);
+}
+#endif
+#if defined(MAL_SUPPORT_NEON)
+void mal_pcm_s24_to_f32__neon(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
 {
     mal_pcm_s24_to_f32__optimized(dst, src, count, ditherMode);
 }
@@ -17897,13 +19926,9 @@ void mal_pcm_s24_to_f32(void* dst, const void* src, mal_uint64 count, mal_dither
 {
 #ifdef MAL_USE_REFERENCE_CONVERSION_APIS
     mal_pcm_s24_to_f32__reference(dst, src, count, ditherMode);
-#else
-#ifdef MAL_USE_SSE
-    mal_pcm_s24_to_f32__sse(dst, src, count, ditherMode);
 #else
     mal_pcm_s24_to_f32__optimized(dst, src, count, ditherMode);
 #endif
-#endif
 }
 
 
@@ -18009,8 +20034,26 @@ void mal_pcm_s32_to_u8__optimized(void* dst, const void* src, mal_uint64 count,
     mal_pcm_s32_to_u8__reference(dst, src, count, ditherMode);
 }
 
-#ifdef MAL_USE_SSE
-void mal_pcm_s32_to_u8__sse(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
+#if defined(MAL_SUPPORT_SSE2)
+void mal_pcm_s32_to_u8__sse2(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
+{
+    mal_pcm_s32_to_u8__optimized(dst, src, count, ditherMode);
+}
+#endif
+#if defined(MAL_SUPPORT_AVX2)
+void mal_pcm_s32_to_u8__avx2(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
+{
+    mal_pcm_s32_to_u8__optimized(dst, src, count, ditherMode);
+}
+#endif
+#if defined(MAL_SUPPORT_AVX512)
+void mal_pcm_s32_to_u8__avx512(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
+{
+    mal_pcm_s32_to_u8__avx2(dst, src, count, ditherMode);
+}
+#endif
+#if defined(MAL_SUPPORT_NEON)
+void mal_pcm_s32_to_u8__neon(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
 {
     mal_pcm_s32_to_u8__optimized(dst, src, count, ditherMode);
 }
@@ -18020,13 +20063,9 @@ void mal_pcm_s32_to_u8(void* dst, const void* src, mal_uint64 count, mal_dither_
 {
 #ifdef MAL_USE_REFERENCE_CONVERSION_APIS
     mal_pcm_s32_to_u8__reference(dst, src, count, ditherMode);
-#else
-#ifdef MAL_USE_SSE
-    mal_pcm_s32_to_u8__sse(dst, src, count, ditherMode);
 #else
     mal_pcm_s32_to_u8__optimized(dst, src, count, ditherMode);
 #endif
-#endif
 }
 
 
@@ -18066,8 +20105,26 @@ void mal_pcm_s32_to_s16__optimized(void* dst, const void* src, mal_uint64 count,
     mal_pcm_s32_to_s16__reference(dst, src, count, ditherMode);
 }
 
-#ifdef MAL_USE_SSE
-void mal_pcm_s32_to_s16__sse(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
+#if defined(MAL_SUPPORT_SSE2)
+void mal_pcm_s32_to_s16__sse2(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
+{
+    mal_pcm_s32_to_s16__optimized(dst, src, count, ditherMode);
+}
+#endif
+#if defined(MAL_SUPPORT_AVX2)
+void mal_pcm_s32_to_s16__avx2(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
+{
+    mal_pcm_s32_to_s16__optimized(dst, src, count, ditherMode);
+}
+#endif
+#if defined(MAL_SUPPORT_AVX512)
+void mal_pcm_s32_to_s16__avx512(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
+{
+    mal_pcm_s32_to_s16__avx2(dst, src, count, ditherMode);
+}
+#endif
+#if defined(MAL_SUPPORT_NEON)
+void mal_pcm_s32_to_s16__neon(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
 {
     mal_pcm_s32_to_s16__optimized(dst, src, count, ditherMode);
 }
@@ -18077,13 +20134,9 @@ void mal_pcm_s32_to_s16(void* dst, const void* src, mal_uint64 count, mal_dither
 {
 #ifdef MAL_USE_REFERENCE_CONVERSION_APIS
     mal_pcm_s32_to_s16__reference(dst, src, count, ditherMode);
-#else
-#ifdef MAL_USE_SSE
-    mal_pcm_s32_to_s16__sse(dst, src, count, ditherMode);
 #else
     mal_pcm_s32_to_s16__optimized(dst, src, count, ditherMode);
 #endif
-#endif
 }
 
 
@@ -18108,8 +20161,26 @@ void mal_pcm_s32_to_s24__optimized(void* dst, const void* src, mal_uint64 count,
     mal_pcm_s32_to_s24__reference(dst, src, count, ditherMode);
 }
 
-#ifdef MAL_USE_SSE
-void mal_pcm_s32_to_s24__sse(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
+#if defined(MAL_SUPPORT_SSE2)
+void mal_pcm_s32_to_s24__sse2(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
+{
+    mal_pcm_s32_to_s24__optimized(dst, src, count, ditherMode);
+}
+#endif
+#if defined(MAL_SUPPORT_AVX2)
+void mal_pcm_s32_to_s24__avx2(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
+{
+    mal_pcm_s32_to_s24__optimized(dst, src, count, ditherMode);
+}
+#endif
+#if defined(MAL_SUPPORT_AVX512)
+void mal_pcm_s32_to_s24__avx512(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
+{
+    mal_pcm_s32_to_s24__avx2(dst, src, count, ditherMode);
+}
+#endif
+#if defined(MAL_SUPPORT_NEON)
+void mal_pcm_s32_to_s24__neon(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
 {
     mal_pcm_s32_to_s24__optimized(dst, src, count, ditherMode);
 }
@@ -18119,13 +20190,9 @@ void mal_pcm_s32_to_s24(void* dst, const void* src, mal_uint64 count, mal_dither
 {
 #ifdef MAL_USE_REFERENCE_CONVERSION_APIS
     mal_pcm_s32_to_s24__reference(dst, src, count, ditherMode);
-#else
-#ifdef MAL_USE_SSE
-    mal_pcm_s32_to_s24__sse(dst, src, count, ditherMode);
 #else
     mal_pcm_s32_to_s24__optimized(dst, src, count, ditherMode);
 #endif
-#endif
 }
 
 
@@ -18165,8 +20232,26 @@ void mal_pcm_s32_to_f32__optimized(void* dst, const void* src, mal_uint64 count,
     mal_pcm_s32_to_f32__reference(dst, src, count, ditherMode);
 }
 
-#ifdef MAL_USE_SSE
-void mal_pcm_s32_to_f32__sse(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
+#if defined(MAL_SUPPORT_SSE2)
+void mal_pcm_s32_to_f32__sse2(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
+{
+    mal_pcm_s32_to_f32__optimized(dst, src, count, ditherMode);
+}
+#endif
+#if defined(MAL_SUPPORT_AVX2)
+void mal_pcm_s32_to_f32__avx2(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
+{
+    mal_pcm_s32_to_f32__optimized(dst, src, count, ditherMode);
+}
+#endif
+#if defined(MAL_SUPPORT_AVX512)
+void mal_pcm_s32_to_f32__avx512(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
+{
+    mal_pcm_s32_to_f32__avx2(dst, src, count, ditherMode);
+}
+#endif
+#if defined(MAL_SUPPORT_NEON)
+void mal_pcm_s32_to_f32__neon(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
 {
     mal_pcm_s32_to_f32__optimized(dst, src, count, ditherMode);
 }
@@ -18176,13 +20261,9 @@ void mal_pcm_s32_to_f32(void* dst, const void* src, mal_uint64 count, mal_dither
 {
 #ifdef MAL_USE_REFERENCE_CONVERSION_APIS
     mal_pcm_s32_to_f32__reference(dst, src, count, ditherMode);
-#else
-#ifdef MAL_USE_SSE
-    mal_pcm_s32_to_f32__sse(dst, src, count, ditherMode);
 #else
     mal_pcm_s32_to_f32__optimized(dst, src, count, ditherMode);
 #endif
-#endif
 }
 
 
@@ -18244,58 +20325,350 @@ void mal_pcm_deinterleave_s32(void** dst, const void* src, mal_uint64 frameCount
 }
 
 
-// f32
-void mal_pcm_f32_to_u8__reference(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
-{
-    mal_uint8* dst_u8 = (mal_uint8*)dst;
-    const float* src_f32 = (const float*)src;
+// f32
+void mal_pcm_f32_to_u8__reference(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
+{
+    mal_uint8* dst_u8 = (mal_uint8*)dst;
+    const float* src_f32 = (const float*)src;
+
+    float ditherMin = 0;
+    float ditherMax = 0;
+    if (ditherMode != mal_dither_mode_none) {
+        ditherMin = 1.0f / -128;
+        ditherMax = 1.0f /  127;
+    }
+
+    mal_uint64 i;
+    for (i = 0; i < count; i += 1) {
+        float x = src_f32[i];
+        x = x + mal_dither_f32(ditherMode, ditherMin, ditherMax);
+        x = ((x < -1) ? -1 : ((x > 1) ? 1 : x));    // clip
+        x = x + 1;                                  // -1..1 to 0..2
+        x = x * 127.5f;                             // 0..2 to 0..255
+
+        dst_u8[i] = (mal_uint8)x;
+    }
+}
+
+void mal_pcm_f32_to_u8__optimized(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
+{
+    mal_pcm_f32_to_u8__reference(dst, src, count, ditherMode);
+}
+
+#if defined(MAL_SUPPORT_SSE2)
+void mal_pcm_f32_to_u8__sse2(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
+{
+    mal_pcm_f32_to_u8__optimized(dst, src, count, ditherMode);
+}
+#endif
+#if defined(MAL_SUPPORT_AVX2)
+void mal_pcm_f32_to_u8__avx2(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
+{
+    mal_pcm_f32_to_u8__optimized(dst, src, count, ditherMode);
+}
+#endif
+#if defined(MAL_SUPPORT_AVX512)
+void mal_pcm_f32_to_u8__avx512(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
+{
+    mal_pcm_f32_to_u8__avx2(dst, src, count, ditherMode);
+}
+#endif
+#if defined(MAL_SUPPORT_NEON)
+void mal_pcm_f32_to_u8__neon(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
+{
+    mal_pcm_f32_to_u8__optimized(dst, src, count, ditherMode);
+}
+#endif
+
+void mal_pcm_f32_to_u8(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
+{
+#ifdef MAL_USE_REFERENCE_CONVERSION_APIS
+    mal_pcm_f32_to_u8__reference(dst, src, count, ditherMode);
+#else
+    mal_pcm_f32_to_u8__optimized(dst, src, count, ditherMode);
+#endif
+}
+
+
+void mal_pcm_f32_to_s16__reference(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
+{
+    mal_int16* dst_s16 = (mal_int16*)dst;
+    const float* src_f32 = (const float*)src;
+
+    float ditherMin = 0;
+    float ditherMax = 0;
+    if (ditherMode != mal_dither_mode_none) {
+        ditherMin = 1.0f / -32768;
+        ditherMax = 1.0f /  32767;
+    }
+
+    mal_uint64 i;
+    for (i = 0; i < count; i += 1) {
+        float x = src_f32[i];
+        x = x + mal_dither_f32(ditherMode, ditherMin, ditherMax);
+        x = ((x < -1) ? -1 : ((x > 1) ? 1 : x));    // clip
+
+#if 0
+        // The accurate way.
+        x = x + 1;                                  // -1..1 to 0..2
+        x = x * 32767.5f;                           // 0..2 to 0..65535
+        x = x - 32768.0f;                           // 0...65535 to -32768..32767
+#else
+        // The fast way.
+        x = x * 32767.0f;                           // -1..1 to -32767..32767
+#endif
+
+        dst_s16[i] = (mal_int16)x;
+    }
+}
+
+void mal_pcm_f32_to_s16__optimized(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
+{
+    mal_int16* dst_s16 = (mal_int16*)dst;
+    const float* src_f32 = (const float*)src;
+
+    float ditherMin = 0;
+    float ditherMax = 0;
+    if (ditherMode != mal_dither_mode_none) {
+        ditherMin = 1.0f / -32768;
+        ditherMax = 1.0f /  32767;
+    }
+
+    mal_uint64 i = 0;
+
+    // Unrolled.
+    mal_uint64 count4 = count >> 2;
+    for (mal_uint64 i4 = 0; i4 < count4; i4 += 1) {
+        float d0 = mal_dither_f32(ditherMode, ditherMin, ditherMax);
+        float d1 = mal_dither_f32(ditherMode, ditherMin, ditherMax);
+        float d2 = mal_dither_f32(ditherMode, ditherMin, ditherMax);
+        float d3 = mal_dither_f32(ditherMode, ditherMin, ditherMax);
+        
+        float x0 = src_f32[i+0];
+        float x1 = src_f32[i+1];
+        float x2 = src_f32[i+2];
+        float x3 = src_f32[i+3];
+
+        x0 = x0 + d0;
+        x1 = x1 + d1;
+        x2 = x2 + d2;
+        x3 = x3 + d3;
+
+        x0 = ((x0 < -1) ? -1 : ((x0 > 1) ? 1 : x0));
+        x1 = ((x1 < -1) ? -1 : ((x1 > 1) ? 1 : x1));
+        x2 = ((x2 < -1) ? -1 : ((x2 > 1) ? 1 : x2));
+        x3 = ((x3 < -1) ? -1 : ((x3 > 1) ? 1 : x3));
+
+        x0 = x0 * 32767.0f;
+        x1 = x1 * 32767.0f;
+        x2 = x2 * 32767.0f;
+        x3 = x3 * 32767.0f;
+
+        dst_s16[i+0] = (mal_int16)x0;
+        dst_s16[i+1] = (mal_int16)x1;
+        dst_s16[i+2] = (mal_int16)x2;
+        dst_s16[i+3] = (mal_int16)x3;
+
+        i += 4;
+    }
+
+    // Leftover.
+    for (; i < count; i += 1) {
+        float x = src_f32[i];
+        x = x + mal_dither_f32(ditherMode, ditherMin, ditherMax);
+        x = ((x < -1) ? -1 : ((x > 1) ? 1 : x));    // clip
+        x = x * 32767.0f;                           // -1..1 to -32767..32767
+
+        dst_s16[i] = (mal_int16)x;
+    }
+}
+
+#if defined(MAL_SUPPORT_SSE2)
+void mal_pcm_f32_to_s16__sse2(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
+{
+    mal_int16* dst_s16 = (mal_int16*)dst;
+    const float* src_f32 = (const float*)src;
+
+    float ditherMin = 0;
+    float ditherMax = 0;
+    if (ditherMode != mal_dither_mode_none) {
+        ditherMin = 1.0f / -32768;
+        ditherMax = 1.0f /  32767;
+    }
+
+    mal_uint64 i = 0;
+
+    // SSE2. SSE allows us to output 8 s16's at a time which means our loop is unrolled 8 times.
+    mal_uint64 count8 = count >> 3;
+    for (mal_uint64 i8 = 0; i8 < count8; i8 += 1) {
+        __m128 d0;
+        __m128 d1;
+        if (ditherMode == mal_dither_mode_none) {
+            d0 = _mm_set1_ps(0);
+            d1 = _mm_set1_ps(0);
+        } else if (ditherMode == mal_dither_mode_rectangle) {
+            d0 = _mm_set_ps(
+                mal_dither_f32_rectangle(ditherMin, ditherMax),
+                mal_dither_f32_rectangle(ditherMin, ditherMax),
+                mal_dither_f32_rectangle(ditherMin, ditherMax),
+                mal_dither_f32_rectangle(ditherMin, ditherMax)
+            );
+            d1 = _mm_set_ps(
+                mal_dither_f32_rectangle(ditherMin, ditherMax),
+                mal_dither_f32_rectangle(ditherMin, ditherMax),
+                mal_dither_f32_rectangle(ditherMin, ditherMax),
+                mal_dither_f32_rectangle(ditherMin, ditherMax)
+            );
+        } else {
+            d0 = _mm_set_ps(
+                mal_dither_f32_triangle(ditherMin, ditherMax),
+                mal_dither_f32_triangle(ditherMin, ditherMax),
+                mal_dither_f32_triangle(ditherMin, ditherMax),
+                mal_dither_f32_triangle(ditherMin, ditherMax)
+            );
+            d1 = _mm_set_ps(
+                mal_dither_f32_triangle(ditherMin, ditherMax),
+                mal_dither_f32_triangle(ditherMin, ditherMax),
+                mal_dither_f32_triangle(ditherMin, ditherMax),
+                mal_dither_f32_triangle(ditherMin, ditherMax)
+            );
+        }
+
+        __m128 x0 = *((__m128*)(src_f32 + i) + 0);
+        __m128 x1 = *((__m128*)(src_f32 + i) + 1);
+
+        x0 = _mm_add_ps(x0, d0);
+        x1 = _mm_add_ps(x1, d1);
+
+        x0 = _mm_mul_ps(x0, _mm_set1_ps(32767.0f));
+        x1 = _mm_mul_ps(x1, _mm_set1_ps(32767.0f));
+
+        _mm_stream_si128(((__m128i*)(dst_s16 + i)), _mm_packs_epi32(_mm_cvttps_epi32(x0), _mm_cvttps_epi32(x1)));
+        
+        i += 8;
+    }
+
+
+    // Leftover.
+    for (; i < count; i += 1) {
+        float x = src_f32[i];
+        x = x + mal_dither_f32(ditherMode, ditherMin, ditherMax);
+        x = ((x < -1) ? -1 : ((x > 1) ? 1 : x));    // clip
+        x = x * 32767.0f;                           // -1..1 to -32767..32767
+
+        dst_s16[i] = (mal_int16)x;
+    }
+}
+#endif
+#if defined(MAL_SUPPORT_AVX2)
+void mal_pcm_f32_to_s16__avx2(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
+{
+    mal_int16* dst_s16 = (mal_int16*)dst;
+    const float* src_f32 = (const float*)src;
+
+    float ditherMin = 0;
+    float ditherMax = 0;
+    if (ditherMode != mal_dither_mode_none) {
+        ditherMin = 1.0f / -32768;
+        ditherMax = 1.0f /  32767;
+    }
+
+    mal_uint64 i = 0;
+
+    // AVX2. AVX2 allows us to output 16 s16's at a time which means our loop is unrolled 16 times.
+    mal_uint64 count16 = count >> 4;
+    for (mal_uint64 i16 = 0; i16 < count16; i16 += 1) {
+        __m256 d0;
+        __m256 d1;
+        if (ditherMode == mal_dither_mode_none) {
+            d0 = _mm256_set1_ps(0);
+            d1 = _mm256_set1_ps(0);
+        } else if (ditherMode == mal_dither_mode_rectangle) {
+            d0 = _mm256_set_ps(
+                mal_dither_f32_rectangle(ditherMin, ditherMax),
+                mal_dither_f32_rectangle(ditherMin, ditherMax),
+                mal_dither_f32_rectangle(ditherMin, ditherMax),
+                mal_dither_f32_rectangle(ditherMin, ditherMax),
+                mal_dither_f32_rectangle(ditherMin, ditherMax),
+                mal_dither_f32_rectangle(ditherMin, ditherMax),
+                mal_dither_f32_rectangle(ditherMin, ditherMax),
+                mal_dither_f32_rectangle(ditherMin, ditherMax)
+            );
+            d1 = _mm256_set_ps(
+                mal_dither_f32_rectangle(ditherMin, ditherMax),
+                mal_dither_f32_rectangle(ditherMin, ditherMax),
+                mal_dither_f32_rectangle(ditherMin, ditherMax),
+                mal_dither_f32_rectangle(ditherMin, ditherMax),
+                mal_dither_f32_rectangle(ditherMin, ditherMax),
+                mal_dither_f32_rectangle(ditherMin, ditherMax),
+                mal_dither_f32_rectangle(ditherMin, ditherMax),
+                mal_dither_f32_rectangle(ditherMin, ditherMax)
+            );
+        } else {
+            d0 = _mm256_set_ps(
+                mal_dither_f32_triangle(ditherMin, ditherMax),
+                mal_dither_f32_triangle(ditherMin, ditherMax),
+                mal_dither_f32_triangle(ditherMin, ditherMax),
+                mal_dither_f32_triangle(ditherMin, ditherMax),
+                mal_dither_f32_triangle(ditherMin, ditherMax),
+                mal_dither_f32_triangle(ditherMin, ditherMax),
+                mal_dither_f32_triangle(ditherMin, ditherMax),
+                mal_dither_f32_triangle(ditherMin, ditherMax)
+            );
+            d1 = _mm256_set_ps(
+                mal_dither_f32_triangle(ditherMin, ditherMax),
+                mal_dither_f32_triangle(ditherMin, ditherMax),
+                mal_dither_f32_triangle(ditherMin, ditherMax),
+                mal_dither_f32_triangle(ditherMin, ditherMax),
+                mal_dither_f32_triangle(ditherMin, ditherMax),
+                mal_dither_f32_triangle(ditherMin, ditherMax),
+                mal_dither_f32_triangle(ditherMin, ditherMax),
+                mal_dither_f32_triangle(ditherMin, ditherMax)
+            );
+        }
+
+        __m256 x0 = *((__m256*)(src_f32 + i) + 0);
+        __m256 x1 = *((__m256*)(src_f32 + i) + 1);
+
+        x0 = _mm256_add_ps(x0, d0);
+        x1 = _mm256_add_ps(x1, d1);
 
-    float ditherMin = 0;
-    float ditherMax = 0;
-    if (ditherMode != mal_dither_mode_none) {
-        ditherMin = 1.0f / -128;
-        ditherMax = 1.0f /  127;
+        x0 = _mm256_mul_ps(x0, _mm256_set1_ps(32767.0f));
+        x1 = _mm256_mul_ps(x1, _mm256_set1_ps(32767.0f));
+
+        // Computing the final result is a little more complicated for AVX2 than SSE2.
+        __m256i i0 = _mm256_cvttps_epi32(x0);
+        __m256i i1 = _mm256_cvttps_epi32(x1);
+        __m256i p0 = _mm256_permute2x128_si256(i0, i1, 0 | 32);
+        __m256i p1 = _mm256_permute2x128_si256(i0, i1, 1 | 48);
+        __m256i r  = _mm256_packs_epi32(p0, p1);
+
+        _mm256_stream_si256(((__m256i*)(dst_s16 + i)), r);
+
+        i += 16;
     }
 
-    mal_uint64 i;
-    for (i = 0; i < count; i += 1) {
+
+    // Leftover.
+    for (; i < count; i += 1) {
         float x = src_f32[i];
         x = x + mal_dither_f32(ditherMode, ditherMin, ditherMax);
         x = ((x < -1) ? -1 : ((x > 1) ? 1 : x));    // clip
-        x = x + 1;                                  // -1..1 to 0..2
-        x = x * 127.5f;                             // 0..2 to 0..255
+        x = x * 32767.0f;                           // -1..1 to -32767..32767
 
-        dst_u8[i] = (mal_uint8)x;
+        dst_s16[i] = (mal_int16)x;
     }
 }
-
-void mal_pcm_f32_to_u8__optimized(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
-{
-    mal_pcm_f32_to_u8__reference(dst, src, count, ditherMode);
-}
-
-#ifdef MAL_USE_SSE
-void mal_pcm_f32_to_u8__sse(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
-{
-    mal_pcm_f32_to_u8__optimized(dst, src, count, ditherMode);
-}
 #endif
-
-void mal_pcm_f32_to_u8(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
+#if defined(MAL_SUPPORT_AVX512)
+void mal_pcm_f32_to_s16__avx512(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
 {
-#ifdef MAL_USE_REFERENCE_CONVERSION_APIS
-    mal_pcm_f32_to_u8__reference(dst, src, count, ditherMode);
-#else
-#ifdef MAL_USE_SSE
-    mal_pcm_f32_to_u8__sse(dst, src, count, ditherMode);
-#else
-    mal_pcm_f32_to_u8__optimized(dst, src, count, ditherMode);
-#endif
-#endif
+    // TODO: Convert this from AVX to AVX-512.
+    mal_pcm_f32_to_s16__avx2(dst, src, count, ditherMode);
 }
-
-
-void mal_pcm_f32_to_s16__reference(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
+#endif
+#if defined(MAL_SUPPORT_NEON)
+void mal_pcm_f32_to_s16__neon(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
 {
     mal_int16* dst_s16 = (mal_int16*)dst;
     const float* src_f32 = (const float*)src;
@@ -18307,49 +20680,82 @@ void mal_pcm_f32_to_s16__reference(void* dst, const void* src, mal_uint64 count,
         ditherMax = 1.0f /  32767;
     }
 
-    mal_uint64 i;
-    for (i = 0; i < count; i += 1) {
+    mal_uint64 i = 0;
+
+    // NEON. NEON allows us to output 8 s16's at a time which means our loop is unrolled 8 times.
+    mal_uint64 count8 = count >> 3;
+    for (mal_uint64 i8 = 0; i8 < count8; i8 += 1) {
+        float32x4_t d0;
+        float32x4_t d1;
+        if (ditherMode == mal_dither_mode_none) {
+            d0 = vmovq_n_f32(0);
+            d1 = vmovq_n_f32(0);
+        } else if (ditherMode == mal_dither_mode_rectangle) {
+            float d0v[4];
+            d0v[0] = mal_dither_f32_rectangle(ditherMin, ditherMax);
+            d0v[1] = mal_dither_f32_rectangle(ditherMin, ditherMax);
+            d0v[2] = mal_dither_f32_rectangle(ditherMin, ditherMax);
+            d0v[3] = mal_dither_f32_rectangle(ditherMin, ditherMax);
+            d0 = vld1q_f32(d0v);
+
+            float d1v[4];
+            d1v[0] = mal_dither_f32_rectangle(ditherMin, ditherMax);
+            d1v[1] = mal_dither_f32_rectangle(ditherMin, ditherMax);
+            d1v[2] = mal_dither_f32_rectangle(ditherMin, ditherMax);
+            d1v[3] = mal_dither_f32_rectangle(ditherMin, ditherMax);
+            d1 = vld1q_f32(d1v);
+        } else {
+            float d0v[4];
+            d0v[0] = mal_dither_f32_triangle(ditherMin, ditherMax);
+            d0v[1] = mal_dither_f32_triangle(ditherMin, ditherMax);
+            d0v[2] = mal_dither_f32_triangle(ditherMin, ditherMax);
+            d0v[3] = mal_dither_f32_triangle(ditherMin, ditherMax);
+            d0 = vld1q_f32(d0v);
+
+            float d1v[4];
+            d1v[0] = mal_dither_f32_triangle(ditherMin, ditherMax);
+            d1v[1] = mal_dither_f32_triangle(ditherMin, ditherMax);
+            d1v[2] = mal_dither_f32_triangle(ditherMin, ditherMax);
+            d1v[3] = mal_dither_f32_triangle(ditherMin, ditherMax);
+            d1 = vld1q_f32(d1v);
+        }
+
+        float32x4_t x0 = *((float32x4_t*)(src_f32 + i) + 0);
+        float32x4_t x1 = *((float32x4_t*)(src_f32 + i) + 1);
+
+        x0 = vaddq_f32(x0, d0);
+        x1 = vaddq_f32(x1, d1);
+
+        x0 = vmulq_n_f32(x0, 32767.0f);
+        x1 = vmulq_n_f32(x1, 32767.0f);
+
+        int32x4_t i0 = vcvtq_s32_f32(x0);
+        int32x4_t i1 = vcvtq_s32_f32(x1);
+        *((int16x8_t*)(dst_s16 + i)) = vcombine_s16(vqmovn_s32(i0), vqmovn_s32(i1));
+
+        i += 8;
+    }
+
+
+    // Leftover.
+    for (; i < count; i += 1) {
         float x = src_f32[i];
         x = x + mal_dither_f32(ditherMode, ditherMin, ditherMax);
         x = ((x < -1) ? -1 : ((x > 1) ? 1 : x));    // clip
-
-#if 0
-        // The accurate way.
-        x = x + 1;                                  // -1..1 to 0..2
-        x = x * 32767.5f;                           // 0..2 to 0..65535
-        x = x - 32768.0f;                           // 0...65535 to -32768..32767
-#else
-        // The fast way.
         x = x * 32767.0f;                           // -1..1 to -32767..32767
-#endif
 
         dst_s16[i] = (mal_int16)x;
     }
 }
-
-void mal_pcm_f32_to_s16__optimized(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
-{
-    mal_pcm_f32_to_s16__reference(dst, src, count, ditherMode);
-}
-
-#ifdef MAL_USE_SSE
-void mal_pcm_f32_to_s16__sse(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
-{
-    mal_pcm_f32_to_s16__optimized(dst, src, count, ditherMode);
-}
 #endif
 
 void mal_pcm_f32_to_s16(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
 {
 #ifdef MAL_USE_REFERENCE_CONVERSION_APIS
     mal_pcm_f32_to_s16__reference(dst, src, count, ditherMode);
-#else
-#ifdef MAL_USE_SSE
-    mal_pcm_f32_to_s16__sse(dst, src, count, ditherMode);
 #else
     mal_pcm_f32_to_s16__optimized(dst, src, count, ditherMode);
 #endif
-#endif
 }
 
 
@@ -18387,8 +20793,26 @@ void mal_pcm_f32_to_s24__optimized(void* dst, const void* src, mal_uint64 count,
     mal_pcm_f32_to_s24__reference(dst, src, count, ditherMode);
 }
 
-#ifdef MAL_USE_SSE
-void mal_pcm_f32_to_s24__sse(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
+#if defined(MAL_SUPPORT_SSE2)
+void mal_pcm_f32_to_s24__sse2(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
+{
+    mal_pcm_f32_to_s24__optimized(dst, src, count, ditherMode);
+}
+#endif
+#if defined(MAL_SUPPORT_AVX2)
+void mal_pcm_f32_to_s24__avx2(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
+{
+    mal_pcm_f32_to_s24__optimized(dst, src, count, ditherMode);
+}
+#endif
+#if defined(MAL_SUPPORT_AVX512)
+void mal_pcm_f32_to_s24__avx512(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
+{
+    mal_pcm_f32_to_s24__avx2(dst, src, count, ditherMode);
+}
+#endif
+#if defined(MAL_SUPPORT_NEON)
+void mal_pcm_f32_to_s24__neon(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
 {
     mal_pcm_f32_to_s24__optimized(dst, src, count, ditherMode);
 }
@@ -18398,13 +20822,9 @@ void mal_pcm_f32_to_s24(void* dst, const void* src, mal_uint64 count, mal_dither
 {
 #ifdef MAL_USE_REFERENCE_CONVERSION_APIS
     mal_pcm_f32_to_s24__reference(dst, src, count, ditherMode);
-#else
-#ifdef MAL_USE_SSE
-    mal_pcm_f32_to_s24__sse(dst, src, count, ditherMode);
 #else
     mal_pcm_f32_to_s24__optimized(dst, src, count, ditherMode);
 #endif
-#endif
 }
 
 
@@ -18439,8 +20859,26 @@ void mal_pcm_f32_to_s32__optimized(void* dst, const void* src, mal_uint64 count,
     mal_pcm_f32_to_s32__reference(dst, src, count, ditherMode);
 }
 
-#ifdef MAL_USE_SSE
-void mal_pcm_f32_to_s32__sse(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
+#if defined(MAL_SUPPORT_SSE2)
+void mal_pcm_f32_to_s32__sse2(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
+{
+    mal_pcm_f32_to_s32__optimized(dst, src, count, ditherMode);
+}
+#endif
+#if defined(MAL_SUPPORT_AVX2)
+void mal_pcm_f32_to_s32__avx2(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
+{
+    mal_pcm_f32_to_s32__optimized(dst, src, count, ditherMode);
+}
+#endif
+#if defined(MAL_SUPPORT_AVX512)
+void mal_pcm_f32_to_s32__avx512(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
+{
+    mal_pcm_f32_to_s32__avx2(dst, src, count, ditherMode);
+}
+#endif
+#if defined(MAL_SUPPORT_NEON)
+void mal_pcm_f32_to_s32__neon(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
 {
     mal_pcm_f32_to_s32__optimized(dst, src, count, ditherMode);
 }
@@ -18450,13 +20888,9 @@ void mal_pcm_f32_to_s32(void* dst, const void* src, mal_uint64 count, mal_dither
 {
 #ifdef MAL_USE_REFERENCE_CONVERSION_APIS
     mal_pcm_f32_to_s32__reference(dst, src, count, ditherMode);
-#else
-#ifdef MAL_USE_SSE
-    mal_pcm_f32_to_s32__sse(dst, src, count, ditherMode);
 #else
     mal_pcm_f32_to_s32__optimized(dst, src, count, ditherMode);
 #endif
-#endif
 }
 
 
@@ -18511,114 +20945,491 @@ void mal_pcm_deinterleave_f32__reference(void** dst, const void* src, mal_uint64
     }
 }
 
-void mal_pcm_deinterleave_f32__optimized(void** dst, const void* src, mal_uint64 frameCount, mal_uint32 channels)
+void mal_pcm_deinterleave_f32__optimized(void** dst, const void* src, mal_uint64 frameCount, mal_uint32 channels)
+{
+    mal_pcm_deinterleave_f32__reference(dst, src, frameCount, channels);
+}
+
+void mal_pcm_deinterleave_f32(void** dst, const void* src, mal_uint64 frameCount, mal_uint32 channels)
+{
+#ifdef MAL_USE_REFERENCE_CONVERSION_APIS
+    mal_pcm_deinterleave_f32__reference(dst, src, frameCount, channels);
+#else
+    mal_pcm_deinterleave_f32__optimized(dst, src, frameCount, channels);
+#endif
+}
+
+
+void mal_format_converter_init_callbacks__default(mal_format_converter* pConverter)
+{
+    mal_assert(pConverter != NULL);
+
+    switch (pConverter->config.formatIn)
+    {
+        case mal_format_u8:
+        {
+            if (pConverter->config.formatOut == mal_format_u8) {
+                pConverter->onConvertPCM = mal_pcm_u8_to_u8;
+            } else if (pConverter->config.formatOut == mal_format_s16) {
+                pConverter->onConvertPCM = mal_pcm_u8_to_s16;
+            } else if (pConverter->config.formatOut == mal_format_s24) {
+                pConverter->onConvertPCM = mal_pcm_u8_to_s24;
+            } else if (pConverter->config.formatOut == mal_format_s32) {
+                pConverter->onConvertPCM = mal_pcm_u8_to_s32;
+            } else if (pConverter->config.formatOut == mal_format_f32) {
+                pConverter->onConvertPCM = mal_pcm_u8_to_f32;
+            }
+        } break;
+
+        case mal_format_s16:
+        {
+            if (pConverter->config.formatOut == mal_format_u8) {
+                pConverter->onConvertPCM = mal_pcm_s16_to_u8;
+            } else if (pConverter->config.formatOut == mal_format_s16) {
+                pConverter->onConvertPCM = mal_pcm_s16_to_s16;
+            } else if (pConverter->config.formatOut == mal_format_s24) {
+                pConverter->onConvertPCM = mal_pcm_s16_to_s24;
+            } else if (pConverter->config.formatOut == mal_format_s32) {
+                pConverter->onConvertPCM = mal_pcm_s16_to_s32;
+            } else if (pConverter->config.formatOut == mal_format_f32) {
+                pConverter->onConvertPCM = mal_pcm_s16_to_f32;
+            }
+        } break;
+
+        case mal_format_s24:
+        {
+            if (pConverter->config.formatOut == mal_format_u8) {
+                pConverter->onConvertPCM = mal_pcm_s24_to_u8;
+            } else if (pConverter->config.formatOut == mal_format_s16) {
+                pConverter->onConvertPCM = mal_pcm_s24_to_s16;
+            } else if (pConverter->config.formatOut == mal_format_s24) {
+                pConverter->onConvertPCM = mal_pcm_s24_to_s24;
+            } else if (pConverter->config.formatOut == mal_format_s32) {
+                pConverter->onConvertPCM = mal_pcm_s24_to_s32;
+            } else if (pConverter->config.formatOut == mal_format_f32) {
+                pConverter->onConvertPCM = mal_pcm_s24_to_f32;
+            }
+        } break;
+
+        case mal_format_s32:
+        {
+            if (pConverter->config.formatOut == mal_format_u8) {
+                pConverter->onConvertPCM = mal_pcm_s32_to_u8;
+            } else if (pConverter->config.formatOut == mal_format_s16) {
+                pConverter->onConvertPCM = mal_pcm_s32_to_s16;
+            } else if (pConverter->config.formatOut == mal_format_s24) {
+                pConverter->onConvertPCM = mal_pcm_s32_to_s24;
+            } else if (pConverter->config.formatOut == mal_format_s32) {
+                pConverter->onConvertPCM = mal_pcm_s32_to_s32;
+            } else if (pConverter->config.formatOut == mal_format_f32) {
+                pConverter->onConvertPCM = mal_pcm_s32_to_f32;
+            }
+        } break;
+
+        case mal_format_f32:
+        default:
+        {
+            if (pConverter->config.formatOut == mal_format_u8) {
+                pConverter->onConvertPCM = mal_pcm_f32_to_u8;
+            } else if (pConverter->config.formatOut == mal_format_s16) {
+                pConverter->onConvertPCM = mal_pcm_f32_to_s16;
+            } else if (pConverter->config.formatOut == mal_format_s24) {
+                pConverter->onConvertPCM = mal_pcm_f32_to_s24;
+            } else if (pConverter->config.formatOut == mal_format_s32) {
+                pConverter->onConvertPCM = mal_pcm_f32_to_s32;
+            } else if (pConverter->config.formatOut == mal_format_f32) {
+                pConverter->onConvertPCM = mal_pcm_f32_to_f32;
+            }
+        } break;
+    }
+}
+
+#if defined(MAL_SUPPORT_SSE2)
+void mal_format_converter_init_callbacks__sse2(mal_format_converter* pConverter)
+{
+    mal_assert(pConverter != NULL);
+
+    switch (pConverter->config.formatIn)
+    {
+        case mal_format_u8:
+        {
+            if (pConverter->config.formatOut == mal_format_u8) {
+                pConverter->onConvertPCM = mal_pcm_u8_to_u8;
+            } else if (pConverter->config.formatOut == mal_format_s16) {
+                pConverter->onConvertPCM = mal_pcm_u8_to_s16__sse2;
+            } else if (pConverter->config.formatOut == mal_format_s24) {
+                pConverter->onConvertPCM = mal_pcm_u8_to_s24__sse2;
+            } else if (pConverter->config.formatOut == mal_format_s32) {
+                pConverter->onConvertPCM = mal_pcm_u8_to_s32__sse2;
+            } else if (pConverter->config.formatOut == mal_format_f32) {
+                pConverter->onConvertPCM = mal_pcm_u8_to_f32__sse2;
+            }
+        } break;
+
+        case mal_format_s16:
+        {
+            if (pConverter->config.formatOut == mal_format_u8) {
+                pConverter->onConvertPCM = mal_pcm_s16_to_u8__sse2;
+            } else if (pConverter->config.formatOut == mal_format_s16) {
+                pConverter->onConvertPCM = mal_pcm_s16_to_s16;
+            } else if (pConverter->config.formatOut == mal_format_s24) {
+                pConverter->onConvertPCM = mal_pcm_s16_to_s24__sse2;
+            } else if (pConverter->config.formatOut == mal_format_s32) {
+                pConverter->onConvertPCM = mal_pcm_s16_to_s32__sse2;
+            } else if (pConverter->config.formatOut == mal_format_f32) {
+                pConverter->onConvertPCM = mal_pcm_s16_to_f32__sse2;
+            }
+        } break;
+
+        case mal_format_s24:
+        {
+            if (pConverter->config.formatOut == mal_format_u8) {
+                pConverter->onConvertPCM = mal_pcm_s24_to_u8__sse2;
+            } else if (pConverter->config.formatOut == mal_format_s16) {
+                pConverter->onConvertPCM = mal_pcm_s24_to_s16__sse2;
+            } else if (pConverter->config.formatOut == mal_format_s24) {
+                pConverter->onConvertPCM = mal_pcm_s24_to_s24;
+            } else if (pConverter->config.formatOut == mal_format_s32) {
+                pConverter->onConvertPCM = mal_pcm_s24_to_s32__sse2;
+            } else if (pConverter->config.formatOut == mal_format_f32) {
+                pConverter->onConvertPCM = mal_pcm_s24_to_f32__sse2;
+            }
+        } break;
+
+        case mal_format_s32:
+        {
+            if (pConverter->config.formatOut == mal_format_u8) {
+                pConverter->onConvertPCM = mal_pcm_s32_to_u8__sse2;
+            } else if (pConverter->config.formatOut == mal_format_s16) {
+                pConverter->onConvertPCM = mal_pcm_s32_to_s16__sse2;
+            } else if (pConverter->config.formatOut == mal_format_s24) {
+                pConverter->onConvertPCM = mal_pcm_s32_to_s24__sse2;
+            } else if (pConverter->config.formatOut == mal_format_s32) {
+                pConverter->onConvertPCM = mal_pcm_s32_to_s32;
+            } else if (pConverter->config.formatOut == mal_format_f32) {
+                pConverter->onConvertPCM = mal_pcm_s32_to_f32__sse2;
+            }
+        } break;
+
+        case mal_format_f32:
+        default:
+        {
+            if (pConverter->config.formatOut == mal_format_u8) {
+                pConverter->onConvertPCM = mal_pcm_f32_to_u8__sse2;
+            } else if (pConverter->config.formatOut == mal_format_s16) {
+                pConverter->onConvertPCM = mal_pcm_f32_to_s16__sse2;
+            } else if (pConverter->config.formatOut == mal_format_s24) {
+                pConverter->onConvertPCM = mal_pcm_f32_to_s24__sse2;
+            } else if (pConverter->config.formatOut == mal_format_s32) {
+                pConverter->onConvertPCM = mal_pcm_f32_to_s32__sse2;
+            } else if (pConverter->config.formatOut == mal_format_f32) {
+                pConverter->onConvertPCM = mal_pcm_f32_to_f32;
+            }
+        } break;
+    }
+}
+#endif
+
+#if defined(MAL_SUPPORT_AVX2)
+void mal_format_converter_init_callbacks__avx2(mal_format_converter* pConverter)
+{
+    mal_assert(pConverter != NULL);
+
+    switch (pConverter->config.formatIn)
+    {
+        case mal_format_u8:
+        {
+            if (pConverter->config.formatOut == mal_format_u8) {
+                pConverter->onConvertPCM = mal_pcm_u8_to_u8;
+            } else if (pConverter->config.formatOut == mal_format_s16) {
+                pConverter->onConvertPCM = mal_pcm_u8_to_s16__avx2;
+            } else if (pConverter->config.formatOut == mal_format_s24) {
+                pConverter->onConvertPCM = mal_pcm_u8_to_s24__avx2;
+            } else if (pConverter->config.formatOut == mal_format_s32) {
+                pConverter->onConvertPCM = mal_pcm_u8_to_s32__avx2;
+            } else if (pConverter->config.formatOut == mal_format_f32) {
+                pConverter->onConvertPCM = mal_pcm_u8_to_f32__avx2;
+            }
+        } break;
+
+        case mal_format_s16:
+        {
+            if (pConverter->config.formatOut == mal_format_u8) {
+                pConverter->onConvertPCM = mal_pcm_s16_to_u8__avx2;
+            } else if (pConverter->config.formatOut == mal_format_s16) {
+                pConverter->onConvertPCM = mal_pcm_s16_to_s16;
+            } else if (pConverter->config.formatOut == mal_format_s24) {
+                pConverter->onConvertPCM = mal_pcm_s16_to_s24__avx2;
+            } else if (pConverter->config.formatOut == mal_format_s32) {
+                pConverter->onConvertPCM = mal_pcm_s16_to_s32__avx2;
+            } else if (pConverter->config.formatOut == mal_format_f32) {
+                pConverter->onConvertPCM = mal_pcm_s16_to_f32__avx2;
+            }
+        } break;
+
+        case mal_format_s24:
+        {
+            if (pConverter->config.formatOut == mal_format_u8) {
+                pConverter->onConvertPCM = mal_pcm_s24_to_u8__avx2;
+            } else if (pConverter->config.formatOut == mal_format_s16) {
+                pConverter->onConvertPCM = mal_pcm_s24_to_s16__avx2;
+            } else if (pConverter->config.formatOut == mal_format_s24) {
+                pConverter->onConvertPCM = mal_pcm_s24_to_s24;
+            } else if (pConverter->config.formatOut == mal_format_s32) {
+                pConverter->onConvertPCM = mal_pcm_s24_to_s32__avx2;
+            } else if (pConverter->config.formatOut == mal_format_f32) {
+                pConverter->onConvertPCM = mal_pcm_s24_to_f32__avx2;
+            }
+        } break;
+
+        case mal_format_s32:
+        {
+            if (pConverter->config.formatOut == mal_format_u8) {
+                pConverter->onConvertPCM = mal_pcm_s32_to_u8__avx2;
+            } else if (pConverter->config.formatOut == mal_format_s16) {
+                pConverter->onConvertPCM = mal_pcm_s32_to_s16__avx2;
+            } else if (pConverter->config.formatOut == mal_format_s24) {
+                pConverter->onConvertPCM = mal_pcm_s32_to_s24__avx2;
+            } else if (pConverter->config.formatOut == mal_format_s32) {
+                pConverter->onConvertPCM = mal_pcm_s32_to_s32;
+            } else if (pConverter->config.formatOut == mal_format_f32) {
+                pConverter->onConvertPCM = mal_pcm_s32_to_f32__avx2;
+            }
+        } break;
+
+        case mal_format_f32:
+        default:
+        {
+            if (pConverter->config.formatOut == mal_format_u8) {
+                pConverter->onConvertPCM = mal_pcm_f32_to_u8__avx2;
+            } else if (pConverter->config.formatOut == mal_format_s16) {
+                pConverter->onConvertPCM = mal_pcm_f32_to_s16__avx2;
+            } else if (pConverter->config.formatOut == mal_format_s24) {
+                pConverter->onConvertPCM = mal_pcm_f32_to_s24__avx2;
+            } else if (pConverter->config.formatOut == mal_format_s32) {
+                pConverter->onConvertPCM = mal_pcm_f32_to_s32__avx2;
+            } else if (pConverter->config.formatOut == mal_format_f32) {
+                pConverter->onConvertPCM = mal_pcm_f32_to_f32;
+            }
+        } break;
+    }
+}
+#endif
+
+#if defined(MAL_SUPPORT_AVX512)
+void mal_format_converter_init_callbacks__avx512(mal_format_converter* pConverter)
 {
-    mal_pcm_deinterleave_f32__reference(dst, src, frameCount, channels);
-}
+    mal_assert(pConverter != NULL);
 
-void mal_pcm_deinterleave_f32(void** dst, const void* src, mal_uint64 frameCount, mal_uint32 channels)
-{
-#ifdef MAL_USE_REFERENCE_CONVERSION_APIS
-    mal_pcm_deinterleave_f32__reference(dst, src, frameCount, channels);
-#else
-    mal_pcm_deinterleave_f32__optimized(dst, src, frameCount, channels);
-#endif
-}
+    switch (pConverter->config.formatIn)
+    {
+        case mal_format_u8:
+        {
+            if (pConverter->config.formatOut == mal_format_u8) {
+                pConverter->onConvertPCM = mal_pcm_u8_to_u8;
+            } else if (pConverter->config.formatOut == mal_format_s16) {
+                pConverter->onConvertPCM = mal_pcm_u8_to_s16__avx512;
+            } else if (pConverter->config.formatOut == mal_format_s24) {
+                pConverter->onConvertPCM = mal_pcm_u8_to_s24__avx512;
+            } else if (pConverter->config.formatOut == mal_format_s32) {
+                pConverter->onConvertPCM = mal_pcm_u8_to_s32__avx512;
+            } else if (pConverter->config.formatOut == mal_format_f32) {
+                pConverter->onConvertPCM = mal_pcm_u8_to_f32__avx512;
+            }
+        } break;
 
+        case mal_format_s16:
+        {
+            if (pConverter->config.formatOut == mal_format_u8) {
+                pConverter->onConvertPCM = mal_pcm_s16_to_u8__avx512;
+            } else if (pConverter->config.formatOut == mal_format_s16) {
+                pConverter->onConvertPCM = mal_pcm_s16_to_s16;
+            } else if (pConverter->config.formatOut == mal_format_s24) {
+                pConverter->onConvertPCM = mal_pcm_s16_to_s24__avx512;
+            } else if (pConverter->config.formatOut == mal_format_s32) {
+                pConverter->onConvertPCM = mal_pcm_s16_to_s32__avx512;
+            } else if (pConverter->config.formatOut == mal_format_f32) {
+                pConverter->onConvertPCM = mal_pcm_s16_to_f32__avx512;
+            }
+        } break;
 
+        case mal_format_s24:
+        {
+            if (pConverter->config.formatOut == mal_format_u8) {
+                pConverter->onConvertPCM = mal_pcm_s24_to_u8__avx512;
+            } else if (pConverter->config.formatOut == mal_format_s16) {
+                pConverter->onConvertPCM = mal_pcm_s24_to_s16__avx512;
+            } else if (pConverter->config.formatOut == mal_format_s24) {
+                pConverter->onConvertPCM = mal_pcm_s24_to_s24;
+            } else if (pConverter->config.formatOut == mal_format_s32) {
+                pConverter->onConvertPCM = mal_pcm_s24_to_s32__avx512;
+            } else if (pConverter->config.formatOut == mal_format_f32) {
+                pConverter->onConvertPCM = mal_pcm_s24_to_f32__avx512;
+            }
+        } break;
 
-mal_result mal_format_converter_init(const mal_format_converter_config* pConfig, mal_format_converter* pConverter)
-{
-    if (pConverter == NULL) {
-        return MAL_INVALID_ARGS;
-    }
-    mal_zero_object(pConverter);
+        case mal_format_s32:
+        {
+            if (pConverter->config.formatOut == mal_format_u8) {
+                pConverter->onConvertPCM = mal_pcm_s32_to_u8__avx512;
+            } else if (pConverter->config.formatOut == mal_format_s16) {
+                pConverter->onConvertPCM = mal_pcm_s32_to_s16__avx512;
+            } else if (pConverter->config.formatOut == mal_format_s24) {
+                pConverter->onConvertPCM = mal_pcm_s32_to_s24__avx512;
+            } else if (pConverter->config.formatOut == mal_format_s32) {
+                pConverter->onConvertPCM = mal_pcm_s32_to_s32;
+            } else if (pConverter->config.formatOut == mal_format_f32) {
+                pConverter->onConvertPCM = mal_pcm_s32_to_f32__avx512;
+            }
+        } break;
 
-    if (pConfig == NULL) {
-        return MAL_INVALID_ARGS;
+        case mal_format_f32:
+        default:
+        {
+            if (pConverter->config.formatOut == mal_format_u8) {
+                pConverter->onConvertPCM = mal_pcm_f32_to_u8__avx512;
+            } else if (pConverter->config.formatOut == mal_format_s16) {
+                pConverter->onConvertPCM = mal_pcm_f32_to_s16__avx512;
+            } else if (pConverter->config.formatOut == mal_format_s24) {
+                pConverter->onConvertPCM = mal_pcm_f32_to_s24__avx512;
+            } else if (pConverter->config.formatOut == mal_format_s32) {
+                pConverter->onConvertPCM = mal_pcm_f32_to_s32__avx512;
+            } else if (pConverter->config.formatOut == mal_format_f32) {
+                pConverter->onConvertPCM = mal_pcm_f32_to_f32;
+            }
+        } break;
     }
+}
+#endif
 
-    pConverter->config = *pConfig;
+#if defined(MAL_SUPPORT_NEON)
+void mal_format_converter_init_callbacks__neon(mal_format_converter* pConverter)
+{
+    mal_assert(pConverter != NULL);
 
-    switch (pConfig->formatIn)
+    switch (pConverter->config.formatIn)
     {
         case mal_format_u8:
         {
-            if (pConfig->formatOut == mal_format_u8) {
+            if (pConverter->config.formatOut == mal_format_u8) {
                 pConverter->onConvertPCM = mal_pcm_u8_to_u8;
-            } else if (pConfig->formatOut == mal_format_s16) {
-                pConverter->onConvertPCM = mal_pcm_u8_to_s16;
-            } else if (pConfig->formatOut == mal_format_s24) {
-                pConverter->onConvertPCM = mal_pcm_u8_to_s24;
-            } else if (pConfig->formatOut == mal_format_s32) {
-                pConverter->onConvertPCM = mal_pcm_u8_to_s32;
-            } else if (pConfig->formatOut == mal_format_f32) {
-                pConverter->onConvertPCM = mal_pcm_u8_to_f32;
+            } else if (pConverter->config.formatOut == mal_format_s16) {
+                pConverter->onConvertPCM = mal_pcm_u8_to_s16__neon;
+            } else if (pConverter->config.formatOut == mal_format_s24) {
+                pConverter->onConvertPCM = mal_pcm_u8_to_s24__neon;
+            } else if (pConverter->config.formatOut == mal_format_s32) {
+                pConverter->onConvertPCM = mal_pcm_u8_to_s32__neon;
+            } else if (pConverter->config.formatOut == mal_format_f32) {
+                pConverter->onConvertPCM = mal_pcm_u8_to_f32__neon;
             }
         } break;
 
         case mal_format_s16:
         {
-            if (pConfig->formatOut == mal_format_u8) {
-                pConverter->onConvertPCM = mal_pcm_s16_to_u8;
-            } else if (pConfig->formatOut == mal_format_s16) {
+            if (pConverter->config.formatOut == mal_format_u8) {
+                pConverter->onConvertPCM = mal_pcm_s16_to_u8__neon;
+            } else if (pConverter->config.formatOut == mal_format_s16) {
                 pConverter->onConvertPCM = mal_pcm_s16_to_s16;
-            } else if (pConfig->formatOut == mal_format_s24) {
-                pConverter->onConvertPCM = mal_pcm_s16_to_s24;
-            } else if (pConfig->formatOut == mal_format_s32) {
-                pConverter->onConvertPCM = mal_pcm_s16_to_s32;
-            } else if (pConfig->formatOut == mal_format_f32) {
-                pConverter->onConvertPCM = mal_pcm_s16_to_f32;
+            } else if (pConverter->config.formatOut == mal_format_s24) {
+                pConverter->onConvertPCM = mal_pcm_s16_to_s24__neon;
+            } else if (pConverter->config.formatOut == mal_format_s32) {
+                pConverter->onConvertPCM = mal_pcm_s16_to_s32__neon;
+            } else if (pConverter->config.formatOut == mal_format_f32) {
+                pConverter->onConvertPCM = mal_pcm_s16_to_f32__neon;
             }
         } break;
 
         case mal_format_s24:
         {
-            if (pConfig->formatOut == mal_format_u8) {
-                pConverter->onConvertPCM = mal_pcm_s24_to_u8;
-            } else if (pConfig->formatOut == mal_format_s16) {
-                pConverter->onConvertPCM = mal_pcm_s24_to_s16;
-            } else if (pConfig->formatOut == mal_format_s24) {
+            if (pConverter->config.formatOut == mal_format_u8) {
+                pConverter->onConvertPCM = mal_pcm_s24_to_u8__neon;
+            } else if (pConverter->config.formatOut == mal_format_s16) {
+                pConverter->onConvertPCM = mal_pcm_s24_to_s16__neon;
+            } else if (pConverter->config.formatOut == mal_format_s24) {
                 pConverter->onConvertPCM = mal_pcm_s24_to_s24;
-            } else if (pConfig->formatOut == mal_format_s32) {
-                pConverter->onConvertPCM = mal_pcm_s24_to_s32;
-            } else if (pConfig->formatOut == mal_format_f32) {
-                pConverter->onConvertPCM = mal_pcm_s24_to_f32;
+            } else if (pConverter->config.formatOut == mal_format_s32) {
+                pConverter->onConvertPCM = mal_pcm_s24_to_s32__neon;
+            } else if (pConverter->config.formatOut == mal_format_f32) {
+                pConverter->onConvertPCM = mal_pcm_s24_to_f32__neon;
             }
         } break;
 
         case mal_format_s32:
         {
-            if (pConfig->formatOut == mal_format_u8) {
-                pConverter->onConvertPCM = mal_pcm_s32_to_u8;
-            } else if (pConfig->formatOut == mal_format_s16) {
-                pConverter->onConvertPCM = mal_pcm_s32_to_s16;
-            } else if (pConfig->formatOut == mal_format_s24) {
-                pConverter->onConvertPCM = mal_pcm_s32_to_s24;
-            } else if (pConfig->formatOut == mal_format_s32) {
+            if (pConverter->config.formatOut == mal_format_u8) {
+                pConverter->onConvertPCM = mal_pcm_s32_to_u8__neon;
+            } else if (pConverter->config.formatOut == mal_format_s16) {
+                pConverter->onConvertPCM = mal_pcm_s32_to_s16__neon;
+            } else if (pConverter->config.formatOut == mal_format_s24) {
+                pConverter->onConvertPCM = mal_pcm_s32_to_s24__neon;
+            } else if (pConverter->config.formatOut == mal_format_s32) {
                 pConverter->onConvertPCM = mal_pcm_s32_to_s32;
-            } else if (pConfig->formatOut == mal_format_f32) {
-                pConverter->onConvertPCM = mal_pcm_s32_to_f32;
+            } else if (pConverter->config.formatOut == mal_format_f32) {
+                pConverter->onConvertPCM = mal_pcm_s32_to_f32__neon;
             }
         } break;
 
         case mal_format_f32:
         default:
         {
-            if (pConfig->formatOut == mal_format_u8) {
-                pConverter->onConvertPCM = mal_pcm_f32_to_u8;
-            } else if (pConfig->formatOut == mal_format_s16) {
-                pConverter->onConvertPCM = mal_pcm_f32_to_s16;
-            } else if (pConfig->formatOut == mal_format_s24) {
-                pConverter->onConvertPCM = mal_pcm_f32_to_s24;
-            } else if (pConfig->formatOut == mal_format_s32) {
-                pConverter->onConvertPCM = mal_pcm_f32_to_s32;
-            } else if (pConfig->formatOut == mal_format_f32) {
+            if (pConverter->config.formatOut == mal_format_u8) {
+                pConverter->onConvertPCM = mal_pcm_f32_to_u8__neon;
+            } else if (pConverter->config.formatOut == mal_format_s16) {
+                pConverter->onConvertPCM = mal_pcm_f32_to_s16__neon;
+            } else if (pConverter->config.formatOut == mal_format_s24) {
+                pConverter->onConvertPCM = mal_pcm_f32_to_s24__neon;
+            } else if (pConverter->config.formatOut == mal_format_s32) {
+                pConverter->onConvertPCM = mal_pcm_f32_to_s32__neon;
+            } else if (pConverter->config.formatOut == mal_format_f32) {
                 pConverter->onConvertPCM = mal_pcm_f32_to_f32;
             }
         } break;
     }
+}
+#endif
+
+mal_result mal_format_converter_init(const mal_format_converter_config* pConfig, mal_format_converter* pConverter)
+{
+    if (pConverter == NULL) {
+        return MAL_INVALID_ARGS;
+    }
+    mal_zero_object(pConverter);
+
+    if (pConfig == NULL) {
+        return MAL_INVALID_ARGS;
+    }
+
+    pConverter->config = *pConfig;
 
+    // SIMD
+    pConverter->useSSE2   = mal_has_sse2()    && !pConfig->noSSE2;
+    pConverter->useAVX2   = mal_has_avx2()    && !pConfig->noAVX2;
+    pConverter->useAVX512 = mal_has_avx512f() && !pConfig->noAVX512;
+    pConverter->useNEON   = mal_has_neon()    && !pConfig->noNEON;
+
+#if defined(MAL_SUPPORT_AVX512)
+    if (pConverter->useAVX512) {
+        mal_format_converter_init_callbacks__avx512(pConverter);
+    } else
+#endif
+#if defined(MAL_SUPPORT_AVX2)
+    if (pConverter->useAVX2) {
+        mal_format_converter_init_callbacks__avx2(pConverter);
+    } else
+#endif
+#if defined(MAL_SUPPORT_SSE2)
+    if (pConverter->useSSE2) {
+        mal_format_converter_init_callbacks__sse2(pConverter);
+    } else
+#endif
+#if defined(MAL_SUPPORT_NEON)
+    if (pConverter->useNEON) {
+        mal_format_converter_init_callbacks__neon(pConverter);
+    } else
+#endif
+    {
+        mal_format_converter_init_callbacks__default(pConverter);
+    }
 
     switch (pConfig->formatOut)
     {
@@ -19185,7 +21996,7 @@ mal_result mal_channel_router_init(const mal_channel_router_config* pConfig, mal
 
     // SIMD
     pRouter->useSSE2   = mal_has_sse2()    && !pConfig->noSSE2;
-    pRouter->useAVX    = mal_has_avx()     && !pConfig->noAVX;
+    pRouter->useAVX2   = mal_has_avx2()    && !pConfig->noAVX2;
     pRouter->useAVX512 = mal_has_avx512f() && !pConfig->noAVX512;
     pRouter->useNEON   = mal_has_neon()    && !pConfig->noNEON;
 
@@ -19369,9 +22180,9 @@ static MAL_INLINE mal_bool32 mal_channel_router__can_use_sse2(mal_channel_router
     return pRouter->useSSE2 && (((mal_uintptr)pSamplesOut & 15) == 0) && (((mal_uintptr)pSamplesIn & 15) == 0);
 }
 
-static MAL_INLINE mal_bool32 mal_channel_router__can_use_avx(mal_channel_router* pRouter, const float* pSamplesOut, const float* pSamplesIn)
+static MAL_INLINE mal_bool32 mal_channel_router__can_use_avx2(mal_channel_router* pRouter, const float* pSamplesOut, const float* pSamplesIn)
 {
-    return pRouter->useAVX && (((mal_uintptr)pSamplesOut & 31) == 0) && (((mal_uintptr)pSamplesIn & 31) == 0);
+    return pRouter->useAVX2 && (((mal_uintptr)pSamplesOut & 31) == 0) && (((mal_uintptr)pSamplesIn & 31) == 0);
 }
 
 static MAL_INLINE mal_bool32 mal_channel_router__can_use_avx512(mal_channel_router* pRouter, const float* pSamplesOut, const float* pSamplesIn)
@@ -19438,8 +22249,8 @@ void mal_channel_router__do_routing(mal_channel_router* pRouter, mal_uint64 fram
                 }
                 else
 #endif
-#if defined(MAL_SUPPORT_AVX)
-                if (mal_channel_router__can_use_avx(pRouter, ppSamplesOut[iChannelOut], ppSamplesIn[iChannelIn])) {
+#if defined(MAL_SUPPORT_AVX2)
+                if (mal_channel_router__can_use_avx2(pRouter, ppSamplesOut[iChannelOut], ppSamplesIn[iChannelIn])) {
                     __m256 weight = _mm256_set1_ps(pRouter->weights[iChannelIn][iChannelOut]);
 
                     mal_uint64 frameCount8 = frameCount/8;
@@ -19644,7 +22455,7 @@ void mal_src__build_sinc_table__sinc(mal_src* pSRC)
     mal_assert(pSRC != NULL);
 
     pSRC->sinc.table[0] = 1.0f;
-    for (int i = 1; i < mal_countof(pSRC->sinc.table); i += 1) {
+    for (mal_uint32 i = 1; i < mal_countof(pSRC->sinc.table); i += 1) {
         double x = i*MAL_PI_D / MAL_SRC_SINC_LOOKUP_TABLE_RESOLUTION;
         pSRC->sinc.table[i] = (float)(sin(x)/x);
     }
@@ -19660,7 +22471,7 @@ void mal_src__build_sinc_table__hann(mal_src* pSRC)
 {
     mal_src__build_sinc_table__sinc(pSRC);
 
-    for (int i = 0; i < mal_countof(pSRC->sinc.table); i += 1) {
+    for (mal_uint32 i = 0; i < mal_countof(pSRC->sinc.table); i += 1) {
         double x = pSRC->sinc.table[i];
         double N = MAL_SRC_SINC_MAX_WINDOW_WIDTH*2;
         double n = ((double)(i) / MAL_SRC_SINC_LOOKUP_TABLE_RESOLUTION) + MAL_SRC_SINC_MAX_WINDOW_WIDTH;
@@ -19687,6 +22498,12 @@ mal_result mal_src_init(const mal_src_config* pConfig, mal_src* pSRC)
 
     pSRC->config = *pConfig;
 
+    // SIMD
+    pSRC->useSSE2   = mal_has_sse2()    && !pConfig->noSSE2;
+    pSRC->useAVX2   = mal_has_avx2()    && !pConfig->noAVX2;
+    pSRC->useAVX512 = mal_has_avx512f() && !pConfig->noAVX512;
+    pSRC->useNEON   = mal_has_neon()    && !pConfig->noNEON;
+
     if (pSRC->config.algorithm == mal_src_algorithm_sinc) {
         // Make sure the window width within bounds.
         if (pSRC->config.sinc.windowWidth == 0) {
@@ -19858,7 +22675,7 @@ mal_uint64 mal_src_read_deinterleaved__linear(mal_src* pSRC, mal_uint64 frameCou
 
         // At this point we have a bunch of frames that the client has given to us for processing. From this we can determine the maximum number of output frames
         // that can be processed from this input. We want to output as many samples as possible from our input data.
-        float tAvailable = framesReadFromClient - tBeg;
+        float tAvailable = framesReadFromClient - tBeg - 1; // Subtract 1 because the last input sample is needed for interpolation and cannot be included in the output sample count calculation.
 
         mal_uint32 maxOutputFramesToRead = (mal_uint32)(tAvailable / factor);
         if (maxOutputFramesToRead == 0) {
@@ -19919,6 +22736,9 @@ mal_uint64 mal_src_read_deinterleaved__linear(mal_src* pSRC, mal_uint64 frameCou
                 float iNextSample = iPrevSample + 1;
                 float alpha = t - iPrevSample;
 
+                mal_assert(iPrevSample < mal_countof(pSRC->linear.input[iChannel]));
+                mal_assert(iNextSample < mal_countof(pSRC->linear.input[iChannel]));
+
                 float prevSample = ppSamplesFromClient[iChannel][(mal_uint32)iPrevSample];
                 float nextSample = ppSamplesFromClient[iChannel][(mal_uint32)iNextSample];
 
@@ -20010,6 +22830,9 @@ mal_src_config mal_src_config_init(mal_uint32 sampleRateIn, mal_uint32 sampleRat
 //
 ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
+// Comment this to disable interpolation of table lookups. Less accurate, but faster.
+#define MAL_USE_SINC_TABLE_INTERPOLATION
+
 // Retrieves a sample from the input buffer's window. Values >= 0 retrieve future samples. Negative values return past samples.
 static MAL_INLINE float mal_src_sinc__get_input_sample_from_window(const mal_src* pSRC, mal_uint32 channel, mal_uint32 windowPosInSamples, mal_int32 sampleIndex)
 {
@@ -20030,14 +22853,14 @@ static MAL_INLINE float mal_src_sinc__interpolation_factor(const mal_src* pSRC,
     mal_assert(pSRC != NULL);
 
     float xabs = (float)fabs(x);
-    if (xabs >= MAL_SRC_SINC_MAX_WINDOW_WIDTH /*pSRC->config.sinc.windowWidth*/) {
-        return 0;
-    }
+    //if (xabs >= MAL_SRC_SINC_MAX_WINDOW_WIDTH /*pSRC->config.sinc.windowWidth*/) {
+    //    xabs = 1;   // <-- A non-zero integer will always return 0.
+    //}
 
     xabs = xabs * MAL_SRC_SINC_LOOKUP_TABLE_RESOLUTION;
     mal_int32 ixabs = (mal_int32)xabs;
 
-#if 1
+#if defined(MAL_USE_SINC_TABLE_INTERPOLATION)
     float a = xabs - ixabs;
     return mal_mix_f32_fast(pSRC->sinc.table[ixabs], pSRC->sinc.table[ixabs+1], a);
 #else
@@ -20045,6 +22868,146 @@ static MAL_INLINE float mal_src_sinc__interpolation_factor(const mal_src* pSRC,
 #endif
 }
 
+#if defined(MAL_SUPPORT_SSE2)
+static MAL_INLINE __m128 mal_fabsf_sse2(__m128 x)
+{
+    return _mm_and_ps(_mm_castsi128_ps(_mm_set1_epi32(0x7FFFFFFF)), x);
+}
+
+static MAL_INLINE __m128 mal_truncf_sse2(__m128 x)
+{
+    return _mm_cvtepi32_ps(_mm_cvttps_epi32(x));
+}
+
+static MAL_INLINE __m128 mal_src_sinc__interpolation_factor__sse2(const mal_src* pSRC, __m128 x)
+{
+    //__m128 windowWidth128 = _mm_set1_ps(MAL_SRC_SINC_MAX_WINDOW_WIDTH);
+    __m128 resolution128  = _mm_set1_ps(MAL_SRC_SINC_LOOKUP_TABLE_RESOLUTION);
+    //__m128 one            = _mm_set1_ps(1);
+
+    __m128 xabs = mal_fabsf_sse2(x);
+
+    // if (MAL_SRC_SINC_MAX_WINDOW_WIDTH <= xabs) xabs = 1 else xabs = xabs;
+    //__m128 xcmp = _mm_cmp_ps(windowWidth128, xabs, 2);                      // 2 = Less than or equal = _mm_cmple_ps.
+    //xabs = _mm_or_ps(_mm_and_ps(one, xcmp), _mm_andnot_ps(xcmp, xabs));     // xabs = (xcmp) ? 1 : xabs;
+
+    xabs = _mm_mul_ps(xabs, resolution128);
+    __m128i ixabs = _mm_cvttps_epi32(xabs);
+
+    int* ixabsv = (int*)&ixabs;
+    
+    __m128 lo = _mm_set_ps(
+        pSRC->sinc.table[ixabsv[3]],
+        pSRC->sinc.table[ixabsv[2]],
+        pSRC->sinc.table[ixabsv[1]],
+        pSRC->sinc.table[ixabsv[0]]
+    );
+
+    __m128 hi = _mm_set_ps(
+        pSRC->sinc.table[ixabsv[3]+1],
+        pSRC->sinc.table[ixabsv[2]+1],
+        pSRC->sinc.table[ixabsv[1]+1],
+        pSRC->sinc.table[ixabsv[0]+1]
+    );
+
+    __m128 a = _mm_sub_ps(xabs, _mm_cvtepi32_ps(ixabs));
+    __m128 r = mal_mix_f32_fast__sse2(lo, hi, a);
+
+    return r;
+}
+#endif
+
+#if defined(MAL_SUPPORT_AVX2)
+static MAL_INLINE __m256 mal_fabsf_avx2(__m256 x)
+{
+    return _mm256_and_ps(_mm256_castsi256_ps(_mm256_set1_epi32(0x7FFFFFFF)), x);
+}
+
+#if 0
+static MAL_INLINE __m256 mal_src_sinc__interpolation_factor__avx2(const mal_src* pSRC, __m256 x)
+{
+    //__m256 windowWidth256 = _mm256_set1_ps(MAL_SRC_SINC_MAX_WINDOW_WIDTH);
+    __m256 resolution256  = _mm256_set1_ps(MAL_SRC_SINC_LOOKUP_TABLE_RESOLUTION);
+    //__m256 one            = _mm256_set1_ps(1);
+
+    __m256 xabs = mal_fabsf_avx2(x);
+
+    // if (MAL_SRC_SINC_MAX_WINDOW_WIDTH <= xabs) xabs = 1 else xabs = xabs;
+    //__m256 xcmp = _mm256_cmp_ps(windowWidth256, xabs, 2);                      // 2 = Less than or equal = _mm_cmple_ps.
+    //xabs = _mm256_or_ps(_mm256_and_ps(one, xcmp), _mm256_andnot_ps(xcmp, xabs));     // xabs = (xcmp) ? 1 : xabs;
+
+    xabs = _mm256_mul_ps(xabs, resolution256);
+
+    __m256i ixabs = _mm256_cvttps_epi32(xabs);
+    __m256 a = _mm256_sub_ps(xabs, _mm256_cvtepi32_ps(ixabs));
+
+    
+    int* ixabsv = (int*)&ixabs;
+
+    __m256 lo = _mm256_set_ps(
+        pSRC->sinc.table[ixabsv[7]],
+        pSRC->sinc.table[ixabsv[6]],
+        pSRC->sinc.table[ixabsv[5]],
+        pSRC->sinc.table[ixabsv[4]],
+        pSRC->sinc.table[ixabsv[3]],
+        pSRC->sinc.table[ixabsv[2]],
+        pSRC->sinc.table[ixabsv[1]],
+        pSRC->sinc.table[ixabsv[0]]
+    );
+    
+    __m256 hi = _mm256_set_ps(
+        pSRC->sinc.table[ixabsv[7]+1],
+        pSRC->sinc.table[ixabsv[6]+1],
+        pSRC->sinc.table[ixabsv[5]+1],
+        pSRC->sinc.table[ixabsv[4]+1],
+        pSRC->sinc.table[ixabsv[3]+1],
+        pSRC->sinc.table[ixabsv[2]+1],
+        pSRC->sinc.table[ixabsv[1]+1],
+        pSRC->sinc.table[ixabsv[0]+1]
+    );
+
+    __m256 r = mal_mix_f32_fast__avx2(lo, hi, a);
+
+    return r;
+}
+#endif
+
+#endif
+
+#if defined(MAL_SUPPORT_NEON)
+static MAL_INLINE float32x4_t mal_fabsf_neon(float32x4_t x)
+{
+    return vabdq_f32(vmovq_n_f32(0), x);
+}
+
+static MAL_INLINE float32x4_t mal_src_sinc__interpolation_factor__neon(const mal_src* pSRC, float32x4_t x)
+{
+    float32x4_t xabs = mal_fabsf_neon(x);
+    xabs = vmulq_n_f32(xabs, MAL_SRC_SINC_LOOKUP_TABLE_RESOLUTION);
+
+    int32x4_t ixabs = vcvtq_s32_f32(xabs);
+
+    int* ixabsv = (int*)&ixabs;
+    
+    float lo[4];
+    lo[0] = pSRC->sinc.table[ixabsv[0]];
+    lo[1] = pSRC->sinc.table[ixabsv[1]];
+    lo[2] = pSRC->sinc.table[ixabsv[2]];
+    lo[3] = pSRC->sinc.table[ixabsv[3]];
+
+    float hi[4];
+    hi[0] = pSRC->sinc.table[ixabsv[0]+1];
+    hi[1] = pSRC->sinc.table[ixabsv[1]+1];
+    hi[2] = pSRC->sinc.table[ixabsv[2]+1];
+    hi[3] = pSRC->sinc.table[ixabsv[3]+1];
+
+    float32x4_t a = vsubq_f32(xabs, vcvtq_f32_s32(ixabs));
+    float32x4_t r = mal_mix_f32_fast__neon(vld1q_f32(lo), vld1q_f32(hi), a);
+
+    return r;
+}
+#endif
+
 mal_uint64 mal_src_read_deinterleaved__sinc(mal_src* pSRC, mal_uint64 frameCount, void** ppSamplesOut, void* pUserData)
 {
     mal_assert(pSRC != NULL);
@@ -20057,9 +23020,48 @@ mal_uint64 mal_src_read_deinterleaved__sinc(mal_src* pSRC, mal_uint64 frameCount
     mal_int32 windowWidth  = (mal_int32)pSRC->config.sinc.windowWidth;
     mal_int32 windowWidth2 = windowWidth*2;
 
+    // There are cases where it's actually more efficient to increase the window width so that it's aligned with the respective
+    // SIMD pipeline being used.
+    mal_int32 windowWidthSIMD = windowWidth;
+#if defined(MAL_SUPPORT_NEON)
+    if (pSRC->useNEON) {
+        windowWidthSIMD = (windowWidthSIMD + 1) & ~(1);
+    }
+#endif
+#if defined(MAL_SUPPORT_AVX512)
+    if (pSRC->useAVX512) {
+        windowWidthSIMD = (windowWidthSIMD + 7) & ~(7);
+    }
+    else
+#endif
+#if defined(MAL_SUPPORT_AVX2)
+    if (pSRC->useAVX2) {
+        windowWidthSIMD = (windowWidthSIMD + 3) & ~(3);
+    }
+    else
+#endif
+#if defined(MAL_SUPPORT_SSE2)
+    if (pSRC->useSSE2) {
+        windowWidthSIMD = (windowWidthSIMD + 1) & ~(1);
+    }
+#endif
+    mal_int32 windowWidthSIMD2 = windowWidthSIMD*2;
+
+
     float* ppNextSamplesOut[MAL_MAX_CHANNELS];
     mal_copy_memory(ppNextSamplesOut, ppSamplesOut, sizeof(void*) * pSRC->config.channels);
 
+    float _windowSamplesUnaligned[MAL_SRC_SINC_MAX_WINDOW_WIDTH*2 + MAL_SIMD_ALIGNMENT];
+    float* windowSamples = (float*)(((mal_uintptr)_windowSamplesUnaligned + MAL_SIMD_ALIGNMENT-1) & ~(MAL_SIMD_ALIGNMENT-1));
+    mal_zero_memory(windowSamples, MAL_SRC_SINC_MAX_WINDOW_WIDTH*2 * sizeof(float));
+
+    float _iWindowFUnaligned[MAL_SRC_SINC_MAX_WINDOW_WIDTH*2 + MAL_SIMD_ALIGNMENT];
+    float* iWindowF = (float*)(((mal_uintptr)_iWindowFUnaligned + MAL_SIMD_ALIGNMENT-1) & ~(MAL_SIMD_ALIGNMENT-1));
+    mal_zero_memory(iWindowF, MAL_SRC_SINC_MAX_WINDOW_WIDTH*2 * sizeof(float));
+    for (mal_int32 i = 0; i < windowWidth2; ++i) {
+        iWindowF[i] = (float)(i - windowWidth);
+    }
+
     mal_uint64 totalOutputFramesRead = 0;
     while (totalOutputFramesRead < frameCount) {
         // The maximum number of frames we can read this iteration depends on how many input samples we have available to us. This is the number
@@ -20087,17 +23089,138 @@ mal_uint64 mal_src_read_deinterleaved__sinc(mal_src* pSRC, mal_uint64 frameCount
             // Do SRC.
             float timeIn = timeInBeg;
             for (mal_uint32 iSample = 0; iSample < outputFramesToRead; iSample += 1) {
-                mal_int32 iTimeIn  = (mal_int32)timeIn;
+                float sampleOut    = 0;
+                float iTimeInF     = mal_floorf(timeIn);
+                mal_uint32 iTimeIn = (mal_uint32)iTimeInF;
+
+                mal_int32 iWindow = 0;
+
+                // Pre-load the window samples into an aligned buffer to begin with. Need to put these into an aligned buffer to make SIMD easier.
+                windowSamples[0] = 0;   // <-- The first sample is always zero.
+                for (mal_int32 i = 1; i < windowWidth2; ++i) {
+                    windowSamples[i] = pSRC->sinc.input[iChannel][iTimeIn + i];
+                }
+
+#if defined(MAL_SUPPORT_AVX2) || defined(MAL_SUPPORT_AVX512)
+                if (pSRC->useAVX2 || pSRC->useAVX512) {
+                    __m256i ixabs[MAL_SRC_SINC_MAX_WINDOW_WIDTH*2/8];
+                    __m256      a[MAL_SRC_SINC_MAX_WINDOW_WIDTH*2/8];
+                    __m256 resolution256 = _mm256_set1_ps(MAL_SRC_SINC_LOOKUP_TABLE_RESOLUTION);
+
+                    __m256 t = _mm256_set1_ps((timeIn - iTimeInF));
+                    __m256 r = _mm256_set1_ps(0);
+
+                    mal_int32 windowWidth8 = windowWidthSIMD2 >> 3;
+                    for (mal_int32 iWindow8 = 0; iWindow8 < windowWidth8; iWindow8 += 1) {
+                        __m256 w = *((__m256*)iWindowF + iWindow8);
 
-                float sampleOut = 0;
-                for (mal_int32 iWindow = -windowWidth+1; iWindow < windowWidth; iWindow += 1) {
-                    float t = (timeIn - iTimeIn);
-                    float w = (float)(iWindow);
+                        __m256 xabs = _mm256_sub_ps(t, w);
+                        xabs = mal_fabsf_avx2(xabs);
+                        xabs = _mm256_mul_ps(xabs, resolution256);
+
+                        ixabs[iWindow8] = _mm256_cvttps_epi32(xabs);
+                            a[iWindow8] = _mm256_sub_ps(xabs, _mm256_cvtepi32_ps(ixabs[iWindow8]));
+                    }
+                    
+                    for (mal_int32 iWindow8 = 0; iWindow8 < windowWidth8; iWindow8 += 1) {
+                        int* ixabsv = (int*)&ixabs[iWindow8];
+
+                        __m256 lo = _mm256_set_ps(
+                            pSRC->sinc.table[ixabsv[7]],
+                            pSRC->sinc.table[ixabsv[6]],
+                            pSRC->sinc.table[ixabsv[5]],
+                            pSRC->sinc.table[ixabsv[4]],
+                            pSRC->sinc.table[ixabsv[3]],
+                            pSRC->sinc.table[ixabsv[2]],
+                            pSRC->sinc.table[ixabsv[1]],
+                            pSRC->sinc.table[ixabsv[0]]
+                        );
+    
+                        __m256 hi = _mm256_set_ps(
+                            pSRC->sinc.table[ixabsv[7]+1],
+                            pSRC->sinc.table[ixabsv[6]+1],
+                            pSRC->sinc.table[ixabsv[5]+1],
+                            pSRC->sinc.table[ixabsv[4]+1],
+                            pSRC->sinc.table[ixabsv[3]+1],
+                            pSRC->sinc.table[ixabsv[2]+1],
+                            pSRC->sinc.table[ixabsv[1]+1],
+                            pSRC->sinc.table[ixabsv[0]+1]
+                        );
+
+                        __m256 s = *((__m256*)windowSamples + iWindow8);
+                        r = _mm256_add_ps(r, _mm256_mul_ps(s, mal_mix_f32_fast__avx2(lo, hi, a[iWindow8])));
+                    }
+
+                    // Horizontal add.
+                    __m256 x = _mm256_hadd_ps(r, _mm256_permute2f128_ps(r, r, 1));
+                           x = _mm256_hadd_ps(x, x);
+                           x = _mm256_hadd_ps(x, x);
+                    sampleOut += _mm_cvtss_f32(_mm256_castps256_ps128(x));
+
+                    iWindow += windowWidth8 * 8;
+                }
+                else
+#endif
+#if defined(MAL_SUPPORT_SSE2)
+                if (pSRC->useSSE2) {
+                    __m128 t = _mm_set1_ps((timeIn - iTimeInF));
+                    __m128 r = _mm_set1_ps(0);
+
+                    mal_int32 windowWidth4 = windowWidthSIMD2 >> 2;
+                    for (mal_int32 iWindow4 = 0; iWindow4 < windowWidth4; iWindow4 += 1) {
+                        __m128* s = (__m128*)windowSamples + iWindow4;
+                        __m128* w = (__m128*)iWindowF + iWindow4;
+
+                        __m128 a = mal_src_sinc__interpolation_factor__sse2(pSRC, _mm_sub_ps(t, *w));
+                        r = _mm_add_ps(r, _mm_mul_ps(*s, a));
+                    }
+
+                    sampleOut += ((float*)(&r))[0];
+                    sampleOut += ((float*)(&r))[1];
+                    sampleOut += ((float*)(&r))[2];
+                    sampleOut += ((float*)(&r))[3];
+
+                    iWindow += windowWidth4 * 4;
+                }
+                else
+#endif
+#if defined(MAL_SUPPORT_NEON)
+                if (pSRC->useNEON) {
+                    float32x4_t t = vmovq_n_f32((timeIn - iTimeInF));
+                    float32x4_t r = vmovq_n_f32(0);
+
+                    mal_int32 windowWidth4 = windowWidthSIMD2 >> 2;
+                    for (mal_int32 iWindow4 = 0; iWindow4 < windowWidth4; iWindow4 += 1) {
+                        float32x4_t* s = (float32x4_t*)windowSamples + iWindow4;
+                        float32x4_t* w = (float32x4_t*)iWindowF + iWindow4;
+
+                        float32x4_t a = mal_src_sinc__interpolation_factor__neon(pSRC, vsubq_f32(t, *w));
+                        r = vaddq_f32(r, vmulq_f32(*s, a));
+                    }
+
+                    sampleOut += ((float*)(&r))[0];
+                    sampleOut += ((float*)(&r))[1];
+                    sampleOut += ((float*)(&r))[2];
+                    sampleOut += ((float*)(&r))[3];
+
+                    iWindow += windowWidth4 * 4;
+                }
+                else
+#endif
+                {
+                    iWindow += 1;   // The first one is a dummy for SIMD alignment purposes. Skip it.
+                }
+
+                // Non-SIMD/Reference implementation. 
+                float t = (timeIn - iTimeIn);
+                for (; iWindow < windowWidth2; iWindow += 1) {
+                    float s = windowSamples[iWindow];
+                    float w = iWindowF[iWindow];
 
                     float a = mal_src_sinc__interpolation_factor(pSRC, (t - w));
-                    float s = mal_src_sinc__get_input_sample_from_window(pSRC, iChannel, iTimeIn, iWindow);
+                    float r = s * a;
 
-                    sampleOut += s * a;
+                    sampleOut += r;
                 }
 
                 ppNextSamplesOut[iChannel][iSample] = (float)sampleOut;
@@ -20359,7 +23482,8 @@ mal_result mal_dsp_init(const mal_dsp_config* pConfig, mal_dsp* pDSP)
     pDSP->pUserData = pConfig->pUserData;
     pDSP->isDynamicSampleRateAllowed = pConfig->allowDynamicSampleRate;
 
-    // This is generally the pipeline used for data conversion. Note that this can actually change which is explained later.
+
+    // In general, this is the pipeline used for data conversion. Note that this can actually change which is explained later.
     //
     //   Pre Format Conversion -> Sample Rate Conversion -> Channel Routing -> Post Format Conversion
     //
@@ -20455,6 +23579,10 @@ mal_result mal_dsp_init(const mal_dsp_config* pConfig, mal_dsp* pDSP)
             pDSP
         );
         preFormatConverterConfig.ditherMode = pConfig->ditherMode;
+        preFormatConverterConfig.noSSE2     = pConfig->noSSE2;
+        preFormatConverterConfig.noAVX2     = pConfig->noAVX2;
+        preFormatConverterConfig.noAVX512   = pConfig->noAVX512;
+        preFormatConverterConfig.noNEON     = pConfig->noNEON;
 
         result = mal_format_converter_init(&preFormatConverterConfig, &pDSP->formatConverterIn);
         if (result != MAL_SUCCESS) {
@@ -20466,10 +23594,14 @@ mal_result mal_dsp_init(const mal_dsp_config* pConfig, mal_dsp* pDSP)
     // or from an earlier stage in the pipeline.
     {
         mal_format_converter_config postFormatConverterConfig = mal_format_converter_config_init_new();
-        postFormatConverterConfig.formatIn = pConfig->formatIn;
-        postFormatConverterConfig.formatOut = pConfig->formatOut;
-        postFormatConverterConfig.channels = pConfig->channelsOut;
+        postFormatConverterConfig.formatIn   = pConfig->formatIn;
+        postFormatConverterConfig.formatOut  = pConfig->formatOut;
+        postFormatConverterConfig.channels   = pConfig->channelsOut;
         postFormatConverterConfig.ditherMode = pConfig->ditherMode;
+        postFormatConverterConfig.noSSE2     = pConfig->noSSE2;
+        postFormatConverterConfig.noAVX2     = pConfig->noAVX2;
+        postFormatConverterConfig.noAVX512   = pConfig->noAVX512;
+        postFormatConverterConfig.noNEON     = pConfig->noNEON;
         if (pDSP->isPreFormatConversionRequired) {
             postFormatConverterConfig.onReadDeinterleaved = mal_dsp__post_format_converter_on_read_deinterleaved;
             postFormatConverterConfig.formatIn = mal_format_f32;
@@ -20493,6 +23625,10 @@ mal_result mal_dsp_init(const mal_dsp_config* pConfig, mal_dsp* pDSP)
             pDSP
         );
         srcConfig.algorithm = pConfig->srcAlgorithm;
+        srcConfig.noSSE2    = pConfig->noSSE2;
+        srcConfig.noAVX2    = pConfig->noAVX2;
+        srcConfig.noAVX512  = pConfig->noAVX512;
+        srcConfig.noNEON    = pConfig->noNEON;
         mal_copy_memory(&srcConfig.sinc, &pConfig->sinc, sizeof(pConfig->sinc));
 
         result = mal_src_init(&srcConfig, &pDSP->src);
@@ -20511,6 +23647,10 @@ mal_result mal_dsp_init(const mal_dsp_config* pConfig, mal_dsp* pDSP)
             pConfig->channelMixMode,
             mal_dsp__channel_router_on_read_deinterleaved,
             pDSP);
+        routerConfig.noSSE2   = pConfig->noSSE2;
+        routerConfig.noAVX2   = pConfig->noAVX2;
+        routerConfig.noAVX512 = pConfig->noAVX512;
+        routerConfig.noNEON   = pConfig->noNEON;
 
         result = mal_channel_router_init(&routerConfig, &pDSP->channelRouter);
         if (result != MAL_SUCCESS) {
@@ -20620,6 +23760,7 @@ typedef struct
     mal_uint32 channelsIn;
     mal_uint64 totalFrameCount;
     mal_uint64 iNextFrame;
+    mal_bool32 isFeedingZeros;  // When set to true, feeds the DSP zero samples.
 } mal_convert_frames__data;
 
 mal_uint32 mal_convert_frames__on_read(mal_dsp* pDSP, mal_uint32 frameCount, void* pFramesOut, void* pUserData)
@@ -20636,8 +23777,13 @@ mal_uint32 mal_convert_frames__on_read(mal_dsp* pDSP, mal_uint32 frameCount, voi
         framesToRead = (mal_uint32)framesRemaining;
     }
 
-    mal_uint32 frameSizeInBytes = mal_get_bytes_per_sample(pData->formatIn) * pData->channelsIn;
-    mal_copy_memory(pFramesOut, (const mal_uint8*)pData->pDataIn + (frameSizeInBytes * pData->iNextFrame), frameSizeInBytes * framesToRead);
+    mal_uint32 frameSizeInBytes = mal_get_bytes_per_frame(pData->formatIn, pData->channelsIn);
+
+    if (!pData->isFeedingZeros) {
+        mal_copy_memory(pFramesOut, (const mal_uint8*)pData->pDataIn + (frameSizeInBytes * pData->iNextFrame), frameSizeInBytes * framesToRead);
+    } else {
+        mal_zero_memory(pFramesOut, frameSizeInBytes * framesToRead);
+    }
 
     pData->iNextFrame += framesToRead;
     return framesToRead;
@@ -20708,6 +23854,7 @@ mal_uint64 mal_convert_frames_ex(void* pOut, mal_format formatOut, mal_uint32 ch
     data.channelsIn = channelsIn;
     data.totalFrameCount = frameCountIn;
     data.iNextFrame = 0;
+    data.isFeedingZeros = MAL_FALSE;
 
     mal_dsp_config config;
     mal_zero_object(&config);
@@ -20738,7 +23885,38 @@ mal_uint64 mal_convert_frames_ex(void* pOut, mal_format formatOut, mal_uint32 ch
         return 0;
     }
 
-    return mal_dsp_read(&dsp, frameCountOut, pOut, dsp.pUserData);
+    // Always output our computed frame count. There is a chance the sample rate conversion routine may not output the last sample
+    // due to precision issues with 32-bit floats, in which case we should feed the DSP zero samples so it can generate that last
+    // frame.
+    mal_uint64 totalFramesRead = mal_dsp_read(&dsp, frameCountOut, pOut, dsp.pUserData);
+    if (totalFramesRead < frameCountOut) {
+        mal_uint32 bpf = mal_get_bytes_per_frame(formatIn, channelsIn);
+
+        data.isFeedingZeros = MAL_TRUE;
+        data.totalFrameCount = 0xFFFFFFFFFFFFFFFF;
+        data.pDataIn = NULL;
+
+        while (totalFramesRead < frameCountOut) {
+            mal_uint64 framesToRead = (frameCountOut - totalFramesRead);
+            mal_assert(framesToRead > 0);
+
+            mal_uint64 framesJustRead = mal_dsp_read(&dsp, framesToRead, mal_offset_ptr(pOut, totalFramesRead * bpf), dsp.pUserData);
+            totalFramesRead += framesJustRead;
+
+            if (framesJustRead < framesToRead) {
+                break;
+            }
+        }
+
+        // At this point we should have output every sample, but just to be super duper sure, just fill the rest with zeros.
+        if (totalFramesRead < frameCountOut) {
+            mal_zero_memory_64(mal_offset_ptr(pOut, totalFramesRead * bpf), ((frameCountOut - totalFramesRead) * bpf));
+            totalFramesRead = frameCountOut;
+        }
+    }
+
+    mal_assert(totalFramesRead == frameCountOut);
+    return totalFramesRead;
 }
 
 
@@ -20802,7 +23980,7 @@ const char* mal_get_backend_name(mal_backend backend)
         case mal_backend_alsa:       return "ALSA";
         case mal_backend_pulseaudio: return "PulseAudio";
         case mal_backend_jack:       return "JACK";
-        //case mal_backend_coreaudio:  return "Core Audio";
+        case mal_backend_coreaudio:  return "Core Audio";
         case mal_backend_oss:        return "OSS";
         case mal_backend_opensl:     return "OpenSL|ES";
         case mal_backend_openal:     return "OpenAL";
@@ -20874,27 +24052,45 @@ float mal_calculate_cpu_speed_factor()
     mal_uint32 channelsOut   = 6;
 
     // Using the heap here to avoid an unnecessary static memory allocation. Also too big for the stack.
-    mal_uint8* pInputFrames = (mal_uint8*)mal_aligned_malloc(sampleRateIn * channelsIn * sizeof(*pInputFrames), MAL_SIMD_ALIGNMENT);
-    if (pInputFrames == NULL) {
-        return 1;
-    }
+    mal_uint8* pInputFrames  = NULL;
+    float*     pOutputFrames = NULL;
 
-    float* pOutputFrames = (float*)mal_aligned_malloc(sampleRateOut * channelsOut * sizeof(*pOutputFrames), MAL_SIMD_ALIGNMENT);
-    if (pOutputFrames == NULL) {
-        mal_aligned_free(pInputFrames);
+    size_t inputDataSize  = sampleRateIn  * channelsIn  * sizeof(*pInputFrames);
+    size_t outputDataSize = sampleRateOut * channelsOut * sizeof(*pOutputFrames);
+
+    void* pData = mal_malloc(inputDataSize + outputDataSize);
+    if (pData == NULL) {
         return 1;
     }
 
+    pInputFrames  = (mal_uint8*)pData;
+    pOutputFrames = (float*)(pInputFrames + inputDataSize);
+
+
+    
+
     mal_calculate_cpu_speed_factor_data data;
     data.pInputFrames = pInputFrames;
     data.framesRemaining = sampleRateIn;
     
     mal_dsp_config config = mal_dsp_config_init(mal_format_u8, channelsIn, sampleRateIn, mal_format_f32, channelsOut, sampleRateOut, mal_calculate_cpu_speed_factor__on_read, &data);
+
+    // Use linear sample rate conversion because it's the simplest and least likely to cause skewing as a result of tweaks to default
+    // configurations in the future.
+    config.srcAlgorithm = mal_src_algorithm_linear;
+
+    // Experiment: Disable SIMD extensions when profiling just to try and keep things a bit more consistent. The idea is to get a general
+    // indication on the speed of the system, but SIMD is used more heavily in the DSP pipeline than in the general case which may make
+    // the results a little less realistic.
+    config.noSSE2   = MAL_TRUE;
+    config.noAVX2   = MAL_TRUE;
+    config.noAVX512 = MAL_TRUE;
+    config.noNEON   = MAL_TRUE;
+
     mal_dsp dsp;
     mal_result result = mal_dsp_init(&config, &dsp);
     if (result != MAL_SUCCESS) {
-        mal_aligned_free(pInputFrames);
-        mal_aligned_free(pOutputFrames);
+        mal_free(pData);
         return 1;
     }
 
@@ -20915,9 +24111,7 @@ float mal_calculate_cpu_speed_factor()
     executionTimeInSeconds /= iterationCount;
 
     
-    mal_aligned_free(pInputFrames);
-    mal_aligned_free(pOutputFrames);
-
+    mal_free(pData);
     return (float)(executionTimeInSeconds * f);
 }
 
@@ -21663,7 +24857,7 @@ mal_uint32 mal_decoder_internal_on_read_frames__raw(mal_dsp* pDSP, mal_uint32 fr
 
     // For raw decoding we just read directly from the decoder's callbacks.
     mal_uint32 bpf = mal_get_bytes_per_frame(pDecoder->internalFormat, pDecoder->internalChannels);
-    return pDecoder->onRead(pDecoder, pSamplesOut, frameCount * bpf) / bpf;
+    return (mal_uint32)pDecoder->onRead(pDecoder, pSamplesOut, frameCount * bpf) / bpf;
 }
 
 mal_result mal_decoder_init_raw__internal(const mal_decoder_config* pConfigIn, const mal_decoder_config* pConfigOut, mal_decoder* pDecoder)
@@ -21792,6 +24986,13 @@ mal_result mal_decoder_init__internal(mal_decoder_read_proc onRead, mal_decoder_
     mal_assert(pConfig != NULL);
     mal_assert(pDecoder != NULL);
 
+    // Silence some warnings in the case that we don't have any decoder backends enabled.
+    (void)onRead;
+    (void)onSeek;
+    (void)pUserData;
+    (void)pConfig;
+    (void)pDecoder;
+
     // We use trial and error to open a decoder.
     mal_result result = MAL_NO_BACKEND;
 
@@ -22380,7 +25581,7 @@ mal_result mal_sine_wave_init(double amplitude, double periodsPerSecond, mal_uin
 
     pSineWave->amplitude = amplitude;
     pSineWave->periodsPerSecond = periodsPerSecond;
-    pSineWave->delta = MAL_PI_D*2 / sampleRate;
+    pSineWave->delta = MAL_TAU_D / sampleRate;
     pSineWave->time = 0;
 
     return MAL_SUCCESS;
@@ -22426,6 +25627,7 @@ mal_uint64 mal_sine_wave_read(mal_sine_wave* pSineWave, mal_uint64 count, float*
 //   - API CHANGE: Change the default channel mapping to the standard Microsoft mapping.
 //   - API CHANGE: Remove backend-specific result codes.
 //   - API CHANGE: Changes to the format conversion APIs (mal_pcm_f32_to_s16(), etc.)
+//   - Add support for Core Audio (Apple).
 //   - Add support for PulseAudio.
 //     - This is the highest priority backend on Linux (higher priority than ALSA) since it is commonly
 //       installed by default on many of the popular distros and offer's more seamless integration on
@@ -22448,12 +25650,14 @@ mal_uint64 mal_sine_wave_read(mal_sine_wave* pSineWave, mal_uint64 count, float*
 //     as the backend's internal device, and as such results in a pass-through data transmission pipeline.
 //   - Add support for passing in NULL for the device config in mal_device_init(), which uses a default
 //     config. This requires manually calling mal_device_set_send/recv_callback().
+//   - Add support for decoding from raw PCM data (mal_decoder_init_raw(), etc.)
 //   - Make mal_device_init_ex() more robust.
 //   - Make some APIs more const-correct.
+//   - Fix errors with SDL detection on Apple platforms.
 //   - Fix errors with OpenAL detection.
 //   - Fix some memory leaks.
 //   - Fix a bug with opening decoders from memory.
-//   - Add support for decoding from raw PCM data (mal_decoder_init_raw(), etc.)
+//   - Early work on SSE2, AVX2 and NEON optimizations.
 //   - Miscellaneous bug fixes.
 //   - Documentation updates.
 //