PerchunPak · December 21, 2024 22:07
diff --git a/python3.12-simsimd-6.2.2.log b/python3.12-simsimd-6.2.2.log
 Sourcing python-remove-tests-dir-hook
 Sourcing python-catch-conflicts-hook.sh
 Sourcing python-remove-bin-bytecode-hook.sh
 Sourcing pypa-build-hook
 Using pypaBuildPhase
 Sourcing python-runtime-deps-check-hook
 Using pythonRuntimeDepsCheckHook
 Sourcing pypa-install-hook
 Using pypaInstallPhase
 Sourcing python-imports-check-hook.sh
 Using pythonImportsCheckPhase
 Sourcing python-namespaces-hook
 Sourcing python-catch-conflicts-hook.sh
 Sourcing pytest-check-hook
 Using pytestCheckPhase
 Running phase: unpackPhase
 @nix { "action": "setPhase", "phase": "unpackPhase" }
 unpacking source archive /nix/store/y30jnfhz06qv7vxq8bq0s63pdn9avpxq-source
 source root is source
 setting SOURCE_DATE_EPOCH to timestamp 315619200 of file source/swift/Test.swift
 Running phase: patchPhase
 @nix { "action": "setPhase", "phase": "patchPhase" }
 Running phase: updateAutotoolsGnuConfigScriptsPhase
 @nix { "action": "setPhase", "phase": "updateAutotoolsGnuConfigScriptsPhase" }
 Running phase: configurePhase
 @nix { "action": "setPhase", "phase": "configurePhase" }
 no configure script, doing nothing
 Running phase: buildPhase
 @nix { "action": "setPhase", "phase": "buildPhase" }
 Executing pypaBuildPhase
 Creating a wheel...
 pypa build flags: --no-isolation --outdir dist/ --wheel
 * Getting build dependencies for wheel...
 running egg_info
 creating simsimd.egg-info
 writing simsimd.egg-info/PKG-INFO
 writing dependency_links to simsimd.egg-info/dependency_links.txt
 writing top-level names to simsimd.egg-info/top_level.txt
 writing manifest file 'simsimd.egg-info/SOURCES.txt'
 reading manifest file 'simsimd.egg-info/SOURCES.txt'
 reading manifest template 'MANIFEST.in'
 adding license file 'LICENSE'
 writing manifest file 'simsimd.egg-info/SOURCES.txt'
 * Building wheel...
 running bdist_wheel
 running build
 running build_py
 running egg_info
 writing simsimd.egg-info/PKG-INFO
 writing dependency_links to simsimd.egg-info/dependency_links.txt
 writing top-level names to simsimd.egg-info/top_level.txt
 reading manifest file 'simsimd.egg-info/SOURCES.txt'
 reading manifest template 'MANIFEST.in'
 adding license file 'LICENSE'
 writing manifest file 'simsimd.egg-info/SOURCES.txt'
 creating build/lib.linux-aarch64-cpython-312/simsimd
 copying python/annotations/__init__.pyi -> build/lib.linux-aarch64-cpython-312/simsimd
 copying python/annotations/py.typed -> build/lib.linux-aarch64-cpython-312/simsimd
 running build_ext
 building 'simsimd' extension
 creating build/temp.linux-aarch64-cpython-312/c
 creating build/temp.linux-aarch64-cpython-312/python
 gcc -fno-strict-overflow -Wsign-compare -DNDEBUG -g -O3 -Wall -I/nix/store/73xhk9yyxqhy7wz0sqpakkgv0h9lvm61-libxcrypt-4.4.36/include -fPIC -DSIMSIMD_NATIVE_F16=0 -DSIMSIMD_NATIVE_BF16=0 -DSIMSIMD_DYNAMIC_DISPATCH=1 -DSIMSIMD_TARGET_NEON=1 -DSIMSIMD_TARGET_NEON_F16=1 -DSIMSIMD_TARGET_NEON_BF16=1 -DSIMSIMD_TARGET_SVE=1 -DSIMSIMD_TARGET_SVE_F16=1 -DSIMSIMD_TARGET_SVE_BF16=1 -DSIMSIMD_TARGET_SVE2=1 -DSIMSIMD_TARGET_HASWELL=1 -DSIMSIMD_TARGET_SKYLAKE=1 -DSIMSIMD_TARGET_ICE=1 -DSIMSIMD_TARGET_GENOA=1 -DSIMSIMD_TARGET_SAPPHIRE=1 -DSIMSIMD_TARGET_TURIN=1 -DSIMSIMD_TARGET_SIERRA=0 -Iinclude -I/nix/store/c1fbv3y657fp2m514gjxqqgqfsvayp6v-python3-3.12.7/include/python3.12 -c c/lib.c -o build/temp.linux-aarch64-cpython-312/c/lib.o -std=c11 -O3 -ffast-math -fdiagnostics-color=always -fvisibility=default -fPIC -w -fopenmp
 gcc -fno-strict-overflow -Wsign-compare -DNDEBUG -g -O3 -Wall -I/nix/store/73xhk9yyxqhy7wz0sqpakkgv0h9lvm61-libxcrypt-4.4.36/include -fPIC -DSIMSIMD_NATIVE_F16=0 -DSIMSIMD_NATIVE_BF16=0 -DSIMSIMD_DYNAMIC_DISPATCH=1 -DSIMSIMD_TARGET_NEON=1 -DSIMSIMD_TARGET_NEON_F16=1 -DSIMSIMD_TARGET_NEON_BF16=1 -DSIMSIMD_TARGET_SVE=1 -DSIMSIMD_TARGET_SVE_F16=1 -DSIMSIMD_TARGET_SVE_BF16=1 -DSIMSIMD_TARGET_SVE2=1 -DSIMSIMD_TARGET_HASWELL=1 -DSIMSIMD_TARGET_SKYLAKE=1 -DSIMSIMD_TARGET_ICE=1 -DSIMSIMD_TARGET_GENOA=1 -DSIMSIMD_TARGET_SAPPHIRE=1 -DSIMSIMD_TARGET_TURIN=1 -DSIMSIMD_TARGET_SIERRA=0 -Iinclude -I/nix/store/c1fbv3y657fp2m514gjxqqgqfsvayp6v-python3-3.12.7/include/python3.12 -c python/lib.c -o build/temp.linux-aarch64-cpython-312/python/lib.o -std=c11 -O3 -ffast-math -fdiagnostics-color=always -fvisibility=default -fPIC -w -fopenmp
 gcc -shared -L/nix/store/eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee-bzip2-1.0.8/lib -L/nix/store/eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee-expat-2.6.4/lib -L/nix/store/eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee-libffi-3.4.6/lib -L/nix/store/73xhk9yyxqhy7wz0sqpakkgv0h9lvm61-libxcrypt-4.4.36/lib -L/nix/store/eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee-mpdecimal-4.0.0/lib -L/nix/store/eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee-ncurses-6.4.20221231/lib -L/nix/store/eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee-openssl-3.3.2/lib -L/nix/store/eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee-sqlite-3.46.1/lib -L/nix/store/eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee-xz-5.6.3/lib -L/nix/store/eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee-zlib-1.3.1/lib -L/nix/store/7a7srac2q5ymc8nab9bhf6rk8ya2l5m6-tzdata-2024b/lib -L/nix/store/eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee-gdbm-1.24-lib/lib -L/nix/store/eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee-readline-8.2p13/lib -L/nix/store/eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee-bzip2-1.0.8/lib -L/nix/store/eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee-expat-2.6.4/lib -L/nix/store/eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee-libffi-3.4.6/lib -L/nix/store/73xhk9yyxqhy7wz0sqpakkgv0h9lvm61-libxcrypt-4.4.36/lib -L/nix/store/eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee-mpdecimal-4.0.0/lib -L/nix/store/eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee-ncurses-6.4.20221231/lib -L/nix/store/eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee-openssl-3.3.2/lib -L/nix/store/eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee-sqlite-3.46.1/lib -L/nix/store/eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee-xz-5.6.3/lib -L/nix/store/eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee-zlib-1.3.1/lib -L/nix/store/7a7srac2q5ymc8nab9bhf6rk8ya2l5m6-tzdata-2024b/lib -L/nix/store/eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee-gdbm-1.24-lib/lib -L/nix/store/eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee-readline-8.2p13/lib build/temp.linux-aarch64-cpython-312/c/lib.o build/temp.linux-aarch64-cpython-312/python/lib.o -L/nix/store/c1fbv3y657fp2m514gjxqqgqfsvayp6v-python3-3.12.7/lib -o build/lib.linux-aarch64-cpython-312/simsimd.cpython-312-aarch64-linux-gnu.so -shared -fopenmp -lm
 installing to build/bdist.linux-aarch64/wheel
 running install
 running install_lib
 creating build/bdist.linux-aarch64/wheel
 creating build/bdist.linux-aarch64/wheel/simsimd
 copying build/lib.linux-aarch64-cpython-312/simsimd/__init__.pyi -> build/bdist.linux-aarch64/wheel/./simsimd
 copying build/lib.linux-aarch64-cpython-312/simsimd/py.typed -> build/bdist.linux-aarch64/wheel/./simsimd
 copying build/lib.linux-aarch64-cpython-312/simsimd.cpython-312-aarch64-linux-gnu.so -> build/bdist.linux-aarch64/wheel/.
 running install_egg_info
 Copying simsimd.egg-info to build/bdist.linux-aarch64/wheel/./simsimd-6.2.2-py3.12.egg-info
 running install_scripts
 creating build/bdist.linux-aarch64/wheel/simsimd-6.2.2.dist-info/WHEEL
 creating '/build/source/dist/.tmp-6swbnayu/simsimd-6.2.2-cp312-cp312-linux_aarch64.whl' and adding 'build/bdist.linux-aarch64/wheel' to it
 adding 'simsimd.cpython-312-aarch64-linux-gnu.so'
 adding 'simsimd/__init__.pyi'
 adding 'simsimd/py.typed'
 adding 'simsimd-6.2.2.dist-info/LICENSE'
 adding 'simsimd-6.2.2.dist-info/METADATA'
 adding 'simsimd-6.2.2.dist-info/WHEEL'
 adding 'simsimd-6.2.2.dist-info/top_level.txt'
 adding 'simsimd-6.2.2.dist-info/RECORD'
 removing build/bdist.linux-aarch64/wheel
 Successfully built simsimd-6.2.2-cp312-cp312-linux_aarch64.whl
 Finished creating a wheel...
 Finished executing pypaBuildPhase
 buildPhase completed in 52 seconds
 Running phase: pythonRuntimeDepsCheckHook
 @nix { "action": "setPhase", "phase": "pythonRuntimeDepsCheckHook" }
 Executing pythonRuntimeDepsCheck
 Checking runtime dependencies for simsimd-6.2.2-cp312-cp312-linux_aarch64.whl
 Finished executing pythonRuntimeDepsCheck
 Running phase: installPhase
 @nix { "action": "setPhase", "phase": "installPhase" }
 Executing pypaInstallPhase
 Successfully installed simsimd-6.2.2-cp312-cp312-linux_aarch64.whl
 Finished executing pypaInstallPhase
 Running phase: pythonOutputDistPhase
 @nix { "action": "setPhase", "phase": "pythonOutputDistPhase" }
 Executing pythonOutputDistPhase
 Finished executing pythonOutputDistPhase
 Running phase: fixupPhase
 @nix { "action": "setPhase", "phase": "fixupPhase" }
 shrinking RPATHs of ELF executables and libraries in /nix/store/yp70afy166a4q2mb4wrlbrg0a64pjfhb-python3.12-simsimd-6.2.2
 shrinking /nix/store/yp70afy166a4q2mb4wrlbrg0a64pjfhb-python3.12-simsimd-6.2.2/lib/python3.12/site-packages/simsimd.cpython-312-aarch64-linux-gnu.so
 checking for references to /build/ in /nix/store/yp70afy166a4q2mb4wrlbrg0a64pjfhb-python3.12-simsimd-6.2.2...
 patching script interpreter paths in /nix/store/yp70afy166a4q2mb4wrlbrg0a64pjfhb-python3.12-simsimd-6.2.2
 stripping (with command strip and flags -S -p) in  /nix/store/yp70afy166a4q2mb4wrlbrg0a64pjfhb-python3.12-simsimd-6.2.2/lib
 shrinking RPATHs of ELF executables and libraries in /nix/store/6ijzkiqi9m6gb64k1l5053y2mj57l8sq-python3.12-simsimd-6.2.2-dist
 checking for references to /build/ in /nix/store/6ijzkiqi9m6gb64k1l5053y2mj57l8sq-python3.12-simsimd-6.2.2-dist...
 patching script interpreter paths in /nix/store/6ijzkiqi9m6gb64k1l5053y2mj57l8sq-python3.12-simsimd-6.2.2-dist
 Executing pythonRemoveTestsDir
 Finished executing pythonRemoveTestsDir
 Running phase: installCheckPhase
 @nix { "action": "setPhase", "phase": "installCheckPhase" }
 no Makefile or custom installCheckPhase, doing nothing
 Running phase: pythonCatchConflictsPhase
 @nix { "action": "setPhase", "phase": "pythonCatchConflictsPhase" }
 Running phase: pythonRemoveBinBytecodePhase
 @nix { "action": "setPhase", "phase": "pythonRemoveBinBytecodePhase" }
 Running phase: pythonImportsCheckPhase
 @nix { "action": "setPhase", "phase": "pythonImportsCheckPhase" }
 Executing pythonImportsCheckPhase
 Check whether the following modules can be imported: simsimd
 Running phase: pytestCheckPhase
 @nix { "action": "setPhase", "phase": "pytestCheckPhase" }
 Executing pytestCheckPhase
 ============================= test session starts ==============================
 platform linux -- Python 3.12.7, pytest-8.3.3, pluggy-1.5.0
 rootdir: /build/source
 configfile: pyproject.toml
 plugins: repeat-0.9.3
 collecting ... 
 collecting 9390 items                                                          
 collected 9390 items                                                           

 scripts/test.py ........................................................ [  0%]
 ........................................................................ [  1%]
 ........................................................................ [  2%]
 ........................................................................ [  2%]
 ........................................................................ [  3%]
 ........................................................................ [  4%]
 ........................................................................ [  5%]
 ........................................................................ [  5%]
 ........................................................................ [  6%]
 ........................................................................ [  7%]
 ........................................................................ [  8%]
 ........................................................................ [  9%]
 ........................................................................ [  9%]
 ........................................................................ [ 10%]
 ........................................................................ [ 11%]
 ........................................................................ [ 12%]
 ........................................................................ [ 12%]
 ........................................................................ [ 13%]
 ........................................................................ [ 14%]
 ........................................................................ [ 15%]
 ........................................................................ [ 15%]
 ......F.F......................................................FF..FF..F [ 16%]
 ..................................................F...FF................ [ 17%]
 ......................................F..FF..F.......................... [ 18%]
 ..........................F....FFFF..................................... [ 18%]
 ...............FF...FF.................................................. [ 19%]
 ...F...F...F..................................................F.F.FF.FFF [ 20%]
 ........................................................................ [ 21%]
 ........................................................................ [ 22%]
 ........................................................................ [ 22%]
 ........................................................................ [ 23%]
 ........................................................................ [ 24%]
 ........................................................................ [ 25%]
 ........................................................................ [ 25%]
 ........................................................................ [ 26%]
 ........................................................................ [ 27%]
 ..................F.FF.FFF..F....................F.F.F.FF..FFF.......... [ 28%]
 ......FFF....FFF...F...............F..FFF.....F..................F.FF..F [ 28%]
 .F.FFF.................FF.F.F..FFF..F................FF........F...F.... [ 29%]
 ...........F..F..F..FF.FFF.............................................. [ 30%]
 ........................................................................ [ 31%]
 ........................................................................ [ 32%]
 ........................................................................ [ 32%]
 ........................................................................ [ 33%]
 ........................................................................ [ 34%]
 ........................................................................ [ 35%]
 ........................................................................ [ 35%]
 ........................................................................ [ 36%]
 ........................................................................ [ 37%]
 ........................................................................ [ 38%]
 ........................................................................ [ 38%]
 ........................................................................ [ 39%]
 ..................................................ssssssssssssssssssssss [ 40%]
 ssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssss [ 41%]
 ssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssss [ 42%]
 ssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssss [ 42%]
 ssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssss [ 43%]
 ssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssss [ 44%]
 ssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssss [ 45%]
 ssssssssssssssssssssssssss.............................................. [ 45%]
 ..ssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssss [ 46%]
 ssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssss [ 47%]
 ssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssss [ 48%]
 ssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssss [ 48%]
 ssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssss [ 49%]
 ssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssss [ 50%]
 ssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssss [ 51%]
 ssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssss [ 51%]
 ssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssss [ 52%]
 ssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssss [ 53%]
 ssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssss [ 54%]
 ssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssss [ 55%]
 ssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssss [ 55%]
 ssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssss [ 56%]
 ssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssss [ 57%]
 ssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssss [ 58%]
 ssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssss [ 58%]
 ssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssss [ 59%]
 ssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssss [ 60%]
 ssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssss [ 61%]
 ssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssss [ 61%]
 ssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssss [ 62%]
 ssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssss [ 63%]
 ssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssss [ 64%]
 ssssssssssssssssssssssssssssssssss...................................... [ 65%]
 ........................................................................ [ 65%]
 ........................................................................ [ 66%]
 ............................................................FF....F..... [ 67%]
 F.......F............................................................... [ 68%]
 ......F................FF..........F.........F............F...F......... [ 68%]
 ..................................F...F.F.F...F.........F............... [ 69%]
 ........................................F.F.....FF..F.......F.......F... [ 70%]
 ...................F.......................................F.FFF.......F [ 71%]
 .F..F..F......................F........................F................ [ 71%]
 ...........F.F.F....F...FF.F..F.............F...............F........... [ 72%]
 ............................F.F.F..F.............F.F....F............... [ 73%]
 ............................................F...F...F..F.F..F........F.. [ 74%]
 ........................................................................ [ 74%]
 ........................................................................ [ 75%]
 ........................................................................ [ 76%]
 ........................................................................ [ 77%]
 ........................................................................ [ 78%]
 ........................................................................ [ 78%]
 ........................................................................ [ 79%]
 ........................................................................ [ 80%]
 ........................................................................ [ 81%]
 ........................................................................ [ 81%]
 ........................................................................ [ 82%]
 ........................................................................ [ 83%]
 ........................................................................ [ 84%]
 ........................................................................ [ 84%]
 ........................................................................ [ 85%]
 ........................................................................ [ 86%]
 ........................................................................ [ 87%]
 ..................................ssssssssssssssssssssssssssssssssssssss [ 88%]
 ssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssss [ 88%]
 ssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssss [ 89%]
 ssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssss [ 90%]
 ssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssss [ 91%]
 ssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssss.. [ 91%]
 ........................................................................ [ 92%]
 ........................................................................ [ 93%]
 ........................................................................ [ 94%]
 ......................................................................ss [ 94%]
 ssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssss [ 95%]
 ssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssss [ 96%]
 ssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssss [ 97%]
 ssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssss [ 97%]
 ssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssss [ 98%]
 ssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssss [ 99%]
 ssssssssssssssssssssssssssssssssssssssssssssss                           [100%]

 =================================== FAILURES ===================================
 _________________ test_curved[neon-mahalanobis-dtypes1-11-5-5] _________________

 ndim = 11, dtypes = ('float32', 'float32'), metric = 'mahalanobis'
 capability = 'neon'
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("ndim", [11, 97])
    @pytest.mark.parametrize(
        "dtypes",  # representation datatype and compute precision
        [
            ("float64", "float64"),
            ("float32", "float32"),
            ("float16", "float32"),  # otherwise NumPy keeps aggregating too much error
        ],
    )
    @pytest.mark.parametrize("metric", ["bilinear", "mahalanobis"])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_curved(ndim, dtypes, metric, capability, stats_fixture):
        """Compares various SIMD kernels (like Bilinear Forms and Mahalanobis distances) for curved spaces
        with their NumPy or baseline counterparts, testing accuracy for IEEE standard floating-point types."""
    
        dtype, compute_dtype = dtypes
        if dtype == "float16" and is_running_under_qemu():
            pytest.skip("Testing low-precision math isn't reliable in QEMU")
    
        np.random.seed()
    
        # Let's generate some non-negative probability distributions
        a = np.abs(np.random.randn(ndim).astype(dtype))
        b = np.abs(np.random.randn(ndim).astype(dtype))
        a /= np.sum(a)
        b /= np.sum(b)
    
        # Let's compute the inverse of the covariance matrix, otherwise in the SciPy
        # implementation of the Mahalanobis we may face `sqrt` of a negative number.
        # We multiply the matrix by its transpose to get a positive-semi-definite matrix.
        c = np.abs(np.random.randn(ndim, ndim).astype(dtype))
        c = np.dot(c, c.T)
    
        keep_one_capability(capability)
        baseline_kernel, simd_kernel = name_to_kernels(metric)
 >       accurate_dt, accurate = profile(
            baseline_kernel,
            a.astype(np.float64),
            b.astype(np.float64),
            c.astype(np.float64),
        )

 a          = array([0.13167344, 0.15050596, 0.18967141, 0.0307388 , 0.22518316,
       0.01336799, 0.04072221, 0.02369594, 0.16490129, 0.01101731,
       0.01852258], dtype=float32)
 b          = array([1.0619957e-01, 3.3094604e-02, 1.1910046e-01, 1.3819897e-01,
       9.8109081e-02, 1.8382599e-04, 1.0604137e-01, 1.2716078e-02,
       4.7696918e-02, 2.2190773e-01, 1.1675142e-01], dtype=float32)
 baseline_kernel = <function baseline_mahalanobis at 0x7ffff48ef4c0>
 c          = array([[ 3.557975 ,  3.4199727,  5.727091 ,  3.7900312,  4.64915  ,
         3.641796 ,  4.7969604,  4.1330605,  3.114... ,  8.731266 ,
         5.511739 ,  7.0755553,  4.938113 ,  6.867126 ,  8.196839 ,
         6.59861  ]], dtype=float32)
 capability = 'neon'
 compute_dtype = 'float32'
 dtype      = 'float32'
 dtypes     = ('float32', 'float32')
 metric     = 'mahalanobis'
 ndim       = 11
 simd_kernel = <built-in function mahalanobis>
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

 scripts/test.py:656: 
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
 scripts/test.py:179: in profile
    result = callable(*args, **kwargs)
        args       = (array([0.13167344, 0.15050596, 0.18967141, 0.0307388 , 0.22518316,
       0.01336799, 0.04072221, 0.02369594, 0.16490...71147394,  8.73126602,
         5.51173878,  7.07555532,  4.93811321,  6.86712599,  8.19683933,
         6.59860992]]))
        before     = 9820886571621
        callable   = <function baseline_mahalanobis at 0x7ffff48ef4c0>
        kwargs     = {}
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

 x = array([0.13167344, 0.15050596, 0.18967141, 0.0307388 , 0.22518316,
       0.01336799, 0.04072221, 0.02369594, 0.16490129, 0.01101731,
       0.01852258])
 y = array([1.06199570e-01, 3.30946036e-02, 1.19100459e-01, 1.38198972e-01,
       9.81090814e-02, 1.83825992e-04, 1.06041372e-01, 1.27160782e-02,
       4.76969182e-02, 2.21907735e-01, 1.16751418e-01])
 z = array([[ 3.55797505,  3.41997266,  5.72709084,  3.79003119,  4.64914989,
         3.64179611,  4.79696035,  4.13306046....71147394,  8.73126602,
         5.51173878,  7.07555532,  4.93811321,  6.86712599,  8.19683933,
         6.59860992]])

    def baseline_mahalanobis(x, y, z):
        diff = x - y
 >       return np.sqrt(diff @ z @ diff)
 E       RuntimeWarning: invalid value encountered in sqrt

 diff       = array([ 0.02547387,  0.11741136,  0.07057095, -0.10746017,  0.12707408,
        0.01318417, -0.06531917,  0.01097986,  0.11720437, -0.21089042,
       -0.09822883])
 x          = array([0.13167344, 0.15050596, 0.18967141, 0.0307388 , 0.22518316,
       0.01336799, 0.04072221, 0.02369594, 0.16490129, 0.01101731,
       0.01852258])
 y          = array([1.06199570e-01, 3.30946036e-02, 1.19100459e-01, 1.38198972e-01,
       9.81090814e-02, 1.83825992e-04, 1.06041372e-01, 1.27160782e-02,
       4.76969182e-02, 2.21907735e-01, 1.16751418e-01])
 z          = array([[ 3.55797505,  3.41997266,  5.72709084,  3.79003119,  4.64914989,
         3.64179611,  4.79696035,  4.13306046....71147394,  8.73126602,
         5.51173878,  7.07555532,  4.93811321,  6.86712599,  8.19683933,
         6.59860992]])

 scripts/test.py:152: RuntimeWarning
 _________________ test_curved[neon-mahalanobis-dtypes1-97-2-5] _________________

 ndim = 97, dtypes = ('float32', 'float32'), metric = 'mahalanobis'
 capability = 'neon'
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("ndim", [11, 97])
    @pytest.mark.parametrize(
        "dtypes",  # representation datatype and compute precision
        [
            ("float64", "float64"),
            ("float32", "float32"),
            ("float16", "float32"),  # otherwise NumPy keeps aggregating too much error
        ],
    )
    @pytest.mark.parametrize("metric", ["bilinear", "mahalanobis"])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_curved(ndim, dtypes, metric, capability, stats_fixture):
        """Compares various SIMD kernels (like Bilinear Forms and Mahalanobis distances) for curved spaces
        with their NumPy or baseline counterparts, testing accuracy for IEEE standard floating-point types."""
    
        dtype, compute_dtype = dtypes
        if dtype == "float16" and is_running_under_qemu():
            pytest.skip("Testing low-precision math isn't reliable in QEMU")
    
        np.random.seed()
    
        # Let's generate some non-negative probability distributions
        a = np.abs(np.random.randn(ndim).astype(dtype))
        b = np.abs(np.random.randn(ndim).astype(dtype))
        a /= np.sum(a)
        b /= np.sum(b)
    
        # Let's compute the inverse of the covariance matrix, otherwise in the SciPy
        # implementation of the Mahalanobis we may face `sqrt` of a negative number.
        # We multiply the matrix by its transpose to get a positive-semi-definite matrix.
        c = np.abs(np.random.randn(ndim, ndim).astype(dtype))
        c = np.dot(c, c.T)
    
        keep_one_capability(capability)
        baseline_kernel, simd_kernel = name_to_kernels(metric)
 >       accurate_dt, accurate = profile(
            baseline_kernel,
            a.astype(np.float64),
            b.astype(np.float64),
            c.astype(np.float64),
        )

 a          = array([3.44967097e-02, 1.67527853e-03, 1.28383664e-02, 8.34888488e-04,
       4.65473533e-03, 1.02375327e-02, 1.726503...1676e-03,
       1.11577602e-03, 1.04633847e-03, 1.46917729e-02, 3.11112050e-02,
       6.91564195e-03], dtype=float32)
 b          = array([1.99849140e-02, 2.21117418e-02, 4.11723508e-03, 1.96717586e-03,
       4.05539945e-03, 3.48104537e-03, 4.569997...7877e-02,
       5.16072009e-03, 1.49633428e-02, 2.16991827e-02, 1.35745844e-02,
       2.02312954e-02], dtype=float32)
 baseline_kernel = <function baseline_mahalanobis at 0x7ffff48ef4c0>
 c          = array([[ 77.97306 ,  76.811745,  71.55448 , ...,  68.28931 ,  60.700413,
         81.64061 ],
       [ 76.811745,  59.....583115],
       [ 81.64061 ,  68.23968 ,  57.09931 , ...,  54.93413 ,  66.583115,
        109.194084]], dtype=float32)
 capability = 'neon'
 compute_dtype = 'float32'
 dtype      = 'float32'
 dtypes     = ('float32', 'float32')
 metric     = 'mahalanobis'
 ndim       = 97
 simd_kernel = <built-in function mahalanobis>
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

 scripts/test.py:656: 
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
 scripts/test.py:179: in profile
    result = callable(*args, **kwargs)
        args       = (array([3.44967097e-02, 1.67527853e-03, 1.28383664e-02, 8.34888488e-04,
       4.65473533e-03, 1.02375327e-02, 1.72650....58311462],
       [ 81.64060974,  68.23967743,  57.09931183, ...,  54.93413162,
         66.58311462, 109.19408417]]))
        before     = 9821643847872
        callable   = <function baseline_mahalanobis at 0x7ffff48ef4c0>
        kwargs     = {}
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

 x = array([3.44967097e-02, 1.67527853e-03, 1.28383664e-02, 8.34888488e-04,
       4.65473533e-03, 1.02375327e-02, 1.726503...141e-03, 1.62031676e-03,
       1.11577602e-03, 1.04633847e-03, 1.46917729e-02, 3.11112050e-02,
       6.91564195e-03])
 y = array([1.99849140e-02, 2.21117418e-02, 4.11723508e-03, 1.96717586e-03,
       4.05539945e-03, 3.48104537e-03, 4.569997...348e-03, 2.36117877e-02,
       5.16072009e-03, 1.49633428e-02, 2.16991827e-02, 1.35745844e-02,
       2.02312954e-02])
 z = array([[ 77.97306061,  76.81174469,  71.55448151, ...,  68.28930664,
         60.70041275,  81.64060974],
       [ 76....6.58311462],
       [ 81.64060974,  68.23967743,  57.09931183, ...,  54.93413162,
         66.58311462, 109.19408417]])

    def baseline_mahalanobis(x, y, z):
        diff = x - y
 >       return np.sqrt(diff @ z @ diff)
 E       RuntimeWarning: invalid value encountered in sqrt

 diff       = array([ 1.45117957e-02, -2.04364633e-02,  8.72113137e-03, -1.13228738e-03,
        5.99335879e-04,  6.75648730e-03,  1...3, -2.19914709e-02,
       -4.04494407e-03, -1.39170043e-02, -7.00740982e-03,  1.75366206e-02,
       -1.33156534e-02])
 x          = array([3.44967097e-02, 1.67527853e-03, 1.28383664e-02, 8.34888488e-04,
       4.65473533e-03, 1.02375327e-02, 1.726503...141e-03, 1.62031676e-03,
       1.11577602e-03, 1.04633847e-03, 1.46917729e-02, 3.11112050e-02,
       6.91564195e-03])
 y          = array([1.99849140e-02, 2.21117418e-02, 4.11723508e-03, 1.96717586e-03,
       4.05539945e-03, 3.48104537e-03, 4.569997...348e-03, 2.36117877e-02,
       5.16072009e-03, 1.49633428e-02, 2.16991827e-02, 1.35745844e-02,
       2.02312954e-02])
 z          = array([[ 77.97306061,  76.81174469,  71.55448151, ...,  68.28930664,
         60.70041275,  81.64060974],
       [ 76....6.58311462],
       [ 81.64060974,  68.23967743,  57.09931183, ...,  54.93413162,
         66.58311462, 109.19408417]])

 scripts/test.py:152: RuntimeWarning
 _______________ test_curved[neon_f16-mahalanobis-dtypes1-11-2-5] _______________

 ndim = 11, dtypes = ('float32', 'float32'), metric = 'mahalanobis'
 capability = 'neon_f16'
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("ndim", [11, 97])
    @pytest.mark.parametrize(
        "dtypes",  # representation datatype and compute precision
        [
            ("float64", "float64"),
            ("float32", "float32"),
            ("float16", "float32"),  # otherwise NumPy keeps aggregating too much error
        ],
    )
    @pytest.mark.parametrize("metric", ["bilinear", "mahalanobis"])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_curved(ndim, dtypes, metric, capability, stats_fixture):
        """Compares various SIMD kernels (like Bilinear Forms and Mahalanobis distances) for curved spaces
        with their NumPy or baseline counterparts, testing accuracy for IEEE standard floating-point types."""
    
        dtype, compute_dtype = dtypes
        if dtype == "float16" and is_running_under_qemu():
            pytest.skip("Testing low-precision math isn't reliable in QEMU")
    
        np.random.seed()
    
        # Let's generate some non-negative probability distributions
        a = np.abs(np.random.randn(ndim).astype(dtype))
        b = np.abs(np.random.randn(ndim).astype(dtype))
        a /= np.sum(a)
        b /= np.sum(b)
    
        # Let's compute the inverse of the covariance matrix, otherwise in the SciPy
        # implementation of the Mahalanobis we may face `sqrt` of a negative number.
        # We multiply the matrix by its transpose to get a positive-semi-definite matrix.
        c = np.abs(np.random.randn(ndim, ndim).astype(dtype))
        c = np.dot(c, c.T)
    
        keep_one_capability(capability)
        baseline_kernel, simd_kernel = name_to_kernels(metric)
 >       accurate_dt, accurate = profile(
            baseline_kernel,
            a.astype(np.float64),
            b.astype(np.float64),
            c.astype(np.float64),
        )

 a          = array([0.21637133, 0.04856232, 0.04309152, 0.2060321 , 0.2342499 ,
       0.05682889, 0.01402436, 0.03922037, 0.00986392, 0.08612498,
       0.04563025], dtype=float32)
 b          = array([0.18556911, 0.06339174, 0.01003947, 0.10192975, 0.11641203,
       0.07646879, 0.05194552, 0.1327015 , 0.04215306, 0.1004096 ,
       0.11897945], dtype=float32)
 baseline_kernel = <function baseline_mahalanobis at 0x7ffff48ef4c0>
 c          = array([[ 8.110108 ,  6.7974567,  8.803208 ,  9.883324 ,  5.3769436,
         9.92431  ,  7.99684  , 15.103909 , 11.788... ,  9.494029 ,
         8.6344795,  7.9864926,  8.741367 ,  8.583469 ,  5.7238865,
         8.918855 ]], dtype=float32)
 capability = 'neon_f16'
 compute_dtype = 'float32'
 dtype      = 'float32'
 dtypes     = ('float32', 'float32')
 metric     = 'mahalanobis'
 ndim       = 11
 simd_kernel = <built-in function mahalanobis>
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

 scripts/test.py:656: 
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
 scripts/test.py:179: in profile
    result = callable(*args, **kwargs)
        args       = (array([0.21637133, 0.04856232, 0.04309152, 0.2060321 , 0.2342499 ,
       0.05682889, 0.01402436, 0.03922037, 0.00986...42147636,  9.49402905,
         8.63447952,  7.98649263,  8.74136734,  8.58346939,  5.72388649,
         8.91885471]]))
        before     = 9823566963333
        callable   = <function baseline_mahalanobis at 0x7ffff48ef4c0>
        kwargs     = {}
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

 x = array([0.21637133, 0.04856232, 0.04309152, 0.2060321 , 0.2342499 ,
       0.05682889, 0.01402436, 0.03922037, 0.00986392, 0.08612498,
       0.04563025])
 y = array([0.18556911, 0.06339174, 0.01003947, 0.10192975, 0.11641203,
       0.07646879, 0.05194552, 0.1327015 , 0.04215306, 0.1004096 ,
       0.11897945])
 z = array([[ 8.11010838,  6.79745674,  8.80320835,  9.88332367,  5.37694359,
         9.92430973,  7.99684   , 15.10390854....42147636,  9.49402905,
         8.63447952,  7.98649263,  8.74136734,  8.58346939,  5.72388649,
         8.91885471]])

    def baseline_mahalanobis(x, y, z):
        diff = x - y
 >       return np.sqrt(diff @ z @ diff)
 E       RuntimeWarning: invalid value encountered in sqrt

 diff       = array([ 0.03080222, -0.01482942,  0.03305206,  0.10410235,  0.11783788,
       -0.01963989, -0.03792116, -0.09348113, -0.03228914, -0.01428462,
       -0.0733492 ])
 x          = array([0.21637133, 0.04856232, 0.04309152, 0.2060321 , 0.2342499 ,
       0.05682889, 0.01402436, 0.03922037, 0.00986392, 0.08612498,
       0.04563025])
 y          = array([0.18556911, 0.06339174, 0.01003947, 0.10192975, 0.11641203,
       0.07646879, 0.05194552, 0.1327015 , 0.04215306, 0.1004096 ,
       0.11897945])
 z          = array([[ 8.11010838,  6.79745674,  8.80320835,  9.88332367,  5.37694359,
         9.92430973,  7.99684   , 15.10390854....42147636,  9.49402905,
         8.63447952,  7.98649263,  8.74136734,  8.58346939,  5.72388649,
         8.91885471]])

 scripts/test.py:152: RuntimeWarning
 _______________ test_curved[neon_f16-mahalanobis-dtypes1-11-3-5] _______________

 ndim = 11, dtypes = ('float32', 'float32'), metric = 'mahalanobis'
 capability = 'neon_f16'
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("ndim", [11, 97])
    @pytest.mark.parametrize(
        "dtypes",  # representation datatype and compute precision
        [
            ("float64", "float64"),
            ("float32", "float32"),
            ("float16", "float32"),  # otherwise NumPy keeps aggregating too much error
        ],
    )
    @pytest.mark.parametrize("metric", ["bilinear", "mahalanobis"])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_curved(ndim, dtypes, metric, capability, stats_fixture):
        """Compares various SIMD kernels (like Bilinear Forms and Mahalanobis distances) for curved spaces
        with their NumPy or baseline counterparts, testing accuracy for IEEE standard floating-point types."""
    
        dtype, compute_dtype = dtypes
        if dtype == "float16" and is_running_under_qemu():
            pytest.skip("Testing low-precision math isn't reliable in QEMU")
    
        np.random.seed()
    
        # Let's generate some non-negative probability distributions
        a = np.abs(np.random.randn(ndim).astype(dtype))
        b = np.abs(np.random.randn(ndim).astype(dtype))
        a /= np.sum(a)
        b /= np.sum(b)
    
        # Let's compute the inverse of the covariance matrix, otherwise in the SciPy
        # implementation of the Mahalanobis we may face `sqrt` of a negative number.
        # We multiply the matrix by its transpose to get a positive-semi-definite matrix.
        c = np.abs(np.random.randn(ndim, ndim).astype(dtype))
        c = np.dot(c, c.T)
    
        keep_one_capability(capability)
        baseline_kernel, simd_kernel = name_to_kernels(metric)
 >       accurate_dt, accurate = profile(
            baseline_kernel,
            a.astype(np.float64),
            b.astype(np.float64),
            c.astype(np.float64),
        )

 a          = array([0.02360253, 0.18308   , 0.03068176, 0.07026859, 0.13014638,
       0.02060259, 0.10785031, 0.05325107, 0.15415649, 0.16237825,
       0.06398205], dtype=float32)
 b          = array([0.00126662, 0.15673614, 0.06694362, 0.09488472, 0.04258485,
       0.01024103, 0.26413184, 0.17074494, 0.01940758, 0.02439603,
       0.1486627 ], dtype=float32)
 baseline_kernel = <function baseline_mahalanobis at 0x7ffff48ef4c0>
 c          = array([[ 6.6733613,  8.81394  ,  7.4502563,  3.7552252,  7.7031856,
         8.132602 ,  8.683077 ,  3.6828296,  8.090... ,  8.377097 ,
         6.2064533,  5.8267903,  8.086222 ,  8.129645 ,  8.070565 ,
         4.1204777]], dtype=float32)
 capability = 'neon_f16'
 compute_dtype = 'float32'
 dtype      = 'float32'
 dtypes     = ('float32', 'float32')
 metric     = 'mahalanobis'
 ndim       = 11
 simd_kernel = <built-in function mahalanobis>
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

 scripts/test.py:656: 
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
 scripts/test.py:179: in profile
    result = callable(*args, **kwargs)
        args       = (array([0.02360253, 0.18308   , 0.03068176, 0.07026859, 0.13014638,
       0.02060259, 0.10785031, 0.05325107, 0.15415...06596088,  8.37709713,
         6.20645332,  5.82679033,  8.08622169,  8.12964535,  8.07056522,
         4.12047768]]))
        before     = 9824159874180
        callable   = <function baseline_mahalanobis at 0x7ffff48ef4c0>
        kwargs     = {}
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

 x = array([0.02360253, 0.18308   , 0.03068176, 0.07026859, 0.13014638,
       0.02060259, 0.10785031, 0.05325107, 0.15415649, 0.16237825,
       0.06398205])
 y = array([0.00126662, 0.15673614, 0.06694362, 0.09488472, 0.04258485,
       0.01024103, 0.26413184, 0.17074494, 0.01940758, 0.02439603,
       0.1486627 ])
 z = array([[ 6.6733613 ,  8.81394005,  7.45025635,  3.75522518,  7.70318556,
         8.13260174,  8.68307686,  3.68282962....06596088,  8.37709713,
         6.20645332,  5.82679033,  8.08622169,  8.12964535,  8.07056522,
         4.12047768]])

    def baseline_mahalanobis(x, y, z):
        diff = x - y
 >       return np.sqrt(diff @ z @ diff)
 E       RuntimeWarning: invalid value encountered in sqrt

 diff       = array([ 0.02233591,  0.02634387, -0.03626186, -0.02461614,  0.08756153,
        0.01036156, -0.15628154, -0.11749387,  0.13474891,  0.13798223,
       -0.08468065])
 x          = array([0.02360253, 0.18308   , 0.03068176, 0.07026859, 0.13014638,
       0.02060259, 0.10785031, 0.05325107, 0.15415649, 0.16237825,
       0.06398205])
 y          = array([0.00126662, 0.15673614, 0.06694362, 0.09488472, 0.04258485,
       0.01024103, 0.26413184, 0.17074494, 0.01940758, 0.02439603,
       0.1486627 ])
 z          = array([[ 6.6733613 ,  8.81394005,  7.45025635,  3.75522518,  7.70318556,
         8.13260174,  8.68307686,  3.68282962....06596088,  8.37709713,
         6.20645332,  5.82679033,  8.08622169,  8.12964535,  8.07056522,
         4.12047768]])

 scripts/test.py:152: RuntimeWarning
 _______________ test_curved[neon_f16-mahalanobis-dtypes1-97-1-5] _______________

 ndim = 97, dtypes = ('float32', 'float32'), metric = 'mahalanobis'
 capability = 'neon_f16'
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("ndim", [11, 97])
    @pytest.mark.parametrize(
        "dtypes",  # representation datatype and compute precision
        [
            ("float64", "float64"),
            ("float32", "float32"),
            ("float16", "float32"),  # otherwise NumPy keeps aggregating too much error
        ],
    )
    @pytest.mark.parametrize("metric", ["bilinear", "mahalanobis"])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_curved(ndim, dtypes, metric, capability, stats_fixture):
        """Compares various SIMD kernels (like Bilinear Forms and Mahalanobis distances) for curved spaces
        with their NumPy or baseline counterparts, testing accuracy for IEEE standard floating-point types."""
    
        dtype, compute_dtype = dtypes
        if dtype == "float16" and is_running_under_qemu():
            pytest.skip("Testing low-precision math isn't reliable in QEMU")
    
        np.random.seed()
    
        # Let's generate some non-negative probability distributions
        a = np.abs(np.random.randn(ndim).astype(dtype))
        b = np.abs(np.random.randn(ndim).astype(dtype))
        a /= np.sum(a)
        b /= np.sum(b)
    
        # Let's compute the inverse of the covariance matrix, otherwise in the SciPy
        # implementation of the Mahalanobis we may face `sqrt` of a negative number.
        # We multiply the matrix by its transpose to get a positive-semi-definite matrix.
        c = np.abs(np.random.randn(ndim, ndim).astype(dtype))
        c = np.dot(c, c.T)
    
        keep_one_capability(capability)
        baseline_kernel, simd_kernel = name_to_kernels(metric)
 >       accurate_dt, accurate = profile(
            baseline_kernel,
            a.astype(np.float64),
            b.astype(np.float64),
            c.astype(np.float64),
        )

 a          = array([0.02029096, 0.00078307, 0.01586928, 0.01813707, 0.01853877,
       0.01701597, 0.01190905, 0.00023836, 0.024074...2451,
       0.00580065, 0.00452589, 0.00563187, 0.00381296, 0.01138814,
       0.00465899, 0.00368648], dtype=float32)
 b          = array([5.14200563e-03, 1.89839900e-02, 5.73832868e-03, 1.77513305e-02,
       1.26091158e-02, 1.51315471e-03, 1.961492...6781e-02,
       1.58364475e-02, 1.58051925e-03, 2.31816171e-04, 9.27452743e-03,
       2.78182956e-03], dtype=float32)
 baseline_kernel = <function baseline_mahalanobis at 0x7ffff48ef4c0>
 c          = array([[49.113373, 54.71084 , 66.030914, ..., 61.983906, 57.132576,
        66.41026 ],
       [54.71084 , 59.239838, ...    65.29515 ],
       [66.41026 , 74.66848 , 73.26658 , ..., 63.383648, 65.29515 ,
        95.39811 ]], dtype=float32)
 capability = 'neon_f16'
 compute_dtype = 'float32'
 dtype      = 'float32'
 dtypes     = ('float32', 'float32')
 metric     = 'mahalanobis'
 ndim       = 97
 simd_kernel = <built-in function mahalanobis>
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

 scripts/test.py:656: 
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
 scripts/test.py:179: in profile
    result = callable(*args, **kwargs)
        args       = (array([0.02029096, 0.00078307, 0.01586928, 0.01813707, 0.01853877,
       0.01701597, 0.01190905, 0.00023836, 0.02407...78, 65.29515076],
       [66.41026306, 74.66847992, 73.26657867, ..., 63.38364792,
        65.29515076, 95.39810944]]))
        before     = 9824772225823
        callable   = <function baseline_mahalanobis at 0x7ffff48ef4c0>
        kwargs     = {}
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

 x = array([0.02029096, 0.00078307, 0.01586928, 0.01813707, 0.01853877,
       0.01701597, 0.01190905, 0.00023836, 0.024074...0014924, 0.01122451,
       0.00580065, 0.00452589, 0.00563187, 0.00381296, 0.01138814,
       0.00465899, 0.00368648])
 y = array([5.14200563e-03, 1.89839900e-02, 5.73832868e-03, 1.77513305e-02,
       1.26091158e-02, 1.51315471e-03, 1.961492...431e-03, 1.23376781e-02,
       1.58364475e-02, 1.58051925e-03, 2.31816171e-04, 9.27452743e-03,
       2.78182956e-03])
 z = array([[49.1133728 , 54.71083832, 66.03091431, ..., 61.98390579,
        57.13257599, 66.41026306],
       [54.7108383...378, 65.29515076],
       [66.41026306, 74.66847992, 73.26657867, ..., 63.38364792,
        65.29515076, 95.39810944]])

    def baseline_mahalanobis(x, y, z):
        diff = x - y
 >       return np.sqrt(diff @ z @ diff)
 E       RuntimeWarning: invalid value encountered in sqrt

 diff       = array([ 1.51489540e-02, -1.82009204e-02,  1.01309535e-02,  3.85737047e-04,
        5.92965912e-03,  1.55028165e-02, -7...3, -7.81179173e-03,
       -1.02045732e-02,  2.23244063e-03,  1.11563236e-02, -4.61553596e-03,
        9.04653221e-04])
 x          = array([0.02029096, 0.00078307, 0.01586928, 0.01813707, 0.01853877,
       0.01701597, 0.01190905, 0.00023836, 0.024074...0014924, 0.01122451,
       0.00580065, 0.00452589, 0.00563187, 0.00381296, 0.01138814,
       0.00465899, 0.00368648])
 y          = array([5.14200563e-03, 1.89839900e-02, 5.73832868e-03, 1.77513305e-02,
       1.26091158e-02, 1.51315471e-03, 1.961492...431e-03, 1.23376781e-02,
       1.58364475e-02, 1.58051925e-03, 2.31816171e-04, 9.27452743e-03,
       2.78182956e-03])
 z          = array([[49.1133728 , 54.71083832, 66.03091431, ..., 61.98390579,
        57.13257599, 66.41026306],
       [54.7108383...378, 65.29515076],
       [66.41026306, 74.66847992, 73.26657867, ..., 63.38364792,
        65.29515076, 95.39810944]])

 scripts/test.py:152: RuntimeWarning
 _______________ test_curved[neon_f16-mahalanobis-dtypes1-97-2-5] _______________

 ndim = 97, dtypes = ('float32', 'float32'), metric = 'mahalanobis'
 capability = 'neon_f16'
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("ndim", [11, 97])
    @pytest.mark.parametrize(
        "dtypes",  # representation datatype and compute precision
        [
            ("float64", "float64"),
            ("float32", "float32"),
            ("float16", "float32"),  # otherwise NumPy keeps aggregating too much error
        ],
    )
    @pytest.mark.parametrize("metric", ["bilinear", "mahalanobis"])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_curved(ndim, dtypes, metric, capability, stats_fixture):
        """Compares various SIMD kernels (like Bilinear Forms and Mahalanobis distances) for curved spaces
        with their NumPy or baseline counterparts, testing accuracy for IEEE standard floating-point types."""
    
        dtype, compute_dtype = dtypes
        if dtype == "float16" and is_running_under_qemu():
            pytest.skip("Testing low-precision math isn't reliable in QEMU")
    
        np.random.seed()
    
        # Let's generate some non-negative probability distributions
        a = np.abs(np.random.randn(ndim).astype(dtype))
        b = np.abs(np.random.randn(ndim).astype(dtype))
        a /= np.sum(a)
        b /= np.sum(b)
    
        # Let's compute the inverse of the covariance matrix, otherwise in the SciPy
        # implementation of the Mahalanobis we may face `sqrt` of a negative number.
        # We multiply the matrix by its transpose to get a positive-semi-definite matrix.
        c = np.abs(np.random.randn(ndim, ndim).astype(dtype))
        c = np.dot(c, c.T)
    
        keep_one_capability(capability)
        baseline_kernel, simd_kernel = name_to_kernels(metric)
 >       accurate_dt, accurate = profile(
            baseline_kernel,
            a.astype(np.float64),
            b.astype(np.float64),
            c.astype(np.float64),
        )

 a          = array([0.00315781, 0.00666896, 0.01254073, 0.0088437 , 0.00541541,
       0.0076708 , 0.00620551, 0.02453831, 0.003536...021 ,
       0.00080413, 0.02511563, 0.00557829, 0.00786351, 0.00189263,
       0.00334593, 0.03460317], dtype=float32)
 b          = array([0.01593945, 0.00666234, 0.00155026, 0.00558872, 0.01782131,
       0.01990369, 0.00535672, 0.010275  , 0.005424...3105,
       0.0043848 , 0.00849854, 0.02463743, 0.01569486, 0.02140797,
       0.00302977, 0.01345079], dtype=float32)
 baseline_kernel = <function baseline_mahalanobis at 0x7ffff48ef4c0>
 c          = array([[62.262817, 65.337456, 61.640217, ..., 60.866463, 59.109413,
        74.827805],
       [65.337456, 66.803024, ...    63.45498 ],
       [74.827805, 64.75887 , 77.49839 , ..., 62.832157, 63.45498 ,
        97.728966]], dtype=float32)
 capability = 'neon_f16'
 compute_dtype = 'float32'
 dtype      = 'float32'
 dtypes     = ('float32', 'float32')
 metric     = 'mahalanobis'
 ndim       = 97
 simd_kernel = <built-in function mahalanobis>
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

 scripts/test.py:656: 
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
 scripts/test.py:179: in profile
    result = callable(*args, **kwargs)
        args       = (array([0.00315781, 0.00666896, 0.01254073, 0.0088437 , 0.00541541,
       0.0076708 , 0.00620551, 0.02453831, 0.00353...28, 63.45497894],
       [74.82780457, 64.75887299, 77.4983902 , ..., 62.83215714,
        63.45497894, 97.72896576]]))
        before     = 9825402973703
        callable   = <function baseline_mahalanobis at 0x7ffff48ef4c0>
        kwargs     = {}
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

 x = array([0.00315781, 0.00666896, 0.01254073, 0.0088437 , 0.00541541,
       0.0076708 , 0.00620551, 0.02453831, 0.003536...0175568, 0.0036021 ,
       0.00080413, 0.02511563, 0.00557829, 0.00786351, 0.00189263,
       0.00334593, 0.03460317])
 y = array([0.01593945, 0.00666234, 0.00155026, 0.00558872, 0.01782131,
       0.01990369, 0.00535672, 0.010275  , 0.005424...2064432, 0.00413105,
       0.0043848 , 0.00849854, 0.02463743, 0.01569486, 0.02140797,
       0.00302977, 0.01345079])
 z = array([[62.26281738, 65.33745575, 61.64021683, ..., 60.86646271,
        59.10941315, 74.82780457],
       [65.3374557...528, 63.45497894],
       [74.82780457, 64.75887299, 77.4983902 , ..., 62.83215714,
        63.45497894, 97.72896576]])

    def baseline_mahalanobis(x, y, z):
        diff = x - y
 >       return np.sqrt(diff @ z @ diff)
 E       RuntimeWarning: invalid value encountered in sqrt

 diff       = array([-1.27816405e-02,  6.62449747e-06,  1.09904782e-02,  3.25498590e-03,
       -1.24058942e-02, -1.22328890e-02,  8...3,  1.66170904e-02,
       -1.90591402e-02, -7.83134531e-03, -1.95153381e-02,  3.16156074e-04,
        2.11523809e-02])
 x          = array([0.00315781, 0.00666896, 0.01254073, 0.0088437 , 0.00541541,
       0.0076708 , 0.00620551, 0.02453831, 0.003536...0175568, 0.0036021 ,
       0.00080413, 0.02511563, 0.00557829, 0.00786351, 0.00189263,
       0.00334593, 0.03460317])
 y          = array([0.01593945, 0.00666234, 0.00155026, 0.00558872, 0.01782131,
       0.01990369, 0.00535672, 0.010275  , 0.005424...2064432, 0.00413105,
       0.0043848 , 0.00849854, 0.02463743, 0.01569486, 0.02140797,
       0.00302977, 0.01345079])
 z          = array([[62.26281738, 65.33745575, 61.64021683, ..., 60.86646271,
        59.10941315, 74.82780457],
       [65.3374557...528, 63.45497894],
       [74.82780457, 64.75887299, 77.4983902 , ..., 62.83215714,
        63.45497894, 97.72896576]])

 scripts/test.py:152: RuntimeWarning
 _______________ test_curved[neon_f16-mahalanobis-dtypes1-97-5-5] _______________

 ndim = 97, dtypes = ('float32', 'float32'), metric = 'mahalanobis'
 capability = 'neon_f16'
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("ndim", [11, 97])
    @pytest.mark.parametrize(
        "dtypes",  # representation datatype and compute precision
        [
            ("float64", "float64"),
            ("float32", "float32"),
            ("float16", "float32"),  # otherwise NumPy keeps aggregating too much error
        ],
    )
    @pytest.mark.parametrize("metric", ["bilinear", "mahalanobis"])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_curved(ndim, dtypes, metric, capability, stats_fixture):
        """Compares various SIMD kernels (like Bilinear Forms and Mahalanobis distances) for curved spaces
        with their NumPy or baseline counterparts, testing accuracy for IEEE standard floating-point types."""
    
        dtype, compute_dtype = dtypes
        if dtype == "float16" and is_running_under_qemu():
            pytest.skip("Testing low-precision math isn't reliable in QEMU")
    
        np.random.seed()
    
        # Let's generate some non-negative probability distributions
        a = np.abs(np.random.randn(ndim).astype(dtype))
        b = np.abs(np.random.randn(ndim).astype(dtype))
        a /= np.sum(a)
        b /= np.sum(b)
    
        # Let's compute the inverse of the covariance matrix, otherwise in the SciPy
        # implementation of the Mahalanobis we may face `sqrt` of a negative number.
        # We multiply the matrix by its transpose to get a positive-semi-definite matrix.
        c = np.abs(np.random.randn(ndim, ndim).astype(dtype))
        c = np.dot(c, c.T)
    
        keep_one_capability(capability)
        baseline_kernel, simd_kernel = name_to_kernels(metric)
 >       accurate_dt, accurate = profile(
            baseline_kernel,
            a.astype(np.float64),
            b.astype(np.float64),
            c.astype(np.float64),
        )

 a          = array([0.00189924, 0.00091342, 0.01714597, 0.00513088, 0.0037915 ,
       0.03393652, 0.00681399, 0.00801065, 0.000185...987 ,
       0.02589503, 0.00768547, 0.000888  , 0.01104717, 0.00019872,
       0.00504095, 0.00230363], dtype=float32)
 b          = array([1.65795721e-02, 1.52815124e-02, 2.71885446e-03, 8.84247292e-03,
       2.83813337e-03, 5.15829073e-03, 2.268537...5446e-03,
       5.92654571e-03, 5.95533988e-03, 1.58179980e-02, 1.47374375e-02,
       2.19959137e-03], dtype=float32)
 baseline_kernel = <function baseline_mahalanobis at 0x7ffff48ef4c0>
 c          = array([[ 61.05769 ,  56.31328 ,  60.732437, ...,  56.88888 ,  62.38491 ,
         57.73981 ],
       [ 56.31328 ,  60.....24216 ],
       [ 57.73981 ,  64.37535 ,  62.139782, ...,  73.44238 ,  67.24216 ,
        106.403984]], dtype=float32)
 capability = 'neon_f16'
 compute_dtype = 'float32'
 dtype      = 'float32'
 dtypes     = ('float32', 'float32')
 metric     = 'mahalanobis'
 ndim       = 97
 simd_kernel = <built-in function mahalanobis>
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

 scripts/test.py:656: 
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
 scripts/test.py:179: in profile
    result = callable(*args, **kwargs)
        args       = (array([0.00189924, 0.00091342, 0.01714597, 0.00513088, 0.0037915 ,
       0.03393652, 0.00681399, 0.00801065, 0.00018....24215698],
       [ 57.73981094,  64.37535095,  62.13978195, ...,  73.44238281,
         67.24215698, 106.40398407]]))
        before     = 9826063381028
        callable   = <function baseline_mahalanobis at 0x7ffff48ef4c0>
        kwargs     = {}
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

 x = array([0.00189924, 0.00091342, 0.01714597, 0.00513088, 0.0037915 ,
       0.03393652, 0.00681399, 0.00801065, 0.000185...1066762, 0.0128987 ,
       0.02589503, 0.00768547, 0.000888  , 0.01104717, 0.00019872,
       0.00504095, 0.00230363])
 y = array([1.65795721e-02, 1.52815124e-02, 2.71885446e-03, 8.84247292e-03,
       2.83813337e-03, 5.15829073e-03, 2.268537...262e-02, 3.16215446e-03,
       5.92654571e-03, 5.95533988e-03, 1.58179980e-02, 1.47374375e-02,
       2.19959137e-03])
 z = array([[ 61.05768967,  56.3132782 ,  60.73243713, ...,  56.88888168,
         62.38491058,  57.73981094],
       [ 56....7.24215698],
       [ 57.73981094,  64.37535095,  62.13978195, ...,  73.44238281,
         67.24215698, 106.40398407]])

    def baseline_mahalanobis(x, y, z):
        diff = x - y
 >       return np.sqrt(diff @ z @ diff)
 E       RuntimeWarning: invalid value encountered in sqrt

 diff       = array([-0.01468033, -0.01436809,  0.01442712, -0.0037116 ,  0.00095337,
        0.02877822,  0.00658713, -0.00958121, ... -0.00231229,
       -0.0037552 ,  0.00452331, -0.00503855,  0.00509183, -0.01561928,
       -0.00969649,  0.00010404])
 x          = array([0.00189924, 0.00091342, 0.01714597, 0.00513088, 0.0037915 ,
       0.03393652, 0.00681399, 0.00801065, 0.000185...1066762, 0.0128987 ,
       0.02589503, 0.00768547, 0.000888  , 0.01104717, 0.00019872,
       0.00504095, 0.00230363])
 y          = array([1.65795721e-02, 1.52815124e-02, 2.71885446e-03, 8.84247292e-03,
       2.83813337e-03, 5.15829073e-03, 2.268537...262e-02, 3.16215446e-03,
       5.92654571e-03, 5.95533988e-03, 1.58179980e-02, 1.47374375e-02,
       2.19959137e-03])
 z          = array([[ 61.05768967,  56.3132782 ,  60.73243713, ...,  56.88888168,
         62.38491058,  57.73981094],
       [ 56....7.24215698],
       [ 57.73981094,  64.37535095,  62.13978195, ...,  73.44238281,
         67.24215698, 106.40398407]])

 scripts/test.py:152: RuntimeWarning
 ______________ test_curved[neon_bf16-mahalanobis-dtypes1-11-1-5] _______________

 ndim = 11, dtypes = ('float32', 'float32'), metric = 'mahalanobis'
 capability = 'neon_bf16'
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("ndim", [11, 97])
    @pytest.mark.parametrize(
        "dtypes",  # representation datatype and compute precision
        [
            ("float64", "float64"),
            ("float32", "float32"),
            ("float16", "float32"),  # otherwise NumPy keeps aggregating too much error
        ],
    )
    @pytest.mark.parametrize("metric", ["bilinear", "mahalanobis"])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_curved(ndim, dtypes, metric, capability, stats_fixture):
        """Compares various SIMD kernels (like Bilinear Forms and Mahalanobis distances) for curved spaces
        with their NumPy or baseline counterparts, testing accuracy for IEEE standard floating-point types."""
    
        dtype, compute_dtype = dtypes
        if dtype == "float16" and is_running_under_qemu():
            pytest.skip("Testing low-precision math isn't reliable in QEMU")
    
        np.random.seed()
    
        # Let's generate some non-negative probability distributions
        a = np.abs(np.random.randn(ndim).astype(dtype))
        b = np.abs(np.random.randn(ndim).astype(dtype))
        a /= np.sum(a)
        b /= np.sum(b)
    
        # Let's compute the inverse of the covariance matrix, otherwise in the SciPy
        # implementation of the Mahalanobis we may face `sqrt` of a negative number.
        # We multiply the matrix by its transpose to get a positive-semi-definite matrix.
        c = np.abs(np.random.randn(ndim, ndim).astype(dtype))
        c = np.dot(c, c.T)
    
        keep_one_capability(capability)
        baseline_kernel, simd_kernel = name_to_kernels(metric)
 >       accurate_dt, accurate = profile(
            baseline_kernel,
            a.astype(np.float64),
            b.astype(np.float64),
            c.astype(np.float64),
        )

 a          = array([0.12566957, 0.12625885, 0.20929602, 0.16566844, 0.09539176,
       0.01545548, 0.02672649, 0.00272214, 0.11503018, 0.01380805,
       0.10397297], dtype=float32)
 b          = array([0.12462931, 0.08381189, 0.10813707, 0.01165167, 0.06597336,
       0.18401617, 0.04131686, 0.04646106, 0.1057947 , 0.20232975,
       0.02587808], dtype=float32)
 baseline_kernel = <function baseline_mahalanobis at 0x7ffff48ef4c0>
 c          = array([[ 9.290089 ,  5.914499 ,  5.993331 ,  8.69769  ,  7.4742393,
         5.9172153,  2.7898328,  7.337204 ,  5.842... ,  5.7425385,
         3.4289093, 10.288499 ,  7.5310187,  3.1030002,  3.9145935,
         9.100652 ]], dtype=float32)
 capability = 'neon_bf16'
 compute_dtype = 'float32'
 dtype      = 'float32'
 dtypes     = ('float32', 'float32')
 metric     = 'mahalanobis'
 ndim       = 11
 simd_kernel = <built-in function mahalanobis>
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

 scripts/test.py:656: 
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
 scripts/test.py:179: in profile
    result = callable(*args, **kwargs)
        args       = (array([0.12566957, 0.12625885, 0.20929602, 0.16566844, 0.09539176,
       0.01545548, 0.02672649, 0.00272214, 0.11503...81772041,  5.74253845,
         3.4289093 , 10.28849888,  7.53101873,  3.10300016,  3.91459346,
         9.10065174]]))
        before     = 9827900148996
        callable   = <function baseline_mahalanobis at 0x7ffff48ef4c0>
        kwargs     = {}
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

 x = array([0.12566957, 0.12625885, 0.20929602, 0.16566844, 0.09539176,
       0.01545548, 0.02672649, 0.00272214, 0.11503018, 0.01380805,
       0.10397297])
 y = array([0.12462931, 0.08381189, 0.10813707, 0.01165167, 0.06597336,
       0.18401617, 0.04131686, 0.04646106, 0.1057947 , 0.20232975,
       0.02587808])
 z = array([[ 9.29008865,  5.91449881,  5.99333096,  8.69769001,  7.47423935,
         5.91721535,  2.78983283,  7.33720398....81772041,  5.74253845,
         3.4289093 , 10.28849888,  7.53101873,  3.10300016,  3.91459346,
         9.10065174]])

    def baseline_mahalanobis(x, y, z):
        diff = x - y
 >       return np.sqrt(diff @ z @ diff)
 E       RuntimeWarning: invalid value encountered in sqrt

 diff       = array([ 0.00104026,  0.04244696,  0.10115895,  0.15401678,  0.02941839,
       -0.16856069, -0.01459037, -0.04373892,  0.00923549, -0.18852171,
        0.07809489])
 x          = array([0.12566957, 0.12625885, 0.20929602, 0.16566844, 0.09539176,
       0.01545548, 0.02672649, 0.00272214, 0.11503018, 0.01380805,
       0.10397297])
 y          = array([0.12462931, 0.08381189, 0.10813707, 0.01165167, 0.06597336,
       0.18401617, 0.04131686, 0.04646106, 0.1057947 , 0.20232975,
       0.02587808])
 z          = array([[ 9.29008865,  5.91449881,  5.99333096,  8.69769001,  7.47423935,
         5.91721535,  2.78983283,  7.33720398....81772041,  5.74253845,
         3.4289093 , 10.28849888,  7.53101873,  3.10300016,  3.91459346,
         9.10065174]])

 scripts/test.py:152: RuntimeWarning
 ______________ test_curved[neon_bf16-mahalanobis-dtypes1-11-5-5] _______________

 ndim = 11, dtypes = ('float32', 'float32'), metric = 'mahalanobis'
 capability = 'neon_bf16'
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("ndim", [11, 97])
    @pytest.mark.parametrize(
        "dtypes",  # representation datatype and compute precision
        [
            ("float64", "float64"),
            ("float32", "float32"),
            ("float16", "float32"),  # otherwise NumPy keeps aggregating too much error
        ],
    )
    @pytest.mark.parametrize("metric", ["bilinear", "mahalanobis"])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_curved(ndim, dtypes, metric, capability, stats_fixture):
        """Compares various SIMD kernels (like Bilinear Forms and Mahalanobis distances) for curved spaces
        with their NumPy or baseline counterparts, testing accuracy for IEEE standard floating-point types."""
    
        dtype, compute_dtype = dtypes
        if dtype == "float16" and is_running_under_qemu():
            pytest.skip("Testing low-precision math isn't reliable in QEMU")
    
        np.random.seed()
    
        # Let's generate some non-negative probability distributions
        a = np.abs(np.random.randn(ndim).astype(dtype))
        b = np.abs(np.random.randn(ndim).astype(dtype))
        a /= np.sum(a)
        b /= np.sum(b)
    
        # Let's compute the inverse of the covariance matrix, otherwise in the SciPy
        # implementation of the Mahalanobis we may face `sqrt` of a negative number.
        # We multiply the matrix by its transpose to get a positive-semi-definite matrix.
        c = np.abs(np.random.randn(ndim, ndim).astype(dtype))
        c = np.dot(c, c.T)
    
        keep_one_capability(capability)
        baseline_kernel, simd_kernel = name_to_kernels(metric)
 >       accurate_dt, accurate = profile(
            baseline_kernel,
            a.astype(np.float64),
            b.astype(np.float64),
            c.astype(np.float64),
        )

 a          = array([0.09385179, 0.14805314, 0.0046292 , 0.18451029, 0.18920447,
       0.0931597 , 0.02356854, 0.06036765, 0.07412644, 0.09477372,
       0.03375507], dtype=float32)
 b          = array([0.10810614, 0.00772617, 0.01770339, 0.03104271, 0.12893577,
       0.09475396, 0.15705319, 0.07200324, 0.13409884, 0.03389363,
       0.214683  ], dtype=float32)
 baseline_kernel = <function baseline_mahalanobis at 0x7ffff48ef4c0>
 c          = array([[3.901333 , 2.0125725, 5.6588345, 5.2689166, 3.8316426, 2.528865 ,
        3.7668986, 3.2354903, 2.806168 , 3.3... 7.5741067, 5.8578873, 8.329265 ,
        6.2321353, 7.0673094, 6.212753 , 5.53843  , 5.1077905]],
      dtype=float32)
 capability = 'neon_bf16'
 compute_dtype = 'float32'
 dtype      = 'float32'
 dtypes     = ('float32', 'float32')
 metric     = 'mahalanobis'
 ndim       = 11
 simd_kernel = <built-in function mahalanobis>
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

 scripts/test.py:656: 
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
 scripts/test.py:179: in profile
    result = callable(*args, **kwargs)
        args       = (array([0.09385179, 0.14805314, 0.0046292 , 0.18451029, 0.18920447,
       0.0931597 , 0.02356854, 0.06036765, 0.07412...104, 7.57410669, 5.85788727,
        8.32926464, 6.2321353 , 7.06730938, 6.21275282, 5.53843021,
        5.10779047]]))
        before     = 9828519648896
        callable   = <function baseline_mahalanobis at 0x7ffff48ef4c0>
        kwargs     = {}
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

 x = array([0.09385179, 0.14805314, 0.0046292 , 0.18451029, 0.18920447,
       0.0931597 , 0.02356854, 0.06036765, 0.07412644, 0.09477372,
       0.03375507])
 y = array([0.10810614, 0.00772617, 0.01770339, 0.03104271, 0.12893577,
       0.09475396, 0.15705319, 0.07200324, 0.13409884, 0.03389363,
       0.214683  ])
 z = array([[3.90133309, 2.01257253, 5.65883446, 5.26891661, 3.83164263,
        2.5288651 , 3.76689863, 3.23549032, 2.8061...1104, 7.57410669, 5.85788727,
        8.32926464, 6.2321353 , 7.06730938, 6.21275282, 5.53843021,
        5.10779047]])

    def baseline_mahalanobis(x, y, z):
        diff = x - y
 >       return np.sqrt(diff @ z @ diff)
 E       RuntimeWarning: invalid value encountered in sqrt

 diff       = array([-0.01425435,  0.14032697, -0.01307419,  0.15346758,  0.0602687 ,
       -0.00159426, -0.13348465, -0.01163559, -0.05997241,  0.06088008,
       -0.18092793])
 x          = array([0.09385179, 0.14805314, 0.0046292 , 0.18451029, 0.18920447,
       0.0931597 , 0.02356854, 0.06036765, 0.07412644, 0.09477372,
       0.03375507])
 y          = array([0.10810614, 0.00772617, 0.01770339, 0.03104271, 0.12893577,
       0.09475396, 0.15705319, 0.07200324, 0.13409884, 0.03389363,
       0.214683  ])
 z          = array([[3.90133309, 2.01257253, 5.65883446, 5.26891661, 3.83164263,
        2.5288651 , 3.76689863, 3.23549032, 2.8061...1104, 7.57410669, 5.85788727,
        8.32926464, 6.2321353 , 7.06730938, 6.21275282, 5.53843021,
        5.10779047]])

 scripts/test.py:152: RuntimeWarning
 ______________ test_curved[neon_bf16-mahalanobis-dtypes1-97-1-5] _______________

 ndim = 97, dtypes = ('float32', 'float32'), metric = 'mahalanobis'
 capability = 'neon_bf16'
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("ndim", [11, 97])
    @pytest.mark.parametrize(
        "dtypes",  # representation datatype and compute precision
        [
            ("float64", "float64"),
            ("float32", "float32"),
            ("float16", "float32"),  # otherwise NumPy keeps aggregating too much error
        ],
    )
    @pytest.mark.parametrize("metric", ["bilinear", "mahalanobis"])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_curved(ndim, dtypes, metric, capability, stats_fixture):
        """Compares various SIMD kernels (like Bilinear Forms and Mahalanobis distances) for curved spaces
        with their NumPy or baseline counterparts, testing accuracy for IEEE standard floating-point types."""
    
        dtype, compute_dtype = dtypes
        if dtype == "float16" and is_running_under_qemu():
            pytest.skip("Testing low-precision math isn't reliable in QEMU")
    
        np.random.seed()
    
        # Let's generate some non-negative probability distributions
        a = np.abs(np.random.randn(ndim).astype(dtype))
        b = np.abs(np.random.randn(ndim).astype(dtype))
        a /= np.sum(a)
        b /= np.sum(b)
    
        # Let's compute the inverse of the covariance matrix, otherwise in the SciPy
        # implementation of the Mahalanobis we may face `sqrt` of a negative number.
        # We multiply the matrix by its transpose to get a positive-semi-definite matrix.
        c = np.abs(np.random.randn(ndim, ndim).astype(dtype))
        c = np.dot(c, c.T)
    
        keep_one_capability(capability)
        baseline_kernel, simd_kernel = name_to_kernels(metric)
 >       accurate_dt, accurate = profile(
            baseline_kernel,
            a.astype(np.float64),
            b.astype(np.float64),
            c.astype(np.float64),
        )

 a          = array([0.0117337 , 0.00630518, 0.00378072, 0.01037649, 0.00043756,
       0.01433915, 0.00606975, 0.0043353 , 0.006291...1256,
       0.01719427, 0.00307332, 0.00647507, 0.00606733, 0.01728325,
       0.01102025, 0.00933381], dtype=float32)
 b          = array([0.00758251, 0.00994408, 0.00083143, 0.00552116, 0.01928728,
       0.00581162, 0.00721728, 0.00358333, 0.017627...6724,
       0.02726072, 0.0123333 , 0.03572074, 0.00507672, 0.00574891,
       0.01745796, 0.02254968], dtype=float32)
 baseline_kernel = <function baseline_mahalanobis at 0x7ffff48ef4c0>
 c          = array([[ 67.69649 ,  64.77999 ,  64.577965, ...,  71.461716,  76.65157 ,
         72.95486 ],
       [ 64.77999 ,  57.....028534],
       [ 72.95486 ,  61.47972 ,  66.590614, ...,  77.268745,  70.028534,
        106.089615]], dtype=float32)
 capability = 'neon_bf16'
 compute_dtype = 'float32'
 dtype      = 'float32'
 dtypes     = ('float32', 'float32')
 metric     = 'mahalanobis'
 ndim       = 97
 simd_kernel = <built-in function mahalanobis>
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

 scripts/test.py:656: 
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
 scripts/test.py:179: in profile
    result = callable(*args, **kwargs)
        args       = (array([0.0117337 , 0.00630518, 0.00378072, 0.01037649, 0.00043756,
       0.01433915, 0.00606975, 0.0043353 , 0.00629....02853394],
       [ 72.95485687,  61.47972107,  66.59061432, ...,  77.26874542,
         70.02853394, 106.08961487]]))
        before     = 9829119243897
        callable   = <function baseline_mahalanobis at 0x7ffff48ef4c0>
        kwargs     = {}
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

 x = array([0.0117337 , 0.00630518, 0.00378072, 0.01037649, 0.00043756,
       0.01433915, 0.00606975, 0.0043353 , 0.006291...200207 , 0.01031256,
       0.01719427, 0.00307332, 0.00647507, 0.00606733, 0.01728325,
       0.01102025, 0.00933381])
 y = array([0.00758251, 0.00994408, 0.00083143, 0.00552116, 0.01928728,
       0.00581162, 0.00721728, 0.00358333, 0.017627...1736343, 0.00176724,
       0.02726072, 0.0123333 , 0.03572074, 0.00507672, 0.00574891,
       0.01745796, 0.02254968])
 z = array([[ 67.69648743,  64.77999115,  64.57796478, ...,  71.4617157 ,
         76.65157318,  72.95485687],
       [ 64....0.02853394],
       [ 72.95485687,  61.47972107,  66.59061432, ...,  77.26874542,
         70.02853394, 106.08961487]])

    def baseline_mahalanobis(x, y, z):
        diff = x - y
 >       return np.sqrt(diff @ z @ diff)
 E       RuntimeWarning: invalid value encountered in sqrt

 diff       = array([ 0.0041512 , -0.0036389 ,  0.0029493 ,  0.00485533, -0.01884972,
        0.00852753, -0.00114753,  0.00075198, ...  0.00854531,
       -0.01006645, -0.00925998, -0.02924567,  0.00099062,  0.01153434,
       -0.00643771, -0.01321587])
 x          = array([0.0117337 , 0.00630518, 0.00378072, 0.01037649, 0.00043756,
       0.01433915, 0.00606975, 0.0043353 , 0.006291...200207 , 0.01031256,
       0.01719427, 0.00307332, 0.00647507, 0.00606733, 0.01728325,
       0.01102025, 0.00933381])
 y          = array([0.00758251, 0.00994408, 0.00083143, 0.00552116, 0.01928728,
       0.00581162, 0.00721728, 0.00358333, 0.017627...1736343, 0.00176724,
       0.02726072, 0.0123333 , 0.03572074, 0.00507672, 0.00574891,
       0.01745796, 0.02254968])
 z          = array([[ 67.69648743,  64.77999115,  64.57796478, ...,  71.4617157 ,
         76.65157318,  72.95485687],
       [ 64....0.02853394],
       [ 72.95485687,  61.47972107,  66.59061432, ...,  77.26874542,
         70.02853394, 106.08961487]])

 scripts/test.py:152: RuntimeWarning
 _______________ test_curved[neon_i8-mahalanobis-dtypes1-11-1-5] ________________

 ndim = 11, dtypes = ('float32', 'float32'), metric = 'mahalanobis'
 capability = 'neon_i8'
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("ndim", [11, 97])
    @pytest.mark.parametrize(
        "dtypes",  # representation datatype and compute precision
        [
            ("float64", "float64"),
            ("float32", "float32"),
            ("float16", "float32"),  # otherwise NumPy keeps aggregating too much error
        ],
    )
    @pytest.mark.parametrize("metric", ["bilinear", "mahalanobis"])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_curved(ndim, dtypes, metric, capability, stats_fixture):
        """Compares various SIMD kernels (like Bilinear Forms and Mahalanobis distances) for curved spaces
        with their NumPy or baseline counterparts, testing accuracy for IEEE standard floating-point types."""
    
        dtype, compute_dtype = dtypes
        if dtype == "float16" and is_running_under_qemu():
            pytest.skip("Testing low-precision math isn't reliable in QEMU")
    
        np.random.seed()
    
        # Let's generate some non-negative probability distributions
        a = np.abs(np.random.randn(ndim).astype(dtype))
        b = np.abs(np.random.randn(ndim).astype(dtype))
        a /= np.sum(a)
        b /= np.sum(b)
    
        # Let's compute the inverse of the covariance matrix, otherwise in the SciPy
        # implementation of the Mahalanobis we may face `sqrt` of a negative number.
        # We multiply the matrix by its transpose to get a positive-semi-definite matrix.
        c = np.abs(np.random.randn(ndim, ndim).astype(dtype))
        c = np.dot(c, c.T)
    
        keep_one_capability(capability)
        baseline_kernel, simd_kernel = name_to_kernels(metric)
 >       accurate_dt, accurate = profile(
            baseline_kernel,
            a.astype(np.float64),
            b.astype(np.float64),
            c.astype(np.float64),
        )

 a          = array([0.09407397, 0.09344136, 0.1049271 , 0.0662691 , 0.08221431,
       0.0560272 , 0.1667129 , 0.07815744, 0.05498565, 0.05326087,
       0.14993009], dtype=float32)
 b          = array([0.09140256, 0.058413  , 0.01659825, 0.1553793 , 0.02589794,
       0.11592819, 0.17679214, 0.03037448, 0.09459713, 0.09605881,
       0.13855824], dtype=float32)
 baseline_kernel = <function baseline_mahalanobis at 0x7ffff48ef4c0>
 c          = array([[ 8.356414 ,  7.0126514,  7.5036244,  5.4054546,  6.990689 ,
        10.050119 ,  6.44759  ,  5.6272726,  6.911... ,  9.75897  ,
         3.0298398,  8.41783  ,  7.4016156,  9.404134 ,  9.314806 ,
         5.984726 ]], dtype=float32)
 capability = 'neon_i8'
 compute_dtype = 'float32'
 dtype      = 'float32'
 dtypes     = ('float32', 'float32')
 metric     = 'mahalanobis'
 ndim       = 11
 simd_kernel = <built-in function mahalanobis>
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

 scripts/test.py:656: 
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
 scripts/test.py:179: in profile
    result = callable(*args, **kwargs)
        args       = (array([0.09407397, 0.09344136, 0.1049271 , 0.0662691 , 0.08221431,
       0.0560272 , 0.1667129 , 0.07815744, 0.05498...4638648 ,  9.75897026,
         3.02983975,  8.41783047,  7.40161562,  9.4041338 ,  9.31480598,
         5.98472595]]))
        before     = 9831016759313
        callable   = <function baseline_mahalanobis at 0x7ffff48ef4c0>
        kwargs     = {}
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

 x = array([0.09407397, 0.09344136, 0.1049271 , 0.0662691 , 0.08221431,
       0.0560272 , 0.1667129 , 0.07815744, 0.05498565, 0.05326087,
       0.14993009])
 y = array([0.09140256, 0.058413  , 0.01659825, 0.1553793 , 0.02589794,
       0.11592819, 0.17679214, 0.03037448, 0.09459713, 0.09605881,
       0.13855824])
 z = array([[ 8.35641384,  7.01265144,  7.50362444,  5.40545464,  6.9906888 ,
        10.0501194 ,  6.44758987,  5.62727261....4638648 ,  9.75897026,
         3.02983975,  8.41783047,  7.40161562,  9.4041338 ,  9.31480598,
         5.98472595]])

    def baseline_mahalanobis(x, y, z):
        diff = x - y
 >       return np.sqrt(diff @ z @ diff)
 E       RuntimeWarning: invalid value encountered in sqrt

 diff       = array([ 0.00267141,  0.03502836,  0.08832885, -0.0891102 ,  0.05631637,
       -0.05990099, -0.01007925,  0.04778296, -0.03961148, -0.04279793,
        0.01137185])
 x          = array([0.09407397, 0.09344136, 0.1049271 , 0.0662691 , 0.08221431,
       0.0560272 , 0.1667129 , 0.07815744, 0.05498565, 0.05326087,
       0.14993009])
 y          = array([0.09140256, 0.058413  , 0.01659825, 0.1553793 , 0.02589794,
       0.11592819, 0.17679214, 0.03037448, 0.09459713, 0.09605881,
       0.13855824])
 z          = array([[ 8.35641384,  7.01265144,  7.50362444,  5.40545464,  6.9906888 ,
        10.0501194 ,  6.44758987,  5.62727261....4638648 ,  9.75897026,
         3.02983975,  8.41783047,  7.40161562,  9.4041338 ,  9.31480598,
         5.98472595]])

 scripts/test.py:152: RuntimeWarning
 _______________ test_curved[neon_i8-mahalanobis-dtypes1-11-4-5] ________________

 ndim = 11, dtypes = ('float32', 'float32'), metric = 'mahalanobis'
 capability = 'neon_i8'
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("ndim", [11, 97])
    @pytest.mark.parametrize(
        "dtypes",  # representation datatype and compute precision
        [
            ("float64", "float64"),
            ("float32", "float32"),
            ("float16", "float32"),  # otherwise NumPy keeps aggregating too much error
        ],
    )
    @pytest.mark.parametrize("metric", ["bilinear", "mahalanobis"])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_curved(ndim, dtypes, metric, capability, stats_fixture):
        """Compares various SIMD kernels (like Bilinear Forms and Mahalanobis distances) for curved spaces
        with their NumPy or baseline counterparts, testing accuracy for IEEE standard floating-point types."""
    
        dtype, compute_dtype = dtypes
        if dtype == "float16" and is_running_under_qemu():
            pytest.skip("Testing low-precision math isn't reliable in QEMU")
    
        np.random.seed()
    
        # Let's generate some non-negative probability distributions
        a = np.abs(np.random.randn(ndim).astype(dtype))
        b = np.abs(np.random.randn(ndim).astype(dtype))
        a /= np.sum(a)
        b /= np.sum(b)
    
        # Let's compute the inverse of the covariance matrix, otherwise in the SciPy
        # implementation of the Mahalanobis we may face `sqrt` of a negative number.
        # We multiply the matrix by its transpose to get a positive-semi-definite matrix.
        c = np.abs(np.random.randn(ndim, ndim).astype(dtype))
        c = np.dot(c, c.T)
    
        keep_one_capability(capability)
        baseline_kernel, simd_kernel = name_to_kernels(metric)
 >       accurate_dt, accurate = profile(
            baseline_kernel,
            a.astype(np.float64),
            b.astype(np.float64),
            c.astype(np.float64),
        )

 a          = array([0.05170924, 0.06271512, 0.11620239, 0.01434473, 0.09036927,
       0.07995455, 0.02539621, 0.03968008, 0.02373172, 0.16092795,
       0.33496875], dtype=float32)
 b          = array([0.18380266, 0.09887243, 0.02563286, 0.10999566, 0.0597386 ,
       0.0616795 , 0.05702863, 0.13110694, 0.1541055 , 0.01587757,
       0.10215963], dtype=float32)
 baseline_kernel = <function baseline_mahalanobis at 0x7ffff48ef4c0>
 c          = array([[10.575495 ,  7.7977734, 10.580074 ,  9.698586 , 11.610996 ,
         8.78133  , 10.066198 ,  6.1088085, 11.010... ,  6.651123 ,
         5.112233 ,  5.325964 ,  5.8981237,  7.838216 ,  5.1809793,
         5.9365745]], dtype=float32)
 capability = 'neon_i8'
 compute_dtype = 'float32'
 dtype      = 'float32'
 dtypes     = ('float32', 'float32')
 metric     = 'mahalanobis'
 ndim       = 11
 simd_kernel = <built-in function mahalanobis>
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

 scripts/test.py:656: 
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
 scripts/test.py:179: in profile
    result = callable(*args, **kwargs)
        args       = (array([0.05170924, 0.06271512, 0.11620239, 0.01434473, 0.09036927,
       0.07995455, 0.02539621, 0.03968008, 0.02373...15859079,  6.65112305,
         5.11223316,  5.32596397,  5.89812374,  7.83821583,  5.18097925,
         5.93657446]]))
        before     = 9831618716970
        callable   = <function baseline_mahalanobis at 0x7ffff48ef4c0>
        kwargs     = {}
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

 x = array([0.05170924, 0.06271512, 0.11620239, 0.01434473, 0.09036927,
       0.07995455, 0.02539621, 0.03968008, 0.02373172, 0.16092795,
       0.33496875])
 y = array([0.18380266, 0.09887243, 0.02563286, 0.10999566, 0.0597386 ,
       0.0616795 , 0.05702863, 0.13110694, 0.1541055 , 0.01587757,
       0.10215963])
 z = array([[10.57549477,  7.79777336, 10.58007431,  9.69858646, 11.61099625,
         8.78133011, 10.06619835,  6.10880852....15859079,  6.65112305,
         5.11223316,  5.32596397,  5.89812374,  7.83821583,  5.18097925,
         5.93657446]])

    def baseline_mahalanobis(x, y, z):
        diff = x - y
 >       return np.sqrt(diff @ z @ diff)
 E       RuntimeWarning: invalid value encountered in sqrt

 diff       = array([-0.13209342, -0.03615731,  0.09056953, -0.09565093,  0.03063067,
        0.01827505, -0.03163241, -0.09142686, -0.13037378,  0.14505038,
        0.23280911])
 x          = array([0.05170924, 0.06271512, 0.11620239, 0.01434473, 0.09036927,
       0.07995455, 0.02539621, 0.03968008, 0.02373172, 0.16092795,
       0.33496875])
 y          = array([0.18380266, 0.09887243, 0.02563286, 0.10999566, 0.0597386 ,
       0.0616795 , 0.05702863, 0.13110694, 0.1541055 , 0.01587757,
       0.10215963])
 z          = array([[10.57549477,  7.79777336, 10.58007431,  9.69858646, 11.61099625,
         8.78133011, 10.06619835,  6.10880852....15859079,  6.65112305,
         5.11223316,  5.32596397,  5.89812374,  7.83821583,  5.18097925,
         5.93657446]])

 scripts/test.py:152: RuntimeWarning
 _______________ test_curved[neon_i8-mahalanobis-dtypes1-11-5-5] ________________

 ndim = 11, dtypes = ('float32', 'float32'), metric = 'mahalanobis'
 capability = 'neon_i8'
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("ndim", [11, 97])
    @pytest.mark.parametrize(
        "dtypes",  # representation datatype and compute precision
        [
            ("float64", "float64"),
            ("float32", "float32"),
            ("float16", "float32"),  # otherwise NumPy keeps aggregating too much error
        ],
    )
    @pytest.mark.parametrize("metric", ["bilinear", "mahalanobis"])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_curved(ndim, dtypes, metric, capability, stats_fixture):
        """Compares various SIMD kernels (like Bilinear Forms and Mahalanobis distances) for curved spaces
        with their NumPy or baseline counterparts, testing accuracy for IEEE standard floating-point types."""
    
        dtype, compute_dtype = dtypes
        if dtype == "float16" and is_running_under_qemu():
            pytest.skip("Testing low-precision math isn't reliable in QEMU")
    
        np.random.seed()
    
        # Let's generate some non-negative probability distributions
        a = np.abs(np.random.randn(ndim).astype(dtype))
        b = np.abs(np.random.randn(ndim).astype(dtype))
        a /= np.sum(a)
        b /= np.sum(b)
    
        # Let's compute the inverse of the covariance matrix, otherwise in the SciPy
        # implementation of the Mahalanobis we may face `sqrt` of a negative number.
        # We multiply the matrix by its transpose to get a positive-semi-definite matrix.
        c = np.abs(np.random.randn(ndim, ndim).astype(dtype))
        c = np.dot(c, c.T)
    
        keep_one_capability(capability)
        baseline_kernel, simd_kernel = name_to_kernels(metric)
 >       accurate_dt, accurate = profile(
            baseline_kernel,
            a.astype(np.float64),
            b.astype(np.float64),
            c.astype(np.float64),
        )

 a          = array([0.07492396, 0.19293088, 0.13699064, 0.06142258, 0.10275801,
       0.0949654 , 0.00064185, 0.08489066, 0.08139855, 0.07186247,
       0.09721498], dtype=float32)
 b          = array([0.05297232, 0.00105692, 0.01447747, 0.068666  , 0.03985731,
       0.11717293, 0.22946048, 0.01266141, 0.05634362, 0.10716125,
       0.3001703 ], dtype=float32)
 baseline_kernel = <function baseline_mahalanobis at 0x7ffff48ef4c0>
 c          = array([[ 3.4214072,  5.0171676,  4.8722086,  4.428088 ,  4.038274 ,
         4.6182055,  5.507593 ,  6.8600903,  4.406... ,  4.8636565,
         5.0663652,  4.8838663,  3.9938748,  4.3764505,  5.5504985,
         3.089565 ]], dtype=float32)
 capability = 'neon_i8'
 compute_dtype = 'float32'
 dtype      = 'float32'
 dtypes     = ('float32', 'float32')
 metric     = 'mahalanobis'
 ndim       = 11
 simd_kernel = <built-in function mahalanobis>
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

 scripts/test.py:656: 
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
 scripts/test.py:179: in profile
    result = callable(*args, **kwargs)
        args       = (array([0.07492396, 0.19293088, 0.13699064, 0.06142258, 0.10275801,
       0.0949654 , 0.00064185, 0.08489066, 0.08139...59331107,  4.86365652,
         5.06636524,  4.88386631,  3.99387479,  4.37645054,  5.55049849,
         3.08956504]]))
        before     = 9832202480950
        callable   = <function baseline_mahalanobis at 0x7ffff48ef4c0>
        kwargs     = {}
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

 x = array([0.07492396, 0.19293088, 0.13699064, 0.06142258, 0.10275801,
       0.0949654 , 0.00064185, 0.08489066, 0.08139855, 0.07186247,
       0.09721498])
 y = array([0.05297232, 0.00105692, 0.01447747, 0.068666  , 0.03985731,
       0.11717293, 0.22946048, 0.01266141, 0.05634362, 0.10716125,
       0.3001703 ])
 z = array([[ 3.42140722,  5.01716757,  4.8722086 ,  4.42808819,  4.03827381,
         4.61820555,  5.50759315,  6.86009026....59331107,  4.86365652,
         5.06636524,  4.88386631,  3.99387479,  4.37645054,  5.55049849,
         3.08956504]])

    def baseline_mahalanobis(x, y, z):
        diff = x - y
 >       return np.sqrt(diff @ z @ diff)
 E       RuntimeWarning: invalid value encountered in sqrt

 diff       = array([ 0.02195164,  0.19187396,  0.12251317, -0.00724341,  0.0629007 ,
       -0.02220754, -0.22881862,  0.07222926,  0.02505493, -0.03529878,
       -0.20295532])
 x          = array([0.07492396, 0.19293088, 0.13699064, 0.06142258, 0.10275801,
       0.0949654 , 0.00064185, 0.08489066, 0.08139855, 0.07186247,
       0.09721498])
 y          = array([0.05297232, 0.00105692, 0.01447747, 0.068666  , 0.03985731,
       0.11717293, 0.22946048, 0.01266141, 0.05634362, 0.10716125,
       0.3001703 ])
 z          = array([[ 3.42140722,  5.01716757,  4.8722086 ,  4.42808819,  4.03827381,
         4.61820555,  5.50759315,  6.86009026....59331107,  4.86365652,
         5.06636524,  4.88386631,  3.99387479,  4.37645054,  5.55049849,
         3.08956504]])

 scripts/test.py:152: RuntimeWarning
 _______________ test_curved[neon_i8-mahalanobis-dtypes1-97-3-5] ________________

 ndim = 97, dtypes = ('float32', 'float32'), metric = 'mahalanobis'
 capability = 'neon_i8'
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("ndim", [11, 97])
    @pytest.mark.parametrize(
        "dtypes",  # representation datatype and compute precision
        [
            ("float64", "float64"),
            ("float32", "float32"),
            ("float16", "float32"),  # otherwise NumPy keeps aggregating too much error
        ],
    )
    @pytest.mark.parametrize("metric", ["bilinear", "mahalanobis"])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_curved(ndim, dtypes, metric, capability, stats_fixture):
        """Compares various SIMD kernels (like Bilinear Forms and Mahalanobis distances) for curved spaces
        with their NumPy or baseline counterparts, testing accuracy for IEEE standard floating-point types."""
    
        dtype, compute_dtype = dtypes
        if dtype == "float16" and is_running_under_qemu():
            pytest.skip("Testing low-precision math isn't reliable in QEMU")
    
        np.random.seed()
    
        # Let's generate some non-negative probability distributions
        a = np.abs(np.random.randn(ndim).astype(dtype))
        b = np.abs(np.random.randn(ndim).astype(dtype))
        a /= np.sum(a)
        b /= np.sum(b)
    
        # Let's compute the inverse of the covariance matrix, otherwise in the SciPy
        # implementation of the Mahalanobis we may face `sqrt` of a negative number.
        # We multiply the matrix by its transpose to get a positive-semi-definite matrix.
        c = np.abs(np.random.randn(ndim, ndim).astype(dtype))
        c = np.dot(c, c.T)
    
        keep_one_capability(capability)
        baseline_kernel, simd_kernel = name_to_kernels(metric)
 >       accurate_dt, accurate = profile(
            baseline_kernel,
            a.astype(np.float64),
            b.astype(np.float64),
            c.astype(np.float64),
        )

 a          = array([3.75917181e-03, 9.88447480e-03, 1.38224252e-02, 1.50160464e-02,
       9.52259265e-03, 2.95450613e-02, 1.757663...8907e-03,
       5.35268430e-03, 1.26982369e-02, 3.83589626e-03, 9.02340253e-05,
       1.07680755e-02], dtype=float32)
 b          = array([0.01751885, 0.00561081, 0.00177287, 0.01464505, 0.01591232,
       0.01497766, 0.00164734, 0.00984539, 0.004135...5548,
       0.01473297, 0.0016306 , 0.01125617, 0.02153592, 0.02615132,
       0.0216776 , 0.00526574], dtype=float32)
 baseline_kernel = <function baseline_mahalanobis at 0x7ffff48ef4c0>
 c          = array([[ 78.15441 ,  57.529716,  70.72835 , ...,  67.466934,  55.70642 ,
         68.643425],
       [ 57.529716,  50.....72673 ],
       [ 68.643425,  55.768875,  66.26793 , ...,  69.32979 ,  71.72673 ,
        127.105934]], dtype=float32)
 capability = 'neon_i8'
 compute_dtype = 'float32'
 dtype      = 'float32'
 dtypes     = ('float32', 'float32')
 metric     = 'mahalanobis'
 ndim       = 97
 simd_kernel = <built-in function mahalanobis>
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

 scripts/test.py:656: 
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
 scripts/test.py:179: in profile
    result = callable(*args, **kwargs)
        args       = (array([3.75917181e-03, 9.88447480e-03, 1.38224252e-02, 1.50160464e-02,
       9.52259265e-03, 2.95450613e-02, 1.75766....72673035],
       [ 68.64342499,  55.76887512,  66.26792908, ...,  69.32978821,
         71.72673035, 127.10593414]]))
        before     = 9832832720935
        callable   = <function baseline_mahalanobis at 0x7ffff48ef4c0>
        kwargs     = {}
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

 x = array([3.75917181e-03, 9.88447480e-03, 1.38224252e-02, 1.50160464e-02,
       9.52259265e-03, 2.95450613e-02, 1.757663...066e-03, 1.20958907e-03,
       5.35268430e-03, 1.26982369e-02, 3.83589626e-03, 9.02340253e-05,
       1.07680755e-02])
 y = array([0.01751885, 0.00561081, 0.00177287, 0.01464505, 0.01591232,
       0.01497766, 0.00164734, 0.00984539, 0.004135...199904 , 0.01485548,
       0.01473297, 0.0016306 , 0.01125617, 0.02153592, 0.02615132,
       0.0216776 , 0.00526574])
 z = array([[ 78.15441132,  57.52971649,  70.72834778, ...,  67.4669342 ,
         55.7064209 ,  68.64342499],
       [ 57....1.72673035],
       [ 68.64342499,  55.76887512,  66.26792908, ...,  69.32978821,
         71.72673035, 127.10593414]])

    def baseline_mahalanobis(x, y, z):
        diff = x - y
 >       return np.sqrt(diff @ z @ diff)
 E       RuntimeWarning: invalid value encountered in sqrt

 diff       = array([-0.01375968,  0.00427366,  0.01204956,  0.000371  , -0.00638973,
        0.0145674 ,  0.01592929, -0.00172251, ... -0.00769933,
       -0.01164202, -0.00042101, -0.00590349, -0.00883768, -0.02231542,
       -0.02158737,  0.00550233])
 x          = array([3.75917181e-03, 9.88447480e-03, 1.38224252e-02, 1.50160464e-02,
       9.52259265e-03, 2.95450613e-02, 1.757663...066e-03, 1.20958907e-03,
       5.35268430e-03, 1.26982369e-02, 3.83589626e-03, 9.02340253e-05,
       1.07680755e-02])
 y          = array([0.01751885, 0.00561081, 0.00177287, 0.01464505, 0.01591232,
       0.01497766, 0.00164734, 0.00984539, 0.004135...199904 , 0.01485548,
       0.01473297, 0.0016306 , 0.01125617, 0.02153592, 0.02615132,
       0.0216776 , 0.00526574])
 z          = array([[ 78.15441132,  57.52971649,  70.72834778, ...,  67.4669342 ,
         55.7064209 ,  68.64342499],
       [ 57....1.72673035],
       [ 68.64342499,  55.76887512,  66.26792908, ...,  69.32978821,
         71.72673035, 127.10593414]])

 scripts/test.py:152: RuntimeWarning
 _________________ test_curved[sve-mahalanobis-dtypes1-11-1-5] __________________

 ndim = 11, dtypes = ('float32', 'float32'), metric = 'mahalanobis'
 capability = 'sve'
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("ndim", [11, 97])
    @pytest.mark.parametrize(
        "dtypes",  # representation datatype and compute precision
        [
            ("float64", "float64"),
            ("float32", "float32"),
            ("float16", "float32"),  # otherwise NumPy keeps aggregating too much error
        ],
    )
    @pytest.mark.parametrize("metric", ["bilinear", "mahalanobis"])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_curved(ndim, dtypes, metric, capability, stats_fixture):
        """Compares various SIMD kernels (like Bilinear Forms and Mahalanobis distances) for curved spaces
        with their NumPy or baseline counterparts, testing accuracy for IEEE standard floating-point types."""
    
        dtype, compute_dtype = dtypes
        if dtype == "float16" and is_running_under_qemu():
            pytest.skip("Testing low-precision math isn't reliable in QEMU")
    
        np.random.seed()
    
        # Let's generate some non-negative probability distributions
        a = np.abs(np.random.randn(ndim).astype(dtype))
        b = np.abs(np.random.randn(ndim).astype(dtype))
        a /= np.sum(a)
        b /= np.sum(b)
    
        # Let's compute the inverse of the covariance matrix, otherwise in the SciPy
        # implementation of the Mahalanobis we may face `sqrt` of a negative number.
        # We multiply the matrix by its transpose to get a positive-semi-definite matrix.
        c = np.abs(np.random.randn(ndim, ndim).astype(dtype))
        c = np.dot(c, c.T)
    
        keep_one_capability(capability)
        baseline_kernel, simd_kernel = name_to_kernels(metric)
 >       accurate_dt, accurate = profile(
            baseline_kernel,
            a.astype(np.float64),
            b.astype(np.float64),
            c.astype(np.float64),
        )

 a          = array([0.05872718, 0.1186891 , 0.30520082, 0.1310885 , 0.15944648,
       0.09888712, 0.01812298, 0.00391495, 0.06051283, 0.0349284 ,
       0.01048163], dtype=float32)
 b          = array([8.6259678e-02, 2.3306589e-01, 2.5111541e-02, 1.0212398e-02,
       4.3326274e-02, 2.7757922e-01, 2.8120021e-05, 6.4530745e-02,
       5.9481554e-02, 9.6162550e-02, 1.0424198e-01], dtype=float32)
 baseline_kernel = <function baseline_mahalanobis at 0x7ffff48ef4c0>
 c          = array([[ 6.567223 ,  7.3652735,  5.8288927,  5.4123325,  4.6767464,
         7.0593104,  9.645592 ,  5.2501445,  4.763... ,  7.979461 ,
         6.954627 , 10.11976  ,  5.715429 ,  7.0907393,  5.0363894,
        10.640943 ]], dtype=float32)
 capability = 'sve'
 compute_dtype = 'float32'
 dtype      = 'float32'
 dtypes     = ('float32', 'float32')
 metric     = 'mahalanobis'
 ndim       = 11
 simd_kernel = <built-in function mahalanobis>
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

 scripts/test.py:656: 
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
 scripts/test.py:179: in profile
    result = callable(*args, **kwargs)
        args       = (array([0.05872718, 0.1186891 , 0.30520082, 0.1310885 , 0.15944648,
       0.09888712, 0.01812298, 0.00391495, 0.06051...33589458,  7.97946119,
         6.95462704, 10.11975956,  5.71542883,  7.09073925,  5.03638935,
        10.64094257]]))
        before     = 9834695694792
        callable   = <function baseline_mahalanobis at 0x7ffff48ef4c0>
        kwargs     = {}
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

 x = array([0.05872718, 0.1186891 , 0.30520082, 0.1310885 , 0.15944648,
       0.09888712, 0.01812298, 0.00391495, 0.06051283, 0.0349284 ,
       0.01048163])
 y = array([8.62596780e-02, 2.33065888e-01, 2.51115412e-02, 1.02123981e-02,
       4.33262736e-02, 2.77579218e-01, 2.81200209e-05, 6.45307451e-02,
       5.94815537e-02, 9.61625502e-02, 1.04241982e-01])
 z = array([[ 6.56722307,  7.36527348,  5.82889271,  5.41233253,  4.67674637,
         7.05931044,  9.64559174,  5.25014448....33589458,  7.97946119,
         6.95462704, 10.11975956,  5.71542883,  7.09073925,  5.03638935,
        10.64094257]])

    def baseline_mahalanobis(x, y, z):
        diff = x - y
 >       return np.sqrt(diff @ z @ diff)
 E       RuntimeWarning: invalid value encountered in sqrt

 diff       = array([-0.0275325 , -0.11437679,  0.28008927,  0.1208761 ,  0.1161202 ,
       -0.17869209,  0.01809486, -0.06061579,  0.00103128, -0.06123415,
       -0.09376035])
 x          = array([0.05872718, 0.1186891 , 0.30520082, 0.1310885 , 0.15944648,
       0.09888712, 0.01812298, 0.00391495, 0.06051283, 0.0349284 ,
       0.01048163])
 y          = array([8.62596780e-02, 2.33065888e-01, 2.51115412e-02, 1.02123981e-02,
       4.33262736e-02, 2.77579218e-01, 2.81200209e-05, 6.45307451e-02,
       5.94815537e-02, 9.61625502e-02, 1.04241982e-01])
 z          = array([[ 6.56722307,  7.36527348,  5.82889271,  5.41233253,  4.67674637,
         7.05931044,  9.64559174,  5.25014448....33589458,  7.97946119,
         6.95462704, 10.11975956,  5.71542883,  7.09073925,  5.03638935,
        10.64094257]])

 scripts/test.py:152: RuntimeWarning
 _________________ test_curved[sve-mahalanobis-dtypes1-97-1-5] __________________

 ndim = 97, dtypes = ('float32', 'float32'), metric = 'mahalanobis'
 capability = 'sve'
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("ndim", [11, 97])
    @pytest.mark.parametrize(
        "dtypes",  # representation datatype and compute precision
        [
            ("float64", "float64"),
            ("float32", "float32"),
            ("float16", "float32"),  # otherwise NumPy keeps aggregating too much error
        ],
    )
    @pytest.mark.parametrize("metric", ["bilinear", "mahalanobis"])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_curved(ndim, dtypes, metric, capability, stats_fixture):
        """Compares various SIMD kernels (like Bilinear Forms and Mahalanobis distances) for curved spaces
        with their NumPy or baseline counterparts, testing accuracy for IEEE standard floating-point types."""
    
        dtype, compute_dtype = dtypes
        if dtype == "float16" and is_running_under_qemu():
            pytest.skip("Testing low-precision math isn't reliable in QEMU")
    
        np.random.seed()
    
        # Let's generate some non-negative probability distributions
        a = np.abs(np.random.randn(ndim).astype(dtype))
        b = np.abs(np.random.randn(ndim).astype(dtype))
        a /= np.sum(a)
        b /= np.sum(b)
    
        # Let's compute the inverse of the covariance matrix, otherwise in the SciPy
        # implementation of the Mahalanobis we may face `sqrt` of a negative number.
        # We multiply the matrix by its transpose to get a positive-semi-definite matrix.
        c = np.abs(np.random.randn(ndim, ndim).astype(dtype))
        c = np.dot(c, c.T)
    
        keep_one_capability(capability)
        baseline_kernel, simd_kernel = name_to_kernels(metric)
 >       accurate_dt, accurate = profile(
            baseline_kernel,
            a.astype(np.float64),
            b.astype(np.float64),
            c.astype(np.float64),
        )

 a          = array([0.00154837, 0.03189812, 0.00185868, 0.0182995 , 0.00474141,
       0.00515214, 0.01640855, 0.00533401, 0.000562...1248,
       0.00253867, 0.00039299, 0.0062598 , 0.00231987, 0.02156041,
       0.01681967, 0.00391236], dtype=float32)
 b          = array([3.8088602e-03, 1.3899658e-02, 2.1285047e-03, 2.0474462e-02,
       8.1413491e-03, 3.3770239e-03, 1.8906747e-03,...1.5020102e-02,
       8.4531829e-03, 2.6157830e-02, 7.7587347e-03, 1.2553397e-02,
       1.6162399e-02], dtype=float32)
 baseline_kernel = <function baseline_mahalanobis at 0x7ffff48ef4c0>
 c          = array([[ 46.106327,  52.166897,  57.17884 , ...,  53.17073 ,  53.34369 ,
         64.68454 ],
       [ 52.166897,  67.....32661 ],
       [ 64.68454 ,  67.60934 ,  62.40522 , ...,  73.46413 ,  68.32661 ,
        122.19456 ]], dtype=float32)
 capability = 'sve'
 compute_dtype = 'float32'
 dtype      = 'float32'
 dtypes     = ('float32', 'float32')
 metric     = 'mahalanobis'
 ndim       = 97
 simd_kernel = <built-in function mahalanobis>
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

 scripts/test.py:656: 
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
 scripts/test.py:179: in profile
    result = callable(*args, **kwargs)
        args       = (array([0.00154837, 0.03189812, 0.00185868, 0.0182995 , 0.00474141,
       0.00515214, 0.01640855, 0.00533401, 0.00056....32660675],
       [ 64.68453979,  67.60933685,  62.40522003, ...,  73.46412659,
         68.32660675, 122.19455719]]))
        before     = 9835351605545
        callable   = <function baseline_mahalanobis at 0x7ffff48ef4c0>
        kwargs     = {}
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

 x = array([0.00154837, 0.03189812, 0.00185868, 0.0182995 , 0.00474141,
       0.00515214, 0.01640855, 0.00533401, 0.000562...1069757, 0.01721248,
       0.00253867, 0.00039299, 0.0062598 , 0.00231987, 0.02156041,
       0.01681967, 0.00391236])
 y = array([3.80886020e-03, 1.38996579e-02, 2.12850468e-03, 2.04744618e-02,
       8.14134907e-03, 3.37702385e-03, 1.890674...199e-02, 1.50201023e-02,
       8.45318288e-03, 2.61578299e-02, 7.75873475e-03, 1.25533966e-02,
       1.61623992e-02])
 z = array([[ 46.10632706,  52.16689682,  57.17884064, ...,  53.17073059,
         53.34368896,  64.68453979],
       [ 52....8.32660675],
       [ 64.68453979,  67.60933685,  62.40522003, ...,  73.46412659,
         68.32660675, 122.19455719]])

    def baseline_mahalanobis(x, y, z):
        diff = x - y
 >       return np.sqrt(diff @ z @ diff)
 E       RuntimeWarning: invalid value encountered in sqrt

 diff       = array([-2.26048683e-03,  1.79984607e-02, -2.69823475e-04, -2.17496045e-03,
       -3.39993602e-03,  1.77511224e-03,  1...2, -1.46271162e-02,
       -2.19338713e-03, -2.38379564e-02,  1.38016716e-02,  4.26627416e-03,
       -1.22500388e-02])
 x          = array([0.00154837, 0.03189812, 0.00185868, 0.0182995 , 0.00474141,
       0.00515214, 0.01640855, 0.00533401, 0.000562...1069757, 0.01721248,
       0.00253867, 0.00039299, 0.0062598 , 0.00231987, 0.02156041,
       0.01681967, 0.00391236])
 y          = array([3.80886020e-03, 1.38996579e-02, 2.12850468e-03, 2.04744618e-02,
       8.14134907e-03, 3.37702385e-03, 1.890674...199e-02, 1.50201023e-02,
       8.45318288e-03, 2.61578299e-02, 7.75873475e-03, 1.25533966e-02,
       1.61623992e-02])
 z          = array([[ 46.10632706,  52.16689682,  57.17884064, ...,  53.17073059,
         53.34368896,  64.68453979],
       [ 52....8.32660675],
       [ 64.68453979,  67.60933685,  62.40522003, ...,  73.46412659,
         68.32660675, 122.19455719]])

 scripts/test.py:152: RuntimeWarning
 _________________ test_curved[sve-mahalanobis-dtypes1-97-2-5] __________________

 ndim = 97, dtypes = ('float32', 'float32'), metric = 'mahalanobis'
 capability = 'sve'
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("ndim", [11, 97])
    @pytest.mark.parametrize(
        "dtypes",  # representation datatype and compute precision
        [
            ("float64", "float64"),
            ("float32", "float32"),
            ("float16", "float32"),  # otherwise NumPy keeps aggregating too much error
        ],
    )
    @pytest.mark.parametrize("metric", ["bilinear", "mahalanobis"])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_curved(ndim, dtypes, metric, capability, stats_fixture):
        """Compares various SIMD kernels (like Bilinear Forms and Mahalanobis distances) for curved spaces
        with their NumPy or baseline counterparts, testing accuracy for IEEE standard floating-point types."""
    
        dtype, compute_dtype = dtypes
        if dtype == "float16" and is_running_under_qemu():
            pytest.skip("Testing low-precision math isn't reliable in QEMU")
    
        np.random.seed()
    
        # Let's generate some non-negative probability distributions
        a = np.abs(np.random.randn(ndim).astype(dtype))
        b = np.abs(np.random.randn(ndim).astype(dtype))
        a /= np.sum(a)
        b /= np.sum(b)
    
        # Let's compute the inverse of the covariance matrix, otherwise in the SciPy
        # implementation of the Mahalanobis we may face `sqrt` of a negative number.
        # We multiply the matrix by its transpose to get a positive-semi-definite matrix.
        c = np.abs(np.random.randn(ndim, ndim).astype(dtype))
        c = np.dot(c, c.T)
    
        keep_one_capability(capability)
        baseline_kernel, simd_kernel = name_to_kernels(metric)
 >       accurate_dt, accurate = profile(
            baseline_kernel,
            a.astype(np.float64),
            b.astype(np.float64),
            c.astype(np.float64),
        )

 a          = array([0.01179272, 0.01036254, 0.0059543 , 0.0015779 , 0.00856421,
       0.00306894, 0.03634759, 0.00962117, 0.005238...2227,
       0.00198714, 0.00612292, 0.00954534, 0.01296468, 0.00453877,
       0.0068422 , 0.00451064], dtype=float32)
 b          = array([2.26855297e-02, 1.17571745e-02, 6.72302907e-03, 6.01777900e-03,
       1.18129430e-02, 1.14153204e-02, 2.149879...0781e-03,
       1.77205876e-02, 3.35904071e-03, 6.93957414e-03, 2.11232789e-02,
       1.06819956e-04], dtype=float32)
 baseline_kernel = <function baseline_mahalanobis at 0x7ffff48ef4c0>
 c          = array([[70.54968 , 68.18544 , 62.94088 , ..., 68.834114, 59.699883,
        60.959454],
       [68.18544 , 66.73886 , ...    58.481354],
       [60.959454, 59.107418, 51.61528 , ..., 58.875103, 58.481354,
        97.984604]], dtype=float32)
 capability = 'sve'
 compute_dtype = 'float32'
 dtype      = 'float32'
 dtypes     = ('float32', 'float32')
 metric     = 'mahalanobis'
 ndim       = 97
 simd_kernel = <built-in function mahalanobis>
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

 scripts/test.py:656: 
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
 scripts/test.py:179: in profile
    result = callable(*args, **kwargs)
        args       = (array([0.01179272, 0.01036254, 0.0059543 , 0.0015779 , 0.00856421,
       0.00306894, 0.03634759, 0.00962117, 0.00523...05, 58.48135376],
       [60.95945358, 59.10741806, 51.61528015, ..., 58.875103  ,
        58.48135376, 97.98460388]]))
        before     = 9835969935619
        callable   = <function baseline_mahalanobis at 0x7ffff48ef4c0>
        kwargs     = {}
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

 x = array([0.01179272, 0.01036254, 0.0059543 , 0.0015779 , 0.00856421,
       0.00306894, 0.03634759, 0.00962117, 0.005238...3104555, 0.00282227,
       0.00198714, 0.00612292, 0.00954534, 0.01296468, 0.00453877,
       0.0068422 , 0.00451064])
 y = array([2.26855297e-02, 1.17571745e-02, 6.72302907e-03, 6.01777900e-03,
       1.18129430e-02, 1.14153204e-02, 2.149879...938e-03, 2.72210781e-03,
       1.77205876e-02, 3.35904071e-03, 6.93957414e-03, 2.11232789e-02,
       1.06819956e-04])
 z = array([[70.54968262, 68.18544006, 62.94087982, ..., 68.83411407,
        59.69988251, 60.95945358],
       [68.1854400...405, 58.48135376],
       [60.95945358, 59.10741806, 51.61528015, ..., 58.875103  ,
        58.48135376, 97.98460388]])

    def baseline_mahalanobis(x, y, z):
        diff = x - y
 >       return np.sqrt(diff @ z @ diff)
 E       RuntimeWarning: invalid value encountered in sqrt

 diff       = array([-0.01089281, -0.00139463, -0.00076873, -0.00443987, -0.00324873,
       -0.00834638,  0.03419772,  0.00321666, ... -0.00520851,
       -0.00192434,  0.00340081, -0.00817525,  0.00960564, -0.00240081,
       -0.01428108,  0.00440382])
 x          = array([0.01179272, 0.01036254, 0.0059543 , 0.0015779 , 0.00856421,
       0.00306894, 0.03634759, 0.00962117, 0.005238...3104555, 0.00282227,
       0.00198714, 0.00612292, 0.00954534, 0.01296468, 0.00453877,
       0.0068422 , 0.00451064])
 y          = array([2.26855297e-02, 1.17571745e-02, 6.72302907e-03, 6.01777900e-03,
       1.18129430e-02, 1.14153204e-02, 2.149879...938e-03, 2.72210781e-03,
       1.77205876e-02, 3.35904071e-03, 6.93957414e-03, 2.11232789e-02,
       1.06819956e-04])
 z          = array([[70.54968262, 68.18544006, 62.94087982, ..., 68.83411407,
        59.69988251, 60.95945358],
       [68.1854400...405, 58.48135376],
       [60.95945358, 59.10741806, 51.61528015, ..., 58.875103  ,
        58.48135376, 97.98460388]])

 scripts/test.py:152: RuntimeWarning
 _________________ test_curved[sve-mahalanobis-dtypes1-97-3-5] __________________

 ndim = 97, dtypes = ('float32', 'float32'), metric = 'mahalanobis'
 capability = 'sve'
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("ndim", [11, 97])
    @pytest.mark.parametrize(
        "dtypes",  # representation datatype and compute precision
        [
            ("float64", "float64"),
            ("float32", "float32"),
            ("float16", "float32"),  # otherwise NumPy keeps aggregating too much error
        ],
    )
    @pytest.mark.parametrize("metric", ["bilinear", "mahalanobis"])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_curved(ndim, dtypes, metric, capability, stats_fixture):
        """Compares various SIMD kernels (like Bilinear Forms and Mahalanobis distances) for curved spaces
        with their NumPy or baseline counterparts, testing accuracy for IEEE standard floating-point types."""
    
        dtype, compute_dtype = dtypes
        if dtype == "float16" and is_running_under_qemu():
            pytest.skip("Testing low-precision math isn't reliable in QEMU")
    
        np.random.seed()
    
        # Let's generate some non-negative probability distributions
        a = np.abs(np.random.randn(ndim).astype(dtype))
        b = np.abs(np.random.randn(ndim).astype(dtype))
        a /= np.sum(a)
        b /= np.sum(b)
    
        # Let's compute the inverse of the covariance matrix, otherwise in the SciPy
        # implementation of the Mahalanobis we may face `sqrt` of a negative number.
        # We multiply the matrix by its transpose to get a positive-semi-definite matrix.
        c = np.abs(np.random.randn(ndim, ndim).astype(dtype))
        c = np.dot(c, c.T)
    
        keep_one_capability(capability)
        baseline_kernel, simd_kernel = name_to_kernels(metric)
 >       accurate_dt, accurate = profile(
            baseline_kernel,
            a.astype(np.float64),
            b.astype(np.float64),
            c.astype(np.float64),
        )

 a          = array([0.01219706, 0.01284225, 0.00481479, 0.00168552, 0.01403813,
       0.01297447, 0.01036073, 0.00662844, 0.008093...1272,
       0.01033795, 0.00934483, 0.00382229, 0.02085708, 0.01281037,
       0.0167314 , 0.0056016 ], dtype=float32)
 b          = array([1.11118658e-03, 1.44879213e-02, 7.03349290e-03, 2.02575177e-02,
       7.50645762e-03, 1.38990988e-03, 1.134062...4362e-03,
       1.30294561e-02, 2.39807665e-02, 5.96481422e-03, 1.79492701e-02,
       1.75466668e-02], dtype=float32)
 baseline_kernel = <function baseline_mahalanobis at 0x7ffff48ef4c0>
 c          = array([[57.84154 , 55.01382 , 67.56467 , ..., 64.12165 , 57.92955 ,
        59.13235 ],
       [55.01382 , 67.339294, ...    52.03633 ],
       [59.13235 , 71.916756, 61.1079  , ..., 52.78462 , 52.03633 ,
        95.672165]], dtype=float32)
 capability = 'sve'
 compute_dtype = 'float32'
 dtype      = 'float32'
 dtypes     = ('float32', 'float32')
 metric     = 'mahalanobis'
 ndim       = 97
 simd_kernel = <built-in function mahalanobis>
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

 scripts/test.py:656: 
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
 scripts/test.py:179: in profile
    result = callable(*args, **kwargs)
        args       = (array([0.01219706, 0.01284225, 0.00481479, 0.00168552, 0.01403813,
       0.01297447, 0.01036073, 0.00662844, 0.00809...43, 52.03633118],
       [59.13235092, 71.91675568, 61.10789871, ..., 52.78461838,
        52.03633118, 95.67216492]]))
        before     = 9836593805972
        callable   = <function baseline_mahalanobis at 0x7ffff48ef4c0>
        kwargs     = {}
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

 x = array([0.01219706, 0.01284225, 0.00481479, 0.00168552, 0.01403813,
       0.01297447, 0.01036073, 0.00662844, 0.008093...240273 , 0.00261272,
       0.01033795, 0.00934483, 0.00382229, 0.02085708, 0.01281037,
       0.0167314 , 0.0056016 ])
 y = array([1.11118658e-03, 1.44879213e-02, 7.03349290e-03, 2.02575177e-02,
       7.50645762e-03, 1.38990988e-03, 1.134062...286e-03, 4.15354362e-03,
       1.30294561e-02, 2.39807665e-02, 5.96481422e-03, 1.79492701e-02,
       1.75466668e-02])
 z = array([[57.84154129, 55.01382065, 67.56466675, ..., 64.1216507 ,
        57.92955017, 59.13235092],
       [55.0138206...743, 52.03633118],
       [59.13235092, 71.91675568, 61.10789871, ..., 52.78461838,
        52.03633118, 95.67216492]])

    def baseline_mahalanobis(x, y, z):
        diff = x - y
 >       return np.sqrt(diff @ z @ diff)
 E       RuntimeWarning: invalid value encountered in sqrt

 diff       = array([ 0.01108587, -0.00164567, -0.00221871, -0.018572  ,  0.00653168,
        0.01158456, -0.0009799 , -0.00213733, ...  0.00149182,
        0.00149532,  0.00519129, -0.00920717, -0.00312368,  0.00684555,
       -0.00121787, -0.01194507])
 x          = array([0.01219706, 0.01284225, 0.00481479, 0.00168552, 0.01403813,
       0.01297447, 0.01036073, 0.00662844, 0.008093...240273 , 0.00261272,
       0.01033795, 0.00934483, 0.00382229, 0.02085708, 0.01281037,
       0.0167314 , 0.0056016 ])
 y          = array([1.11118658e-03, 1.44879213e-02, 7.03349290e-03, 2.02575177e-02,
       7.50645762e-03, 1.38990988e-03, 1.134062...286e-03, 4.15354362e-03,
       1.30294561e-02, 2.39807665e-02, 5.96481422e-03, 1.79492701e-02,
       1.75466668e-02])
 z          = array([[57.84154129, 55.01382065, 67.56466675, ..., 64.1216507 ,
        57.92955017, 59.13235092],
       [55.0138206...743, 52.03633118],
       [59.13235092, 71.91675568, 61.10789871, ..., 52.78461838,
        52.03633118, 95.67216492]])

 scripts/test.py:152: RuntimeWarning
 _________________ test_curved[sve-mahalanobis-dtypes1-97-4-5] __________________

 ndim = 97, dtypes = ('float32', 'float32'), metric = 'mahalanobis'
 capability = 'sve'
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("ndim", [11, 97])
    @pytest.mark.parametrize(
        "dtypes",  # representation datatype and compute precision
        [
            ("float64", "float64"),
            ("float32", "float32"),
            ("float16", "float32"),  # otherwise NumPy keeps aggregating too much error
        ],
    )
    @pytest.mark.parametrize("metric", ["bilinear", "mahalanobis"])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_curved(ndim, dtypes, metric, capability, stats_fixture):
        """Compares various SIMD kernels (like Bilinear Forms and Mahalanobis distances) for curved spaces
        with their NumPy or baseline counterparts, testing accuracy for IEEE standard floating-point types."""
    
        dtype, compute_dtype = dtypes
        if dtype == "float16" and is_running_under_qemu():
            pytest.skip("Testing low-precision math isn't reliable in QEMU")
    
        np.random.seed()
    
        # Let's generate some non-negative probability distributions
        a = np.abs(np.random.randn(ndim).astype(dtype))
        b = np.abs(np.random.randn(ndim).astype(dtype))
        a /= np.sum(a)
        b /= np.sum(b)
    
        # Let's compute the inverse of the covariance matrix, otherwise in the SciPy
        # implementation of the Mahalanobis we may face `sqrt` of a negative number.
        # We multiply the matrix by its transpose to get a positive-semi-definite matrix.
        c = np.abs(np.random.randn(ndim, ndim).astype(dtype))
        c = np.dot(c, c.T)
    
        keep_one_capability(capability)
        baseline_kernel, simd_kernel = name_to_kernels(metric)
 >       accurate_dt, accurate = profile(
            baseline_kernel,
            a.astype(np.float64),
            b.astype(np.float64),
            c.astype(np.float64),
        )

 a          = array([0.00055477, 0.00072346, 0.00069846, 0.00326771, 0.01899423,
       0.0024047 , 0.00281434, 0.01187838, 0.002908...8761,
       0.00720807, 0.00445719, 0.01655827, 0.00273426, 0.01619994,
       0.00791512, 0.00135728], dtype=float32)
 b          = array([0.01846114, 0.00185756, 0.01198972, 0.01056782, 0.00300742,
       0.00073716, 0.01382682, 0.00314936, 0.006601...9295,
       0.01613138, 0.00974245, 0.00103715, 0.0108334 , 0.00471317,
       0.01104865, 0.00828682], dtype=float32)
 baseline_kernel = <function baseline_mahalanobis at 0x7ffff48ef4c0>
 c          = array([[ 67.210236,  71.40044 ,  62.469753, ...,  63.010742,  65.6645  ,
         66.55497 ],
       [ 71.40044 ,  56.....780075],
       [ 66.55497 ,  68.360146,  63.470097, ...,  71.450645,  66.780075,
        105.99629 ]], dtype=float32)
 capability = 'sve'
 compute_dtype = 'float32'
 dtype      = 'float32'
 dtypes     = ('float32', 'float32')
 metric     = 'mahalanobis'
 ndim       = 97
 simd_kernel = <built-in function mahalanobis>
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

 scripts/test.py:656: 
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
 scripts/test.py:179: in profile
    result = callable(*args, **kwargs)
        args       = (array([0.00055477, 0.00072346, 0.00069846, 0.00326771, 0.01899423,
       0.0024047 , 0.00281434, 0.01187838, 0.00290....78007507],
       [ 66.55496979,  68.36014557,  63.47009659, ...,  71.45064545,
         66.78007507, 105.99629211]]))
        before     = 9837213817313
        callable   = <function baseline_mahalanobis at 0x7ffff48ef4c0>
        kwargs     = {}
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

 x = array([0.00055477, 0.00072346, 0.00069846, 0.00326771, 0.01899423,
       0.0024047 , 0.00281434, 0.01187838, 0.002908...1664323, 0.00148761,
       0.00720807, 0.00445719, 0.01655827, 0.00273426, 0.01619994,
       0.00791512, 0.00135728])
 y = array([0.01846114, 0.00185756, 0.01198972, 0.01056782, 0.00300742,
       0.00073716, 0.01382682, 0.00314936, 0.006601...1189981, 0.00929295,
       0.01613138, 0.00974245, 0.00103715, 0.0108334 , 0.00471317,
       0.01104865, 0.00828682])
 z = array([[ 67.2102356 ,  71.4004364 ,  62.46975327, ...,  63.01074219,
         65.66449738,  66.55496979],
       [ 71....6.78007507],
       [ 66.55496979,  68.36014557,  63.47009659, ...,  71.45064545,
         66.78007507, 105.99629211]])

    def baseline_mahalanobis(x, y, z):
        diff = x - y
 >       return np.sqrt(diff @ z @ diff)
 E       RuntimeWarning: invalid value encountered in sqrt

 diff       = array([-0.01790637, -0.0011341 , -0.01129126, -0.00730011,  0.01598681,
        0.00166755, -0.01101249,  0.00872903, ... -0.00780534,
       -0.00892331, -0.00528526,  0.01552112, -0.00809914,  0.01148678,
       -0.00313352, -0.00692955])
 x          = array([0.00055477, 0.00072346, 0.00069846, 0.00326771, 0.01899423,
       0.0024047 , 0.00281434, 0.01187838, 0.002908...1664323, 0.00148761,
       0.00720807, 0.00445719, 0.01655827, 0.00273426, 0.01619994,
       0.00791512, 0.00135728])
 y          = array([0.01846114, 0.00185756, 0.01198972, 0.01056782, 0.00300742,
       0.00073716, 0.01382682, 0.00314936, 0.006601...1189981, 0.00929295,
       0.01613138, 0.00974245, 0.00103715, 0.0108334 , 0.00471317,
       0.01104865, 0.00828682])
 z          = array([[ 67.2102356 ,  71.4004364 ,  62.46975327, ...,  63.01074219,
         65.66449738,  66.55496979],
       [ 71....6.78007507],
       [ 66.55496979,  68.36014557,  63.47009659, ...,  71.45064545,
         66.78007507, 105.99629211]])

 scripts/test.py:152: RuntimeWarning
 _______________ test_curved[sve_f16-mahalanobis-dtypes1-11-2-5] ________________

 ndim = 11, dtypes = ('float32', 'float32'), metric = 'mahalanobis'
 capability = 'sve_f16'
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("ndim", [11, 97])
    @pytest.mark.parametrize(
        "dtypes",  # representation datatype and compute precision
        [
            ("float64", "float64"),
            ("float32", "float32"),
            ("float16", "float32"),  # otherwise NumPy keeps aggregating too much error
        ],
    )
    @pytest.mark.parametrize("metric", ["bilinear", "mahalanobis"])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_curved(ndim, dtypes, metric, capability, stats_fixture):
        """Compares various SIMD kernels (like Bilinear Forms and Mahalanobis distances) for curved spaces
        with their NumPy or baseline counterparts, testing accuracy for IEEE standard floating-point types."""
    
        dtype, compute_dtype = dtypes
        if dtype == "float16" and is_running_under_qemu():
            pytest.skip("Testing low-precision math isn't reliable in QEMU")
    
        np.random.seed()
    
        # Let's generate some non-negative probability distributions
        a = np.abs(np.random.randn(ndim).astype(dtype))
        b = np.abs(np.random.randn(ndim).astype(dtype))
        a /= np.sum(a)
        b /= np.sum(b)
    
        # Let's compute the inverse of the covariance matrix, otherwise in the SciPy
        # implementation of the Mahalanobis we may face `sqrt` of a negative number.
        # We multiply the matrix by its transpose to get a positive-semi-definite matrix.
        c = np.abs(np.random.randn(ndim, ndim).astype(dtype))
        c = np.dot(c, c.T)
    
        keep_one_capability(capability)
        baseline_kernel, simd_kernel = name_to_kernels(metric)
 >       accurate_dt, accurate = profile(
            baseline_kernel,
            a.astype(np.float64),
            b.astype(np.float64),
            c.astype(np.float64),
        )

 a          = array([0.13699707, 0.02199499, 0.03039774, 0.27060673, 0.00140274,
       0.01910348, 0.00092751, 0.1412116 , 0.28663036, 0.04378747,
       0.04694033], dtype=float32)
 b          = array([0.10164052, 0.25276133, 0.06746633, 0.02293266, 0.13098419,
       0.03495596, 0.07959677, 0.13805059, 0.06080938, 0.05839757,
       0.05240469], dtype=float32)
 baseline_kernel = <function baseline_mahalanobis at 0x7ffff48ef4c0>
 c          = array([[ 6.0463257,  5.0630283,  7.151657 ,  8.165584 , 12.559593 ,
         5.738157 ,  9.021476 ,  8.773019 ,  9.608... ,  5.4687567,
         5.675983 ,  2.4447052,  6.3183246,  6.031467 ,  5.5417657,
         8.675354 ]], dtype=float32)
 capability = 'sve_f16'
 compute_dtype = 'float32'
 dtype      = 'float32'
 dtypes     = ('float32', 'float32')
 metric     = 'mahalanobis'
 ndim       = 11
 simd_kernel = <built-in function mahalanobis>
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

 scripts/test.py:656: 
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
 scripts/test.py:179: in profile
    result = callable(*args, **kwargs)
        args       = (array([0.13699707, 0.02199499, 0.03039774, 0.27060673, 0.00140274,
       0.01910348, 0.00092751, 0.1412116 , 0.28663...46926212,  5.46875668,
         5.67598295,  2.44470525,  6.31832457,  6.03146696,  5.54176569,
         8.675354  ]]))
        before     = 9839107852415
        callable   = <function baseline_mahalanobis at 0x7ffff48ef4c0>
        kwargs     = {}
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

 x = array([0.13699707, 0.02199499, 0.03039774, 0.27060673, 0.00140274,
       0.01910348, 0.00092751, 0.1412116 , 0.28663036, 0.04378747,
       0.04694033])
 y = array([0.10164052, 0.25276133, 0.06746633, 0.02293266, 0.13098419,
       0.03495596, 0.07959677, 0.13805059, 0.06080938, 0.05839757,
       0.05240469])
 z = array([[ 6.04632568,  5.06302834,  7.1516571 ,  8.16558361, 12.5595932 ,
         5.7381568 ,  9.02147579,  8.77301884....46926212,  5.46875668,
         5.67598295,  2.44470525,  6.31832457,  6.03146696,  5.54176569,
         8.675354  ]])

    def baseline_mahalanobis(x, y, z):
        diff = x - y
 >       return np.sqrt(diff @ z @ diff)
 E       RuntimeWarning: invalid value encountered in sqrt

 diff       = array([ 0.03535656, -0.23076634, -0.03706859,  0.24767406, -0.12958145,
       -0.01585248, -0.07866927,  0.00316101,  0.22582098, -0.0146101 ,
       -0.00546436])
 x          = array([0.13699707, 0.02199499, 0.03039774, 0.27060673, 0.00140274,
       0.01910348, 0.00092751, 0.1412116 , 0.28663036, 0.04378747,
       0.04694033])
 y          = array([0.10164052, 0.25276133, 0.06746633, 0.02293266, 0.13098419,
       0.03495596, 0.07959677, 0.13805059, 0.06080938, 0.05839757,
       0.05240469])
 z          = array([[ 6.04632568,  5.06302834,  7.1516571 ,  8.16558361, 12.5595932 ,
         5.7381568 ,  9.02147579,  8.77301884....46926212,  5.46875668,
         5.67598295,  2.44470525,  6.31832457,  6.03146696,  5.54176569,
         8.675354  ]])

 scripts/test.py:152: RuntimeWarning
 _______________ test_curved[sve_f16-mahalanobis-dtypes1-11-3-5] ________________

 ndim = 11, dtypes = ('float32', 'float32'), metric = 'mahalanobis'
 capability = 'sve_f16'
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("ndim", [11, 97])
    @pytest.mark.parametrize(
        "dtypes",  # representation datatype and compute precision
        [
            ("float64", "float64"),
            ("float32", "float32"),
            ("float16", "float32"),  # otherwise NumPy keeps aggregating too much error
        ],
    )
    @pytest.mark.parametrize("metric", ["bilinear", "mahalanobis"])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_curved(ndim, dtypes, metric, capability, stats_fixture):
        """Compares various SIMD kernels (like Bilinear Forms and Mahalanobis distances) for curved spaces
        with their NumPy or baseline counterparts, testing accuracy for IEEE standard floating-point types."""
    
        dtype, compute_dtype = dtypes
        if dtype == "float16" and is_running_under_qemu():
            pytest.skip("Testing low-precision math isn't reliable in QEMU")
    
        np.random.seed()
    
        # Let's generate some non-negative probability distributions
        a = np.abs(np.random.randn(ndim).astype(dtype))
        b = np.abs(np.random.randn(ndim).astype(dtype))
        a /= np.sum(a)
        b /= np.sum(b)
    
        # Let's compute the inverse of the covariance matrix, otherwise in the SciPy
        # implementation of the Mahalanobis we may face `sqrt` of a negative number.
        # We multiply the matrix by its transpose to get a positive-semi-definite matrix.
        c = np.abs(np.random.randn(ndim, ndim).astype(dtype))
        c = np.dot(c, c.T)
    
        keep_one_capability(capability)
        baseline_kernel, simd_kernel = name_to_kernels(metric)
 >       accurate_dt, accurate = profile(
            baseline_kernel,
            a.astype(np.float64),
            b.astype(np.float64),
            c.astype(np.float64),
        )

 a          = array([0.02511002, 0.07567891, 0.03735658, 0.08694794, 0.11688765,
       0.07611433, 0.09140281, 0.00909586, 0.25376835, 0.08906175,
       0.13857573], dtype=float32)
 b          = array([0.04972251, 0.05546035, 0.09120101, 0.07513535, 0.16308983,
       0.13534352, 0.1512973 , 0.0727937 , 0.11839865, 0.04599389,
       0.04156388], dtype=float32)
 baseline_kernel = <function baseline_mahalanobis at 0x7ffff48ef4c0>
 c          = array([[ 4.808964 ,  2.5301678,  6.2131267,  2.8716307,  4.638499 ,
         8.089353 ,  5.075167 ,  5.125718 ,  5.883... ,  7.0331497,
         7.872245 ,  5.672419 ,  9.007634 ,  4.099035 ,  7.7506967,
         9.34567  ]], dtype=float32)
 capability = 'sve_f16'
 compute_dtype = 'float32'
 dtype      = 'float32'
 dtypes     = ('float32', 'float32')
 metric     = 'mahalanobis'
 ndim       = 11
 simd_kernel = <built-in function mahalanobis>
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

 scripts/test.py:656: 
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
 scripts/test.py:179: in profile
    result = callable(*args, **kwargs)
        args       = (array([0.02511002, 0.07567891, 0.03735658, 0.08694794, 0.11688765,
       0.07611433, 0.09140281, 0.00909586, 0.25376...20876312,  7.03314972,
         7.87224483,  5.67241907,  9.00763416,  4.09903479,  7.75069666,
         9.34566975]]))
        before     = 9839707103448
        callable   = <function baseline_mahalanobis at 0x7ffff48ef4c0>
        kwargs     = {}
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

 x = array([0.02511002, 0.07567891, 0.03735658, 0.08694794, 0.11688765,
       0.07611433, 0.09140281, 0.00909586, 0.25376835, 0.08906175,
       0.13857573])
 y = array([0.04972251, 0.05546035, 0.09120101, 0.07513535, 0.16308983,
       0.13534352, 0.1512973 , 0.0727937 , 0.11839865, 0.04599389,
       0.04156388])
 z = array([[ 4.80896378,  2.53016782,  6.21312666,  2.87163067,  4.63849878,
         8.08935261,  5.07516718,  5.12571812....20876312,  7.03314972,
         7.87224483,  5.67241907,  9.00763416,  4.09903479,  7.75069666,
         9.34566975]])

    def baseline_mahalanobis(x, y, z):
        diff = x - y
 >       return np.sqrt(diff @ z @ diff)
 E       RuntimeWarning: invalid value encountered in sqrt

 diff       = array([-0.02461249,  0.02021856, -0.05384444,  0.01181259, -0.04620218,
       -0.05922919, -0.05989449, -0.06369784,  0.1353697 ,  0.04306786,
        0.09701186])
 x          = array([0.02511002, 0.07567891, 0.03735658, 0.08694794, 0.11688765,
       0.07611433, 0.09140281, 0.00909586, 0.25376835, 0.08906175,
       0.13857573])
 y          = array([0.04972251, 0.05546035, 0.09120101, 0.07513535, 0.16308983,
       0.13534352, 0.1512973 , 0.0727937 , 0.11839865, 0.04599389,
       0.04156388])
 z          = array([[ 4.80896378,  2.53016782,  6.21312666,  2.87163067,  4.63849878,
         8.08935261,  5.07516718,  5.12571812....20876312,  7.03314972,
         7.87224483,  5.67241907,  9.00763416,  4.09903479,  7.75069666,
         9.34566975]])

 scripts/test.py:152: RuntimeWarning
 _______________ test_curved[sve_f16-mahalanobis-dtypes1-97-2-5] ________________

 ndim = 97, dtypes = ('float32', 'float32'), metric = 'mahalanobis'
 capability = 'sve_f16'
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("ndim", [11, 97])
    @pytest.mark.parametrize(
        "dtypes",  # representation datatype and compute precision
        [
            ("float64", "float64"),
            ("float32", "float32"),
            ("float16", "float32"),  # otherwise NumPy keeps aggregating too much error
        ],
    )
    @pytest.mark.parametrize("metric", ["bilinear", "mahalanobis"])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_curved(ndim, dtypes, metric, capability, stats_fixture):
        """Compares various SIMD kernels (like Bilinear Forms and Mahalanobis distances) for curved spaces
        with their NumPy or baseline counterparts, testing accuracy for IEEE standard floating-point types."""
    
        dtype, compute_dtype = dtypes
        if dtype == "float16" and is_running_under_qemu():
            pytest.skip("Testing low-precision math isn't reliable in QEMU")
    
        np.random.seed()
    
        # Let's generate some non-negative probability distributions
        a = np.abs(np.random.randn(ndim).astype(dtype))
        b = np.abs(np.random.randn(ndim).astype(dtype))
        a /= np.sum(a)
        b /= np.sum(b)
    
        # Let's compute the inverse of the covariance matrix, otherwise in the SciPy
        # implementation of the Mahalanobis we may face `sqrt` of a negative number.
        # We multiply the matrix by its transpose to get a positive-semi-definite matrix.
        c = np.abs(np.random.randn(ndim, ndim).astype(dtype))
        c = np.dot(c, c.T)
    
        keep_one_capability(capability)
        baseline_kernel, simd_kernel = name_to_kernels(metric)
 >       accurate_dt, accurate = profile(
            baseline_kernel,
            a.astype(np.float64),
            b.astype(np.float64),
            c.astype(np.float64),
        )

 a          = array([0.00441022, 0.00489216, 0.01538115, 0.00050228, 0.01616703,
       0.01402179, 0.00836552, 0.00138984, 0.008035...9662,
       0.00172711, 0.01692053, 0.00252061, 0.02218935, 0.01026653,
       0.00277487, 0.00315764], dtype=float32)
 b          = array([6.01845700e-03, 1.08948322e-02, 4.56798263e-03, 7.55763100e-03,
       7.42469030e-03, 1.31698698e-02, 1.707369...5906e-03,
       1.34545611e-03, 7.82904215e-03, 4.28133272e-03, 1.34895137e-02,
       6.58954401e-03], dtype=float32)
 baseline_kernel = <function baseline_mahalanobis at 0x7ffff48ef4c0>
 c          = array([[54.465603, 63.226494, 52.75766 , ..., 53.046406, 68.18253 ,
        55.52307 ],
       [63.226494, 67.30139 , ...    79.42431 ],
       [55.52307 , 70.1281  , 71.74931 , ..., 59.18447 , 79.42431 ,
        99.64271 ]], dtype=float32)
 capability = 'sve_f16'
 compute_dtype = 'float32'
 dtype      = 'float32'
 dtypes     = ('float32', 'float32')
 metric     = 'mahalanobis'
 ndim       = 97
 simd_kernel = <built-in function mahalanobis>
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

 scripts/test.py:656: 
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
 scripts/test.py:179: in profile
    result = callable(*args, **kwargs)
        args       = (array([0.00441022, 0.00489216, 0.01538115, 0.00050228, 0.01616703,
       0.01402179, 0.00836552, 0.00138984, 0.00803...2 , 79.42430878],
       [55.52307129, 70.12809753, 71.74931335, ..., 59.18447113,
        79.42430878, 99.64270782]]))
        before     = 9840345926679
        callable   = <function baseline_mahalanobis at 0x7ffff48ef4c0>
        kwargs     = {}
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

 x = array([0.00441022, 0.00489216, 0.01538115, 0.00050228, 0.01616703,
       0.01402179, 0.00836552, 0.00138984, 0.008035...1398593, 0.00239662,
       0.00172711, 0.01692053, 0.00252061, 0.02218935, 0.01026653,
       0.00277487, 0.00315764])
 y = array([6.01845700e-03, 1.08948322e-02, 4.56798263e-03, 7.55763100e-03,
       7.42469030e-03, 1.31698698e-02, 1.707369...947e-02, 4.76005906e-03,
       1.34545611e-03, 7.82904215e-03, 4.28133272e-03, 1.34895137e-02,
       6.58954401e-03])
 z = array([[54.46560287, 63.22649384, 52.75765991, ..., 53.04640579,
        68.18253326, 55.52307129],
       [63.2264938...62 , 79.42430878],
       [55.52307129, 70.12809753, 71.74931335, ..., 59.18447113,
        79.42430878, 99.64270782]])

    def baseline_mahalanobis(x, y, z):
        diff = x - y
 >       return np.sqrt(diff @ z @ diff)
 E       RuntimeWarning: invalid value encountered in sqrt

 diff       = array([-1.60823436e-03, -6.00267341e-03,  1.08131664e-02, -7.05535256e-03,
        8.74234317e-03,  8.51918943e-04, -8...2,  1.21604684e-02,
        1.17515353e-03,  1.43603068e-02,  5.98520041e-03, -1.07146474e-02,
       -3.43190413e-03])
 x          = array([0.00441022, 0.00489216, 0.01538115, 0.00050228, 0.01616703,
       0.01402179, 0.00836552, 0.00138984, 0.008035...1398593, 0.00239662,
       0.00172711, 0.01692053, 0.00252061, 0.02218935, 0.01026653,
       0.00277487, 0.00315764])
 y          = array([6.01845700e-03, 1.08948322e-02, 4.56798263e-03, 7.55763100e-03,
       7.42469030e-03, 1.31698698e-02, 1.707369...947e-02, 4.76005906e-03,
       1.34545611e-03, 7.82904215e-03, 4.28133272e-03, 1.34895137e-02,
       6.58954401e-03])
 z          = array([[54.46560287, 63.22649384, 52.75765991, ..., 53.04640579,
        68.18253326, 55.52307129],
       [63.2264938...62 , 79.42430878],
       [55.52307129, 70.12809753, 71.74931335, ..., 59.18447113,
        79.42430878, 99.64270782]])

 scripts/test.py:152: RuntimeWarning
 _______________ test_curved[sve_f16-mahalanobis-dtypes1-97-3-5] ________________

 ndim = 97, dtypes = ('float32', 'float32'), metric = 'mahalanobis'
 capability = 'sve_f16'
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("ndim", [11, 97])
    @pytest.mark.parametrize(
        "dtypes",  # representation datatype and compute precision
        [
            ("float64", "float64"),
            ("float32", "float32"),
            ("float16", "float32"),  # otherwise NumPy keeps aggregating too much error
        ],
    )
    @pytest.mark.parametrize("metric", ["bilinear", "mahalanobis"])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_curved(ndim, dtypes, metric, capability, stats_fixture):
        """Compares various SIMD kernels (like Bilinear Forms and Mahalanobis distances) for curved spaces
        with their NumPy or baseline counterparts, testing accuracy for IEEE standard floating-point types."""
    
        dtype, compute_dtype = dtypes
        if dtype == "float16" and is_running_under_qemu():
            pytest.skip("Testing low-precision math isn't reliable in QEMU")
    
        np.random.seed()
    
        # Let's generate some non-negative probability distributions
        a = np.abs(np.random.randn(ndim).astype(dtype))
        b = np.abs(np.random.randn(ndim).astype(dtype))
        a /= np.sum(a)
        b /= np.sum(b)
    
        # Let's compute the inverse of the covariance matrix, otherwise in the SciPy
        # implementation of the Mahalanobis we may face `sqrt` of a negative number.
        # We multiply the matrix by its transpose to get a positive-semi-definite matrix.
        c = np.abs(np.random.randn(ndim, ndim).astype(dtype))
        c = np.dot(c, c.T)
    
        keep_one_capability(capability)
        baseline_kernel, simd_kernel = name_to_kernels(metric)
 >       accurate_dt, accurate = profile(
            baseline_kernel,
            a.astype(np.float64),
            b.astype(np.float64),
            c.astype(np.float64),
        )

 a          = array([1.81486532e-02, 1.03409998e-02, 1.52832679e-02, 5.80400229e-03,
       8.71454272e-03, 3.79765959e-04, 7.884774...0042e-02,
       2.14015450e-02, 5.60782012e-03, 1.27634630e-02, 1.73955671e-02,
       1.88872144e-02], dtype=float32)
 b          = array([0.00903281, 0.01032633, 0.00214235, 0.02983102, 0.00592219,
       0.01479399, 0.01921996, 0.00939775, 0.000155...3109,
       0.00857335, 0.01116366, 0.00665207, 0.00799696, 0.00093835,
       0.02442409, 0.00111749], dtype=float32)
 baseline_kernel = <function baseline_mahalanobis at 0x7ffff48ef4c0>
 c          = array([[ 58.32012 ,  56.733948,  58.414593, ...,  56.88226 ,  60.92819 ,
         58.043415],
       [ 56.733948,  60.....597073],
       [ 58.043415,  56.46456 ,  66.84623 , ...,  56.62578 ,  60.597073,
        112.63678 ]], dtype=float32)
 capability = 'sve_f16'
 compute_dtype = 'float32'
 dtype      = 'float32'
 dtypes     = ('float32', 'float32')
 metric     = 'mahalanobis'
 ndim       = 97
 simd_kernel = <built-in function mahalanobis>
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

 scripts/test.py:656: 
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
 scripts/test.py:179: in profile
    result = callable(*args, **kwargs)
        args       = (array([1.81486532e-02, 1.03409998e-02, 1.52832679e-02, 5.80400229e-03,
       8.71454272e-03, 3.79765959e-04, 7.88477....5970726 ],
       [ 58.04341507,  56.46456146,  66.84622955, ...,  56.6257782 ,
         60.5970726 , 112.63677979]]))
        before     = 9840968831790
        callable   = <function baseline_mahalanobis at 0x7ffff48ef4c0>
        kwargs     = {}
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

 x = array([1.81486532e-02, 1.03409998e-02, 1.52832679e-02, 5.80400229e-03,
       8.71454272e-03, 3.79765959e-04, 7.884774...404e-03, 1.09480042e-02,
       2.14015450e-02, 5.60782012e-03, 1.27634630e-02, 1.73955671e-02,
       1.88872144e-02])
 y = array([0.00903281, 0.01032633, 0.00214235, 0.02983102, 0.00592219,
       0.01479399, 0.01921996, 0.00939775, 0.000155...143324 , 0.00143109,
       0.00857335, 0.01116366, 0.00665207, 0.00799696, 0.00093835,
       0.02442409, 0.00111749])
 z = array([[ 58.32012177,  56.73394775,  58.41459274, ...,  56.88225937,
         60.92818832,  58.04341507],
       [ 56....0.5970726 ],
       [ 58.04341507,  56.46456146,  66.84622955, ...,  56.6257782 ,
         60.5970726 , 112.63677979]])

    def baseline_mahalanobis(x, y, z):
        diff = x - y
 >       return np.sqrt(diff @ z @ diff)
 E       RuntimeWarning: invalid value encountered in sqrt

 diff       = array([ 9.11583845e-03,  1.46720558e-05,  1.31409189e-02, -2.40270216e-02,
        2.79235188e-03, -1.44142261e-02, -1...3, -2.15653330e-04,
        1.47494706e-02, -2.38913856e-03,  1.18251173e-02, -7.02852011e-03,
        1.77697224e-02])
 x          = array([1.81486532e-02, 1.03409998e-02, 1.52832679e-02, 5.80400229e-03,
       8.71454272e-03, 3.79765959e-04, 7.884774...404e-03, 1.09480042e-02,
       2.14015450e-02, 5.60782012e-03, 1.27634630e-02, 1.73955671e-02,
       1.88872144e-02])
 y          = array([0.00903281, 0.01032633, 0.00214235, 0.02983102, 0.00592219,
       0.01479399, 0.01921996, 0.00939775, 0.000155...143324 , 0.00143109,
       0.00857335, 0.01116366, 0.00665207, 0.00799696, 0.00093835,
       0.02442409, 0.00111749])
 z          = array([[ 58.32012177,  56.73394775,  58.41459274, ...,  56.88225937,
         60.92818832,  58.04341507],
       [ 56....0.5970726 ],
       [ 58.04341507,  56.46456146,  66.84622955, ...,  56.6257782 ,
         60.5970726 , 112.63677979]])

 scripts/test.py:152: RuntimeWarning
 _______________ test_curved[sve_bf16-mahalanobis-dtypes1-11-2-5] _______________

 ndim = 11, dtypes = ('float32', 'float32'), metric = 'mahalanobis'
 capability = 'sve_bf16'
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("ndim", [11, 97])
    @pytest.mark.parametrize(
        "dtypes",  # representation datatype and compute precision
        [
            ("float64", "float64"),
            ("float32", "float32"),
            ("float16", "float32"),  # otherwise NumPy keeps aggregating too much error
        ],
    )
    @pytest.mark.parametrize("metric", ["bilinear", "mahalanobis"])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_curved(ndim, dtypes, metric, capability, stats_fixture):
        """Compares various SIMD kernels (like Bilinear Forms and Mahalanobis distances) for curved spaces
        with their NumPy or baseline counterparts, testing accuracy for IEEE standard floating-point types."""
    
        dtype, compute_dtype = dtypes
        if dtype == "float16" and is_running_under_qemu():
            pytest.skip("Testing low-precision math isn't reliable in QEMU")
    
        np.random.seed()
    
        # Let's generate some non-negative probability distributions
        a = np.abs(np.random.randn(ndim).astype(dtype))
        b = np.abs(np.random.randn(ndim).astype(dtype))
        a /= np.sum(a)
        b /= np.sum(b)
    
        # Let's compute the inverse of the covariance matrix, otherwise in the SciPy
        # implementation of the Mahalanobis we may face `sqrt` of a negative number.
        # We multiply the matrix by its transpose to get a positive-semi-definite matrix.
        c = np.abs(np.random.randn(ndim, ndim).astype(dtype))
        c = np.dot(c, c.T)
    
        keep_one_capability(capability)
        baseline_kernel, simd_kernel = name_to_kernels(metric)
 >       accurate_dt, accurate = profile(
            baseline_kernel,
            a.astype(np.float64),
            b.astype(np.float64),
            c.astype(np.float64),
        )

 a          = array([0.20998585, 0.0200306 , 0.11750436, 0.1214483 , 0.09337511,
       0.0867295 , 0.11970726, 0.01110688, 0.12739854, 0.01771036,
       0.07500324], dtype=float32)
 b          = array([0.00596578, 0.09252455, 0.07500043, 0.07591187, 0.03766003,
       0.2836061 , 0.10895725, 0.02529784, 0.09128135, 0.02896778,
       0.17482705], dtype=float32)
 baseline_kernel = <function baseline_mahalanobis at 0x7ffff48ef4c0>
 c          = array([[ 9.254262 ,  6.5411034, 11.629969 ,  6.106625 ,  5.946653 ,
         9.457898 ,  5.784327 ,  9.68044  ,  6.830... ,  8.58384  ,
         4.725831 ,  5.882981 ,  7.801631 ,  5.4928823,  7.0065417,
         9.240583 ]], dtype=float32)
 capability = 'sve_bf16'
 compute_dtype = 'float32'
 dtype      = 'float32'
 dtypes     = ('float32', 'float32')
 metric     = 'mahalanobis'
 ndim       = 11
 simd_kernel = <built-in function mahalanobis>
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

 scripts/test.py:656: 
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
 scripts/test.py:179: in profile
    result = callable(*args, **kwargs)
        args       = (array([0.20998585, 0.0200306 , 0.11750436, 0.1214483 , 0.09337511,
       0.0867295 , 0.11970726, 0.01110688, 0.12739...60119104,  8.58384037,
         4.72583103,  5.88298082,  7.80163097,  5.49288225,  7.00654173,
         9.24058342]]))
        before     = 9842893380504
        callable   = <function baseline_mahalanobis at 0x7ffff48ef4c0>
        kwargs     = {}
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

 x = array([0.20998585, 0.0200306 , 0.11750436, 0.1214483 , 0.09337511,
       0.0867295 , 0.11970726, 0.01110688, 0.12739854, 0.01771036,
       0.07500324])
 y = array([0.00596578, 0.09252455, 0.07500043, 0.07591187, 0.03766003,
       0.28360611, 0.10895725, 0.02529784, 0.09128135, 0.02896778,
       0.17482705])
 z = array([[ 9.25426197,  6.54110336, 11.62996864,  6.10662508,  5.94665289,
         9.45789814,  5.78432703,  9.68043995....60119104,  8.58384037,
         4.72583103,  5.88298082,  7.80163097,  5.49288225,  7.00654173,
         9.24058342]])

    def baseline_mahalanobis(x, y, z):
        diff = x - y
 >       return np.sqrt(diff @ z @ diff)
 E       RuntimeWarning: invalid value encountered in sqrt

 diff       = array([ 0.20402007, -0.07249395,  0.04250393,  0.04553643,  0.05571508,
       -0.19687662,  0.01075   , -0.01419097,  0.03611719, -0.01125742,
       -0.09982381])
 x          = array([0.20998585, 0.0200306 , 0.11750436, 0.1214483 , 0.09337511,
       0.0867295 , 0.11970726, 0.01110688, 0.12739854, 0.01771036,
       0.07500324])
 y          = array([0.00596578, 0.09252455, 0.07500043, 0.07591187, 0.03766003,
       0.28360611, 0.10895725, 0.02529784, 0.09128135, 0.02896778,
       0.17482705])
 z          = array([[ 9.25426197,  6.54110336, 11.62996864,  6.10662508,  5.94665289,
         9.45789814,  5.78432703,  9.68043995....60119104,  8.58384037,
         4.72583103,  5.88298082,  7.80163097,  5.49288225,  7.00654173,
         9.24058342]])

 scripts/test.py:152: RuntimeWarning
 _______________ test_curved[sve_bf16-mahalanobis-dtypes1-97-1-5] _______________

 ndim = 97, dtypes = ('float32', 'float32'), metric = 'mahalanobis'
 capability = 'sve_bf16'
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("ndim", [11, 97])
    @pytest.mark.parametrize(
        "dtypes",  # representation datatype and compute precision
        [
            ("float64", "float64"),
            ("float32", "float32"),
            ("float16", "float32"),  # otherwise NumPy keeps aggregating too much error
        ],
    )
    @pytest.mark.parametrize("metric", ["bilinear", "mahalanobis"])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_curved(ndim, dtypes, metric, capability, stats_fixture):
        """Compares various SIMD kernels (like Bilinear Forms and Mahalanobis distances) for curved spaces
        with their NumPy or baseline counterparts, testing accuracy for IEEE standard floating-point types."""
    
        dtype, compute_dtype = dtypes
        if dtype == "float16" and is_running_under_qemu():
            pytest.skip("Testing low-precision math isn't reliable in QEMU")
    
        np.random.seed()
    
        # Let's generate some non-negative probability distributions
        a = np.abs(np.random.randn(ndim).astype(dtype))
        b = np.abs(np.random.randn(ndim).astype(dtype))
        a /= np.sum(a)
        b /= np.sum(b)
    
        # Let's compute the inverse of the covariance matrix, otherwise in the SciPy
        # implementation of the Mahalanobis we may face `sqrt` of a negative number.
        # We multiply the matrix by its transpose to get a positive-semi-definite matrix.
        c = np.abs(np.random.randn(ndim, ndim).astype(dtype))
        c = np.dot(c, c.T)
    
        keep_one_capability(capability)
        baseline_kernel, simd_kernel = name_to_kernels(metric)
 >       accurate_dt, accurate = profile(
            baseline_kernel,
            a.astype(np.float64),
            b.astype(np.float64),
            c.astype(np.float64),
        )

 a          = array([0.03372671, 0.03069208, 0.00987814, 0.00870445, 0.0047874 ,
       0.01483125, 0.00049991, 0.00923062, 0.007290...9714,
       0.0029216 , 0.00894824, 0.00869429, 0.01061632, 0.01107571,
       0.01848766, 0.01221226], dtype=float32)
 b          = array([0.02296131, 0.00606068, 0.00035268, 0.01692214, 0.00566399,
       0.01930015, 0.0111663 , 0.01378011, 0.014824...451 ,
       0.00348687, 0.01631781, 0.01066322, 0.01190847, 0.00079124,
       0.01086692, 0.00643546], dtype=float32)
 baseline_kernel = <function baseline_mahalanobis at 0x7ffff48ef4c0>
 c          = array([[64.23303 , 54.097008, 64.36526 , ..., 53.259716, 63.271053,
        59.990093],
       [54.097008, 58.528633, ...    58.874477],
       [59.990093, 64.28319 , 66.92435 , ..., 59.485073, 58.874477,
        99.48641 ]], dtype=float32)
 capability = 'sve_bf16'
 compute_dtype = 'float32'
 dtype      = 'float32'
 dtypes     = ('float32', 'float32')
 metric     = 'mahalanobis'
 ndim       = 97
 simd_kernel = <built-in function mahalanobis>
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

 scripts/test.py:656: 
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
 scripts/test.py:179: in profile
    result = callable(*args, **kwargs)
        args       = (array([0.03372671, 0.03069208, 0.00987814, 0.00870445, 0.0047874 ,
       0.01483125, 0.00049991, 0.00923062, 0.00729...97, 58.87447739],
       [59.99009323, 64.28318787, 66.92434692, ..., 59.48507309,
        58.87447739, 99.48641205]]))
        before     = 9843526173265
        callable   = <function baseline_mahalanobis at 0x7ffff48ef4c0>
        kwargs     = {}
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

 x = array([0.03372671, 0.03069208, 0.00987814, 0.00870445, 0.0047874 ,
       0.01483125, 0.00049991, 0.00923062, 0.007290...0134815, 0.01129714,
       0.0029216 , 0.00894824, 0.00869429, 0.01061632, 0.01107571,
       0.01848766, 0.01221226])
 y = array([0.02296131, 0.00606068, 0.00035268, 0.01692214, 0.00566399,
       0.01930015, 0.0111663 , 0.01378011, 0.014824...2782314, 0.0098451 ,
       0.00348687, 0.01631781, 0.01066322, 0.01190847, 0.00079124,
       0.01086692, 0.00643546])
 z = array([[64.23303223, 54.09700775, 64.36525726, ..., 53.25971603,
        63.27105331, 59.99009323],
       [54.0970077...097, 58.87447739],
       [59.99009323, 64.28318787, 66.92434692, ..., 59.48507309,
        58.87447739, 99.48641205]])

    def baseline_mahalanobis(x, y, z):
        diff = x - y
 >       return np.sqrt(diff @ z @ diff)
 E       RuntimeWarning: invalid value encountered in sqrt

 diff       = array([ 0.01076539,  0.0246314 ,  0.00952546, -0.00821769, -0.00087659,
       -0.00446889, -0.01066639, -0.00454949, ...  0.00145204,
       -0.00056528, -0.00736956, -0.00196894, -0.00129215,  0.01028447,
        0.00762074,  0.0057768 ])
 x          = array([0.03372671, 0.03069208, 0.00987814, 0.00870445, 0.0047874 ,
       0.01483125, 0.00049991, 0.00923062, 0.007290...0134815, 0.01129714,
       0.0029216 , 0.00894824, 0.00869429, 0.01061632, 0.01107571,
       0.01848766, 0.01221226])
 y          = array([0.02296131, 0.00606068, 0.00035268, 0.01692214, 0.00566399,
       0.01930015, 0.0111663 , 0.01378011, 0.014824...2782314, 0.0098451 ,
       0.00348687, 0.01631781, 0.01066322, 0.01190847, 0.00079124,
       0.01086692, 0.00643546])
 z          = array([[64.23303223, 54.09700775, 64.36525726, ..., 53.25971603,
        63.27105331, 59.99009323],
       [54.0970077...097, 58.87447739],
       [59.99009323, 64.28318787, 66.92434692, ..., 59.48507309,
        58.87447739, 99.48641205]])

 scripts/test.py:152: RuntimeWarning
 _______________ test_curved[sve_bf16-mahalanobis-dtypes1-97-5-5] _______________

 ndim = 97, dtypes = ('float32', 'float32'), metric = 'mahalanobis'
 capability = 'sve_bf16'
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("ndim", [11, 97])
    @pytest.mark.parametrize(
        "dtypes",  # representation datatype and compute precision
        [
            ("float64", "float64"),
            ("float32", "float32"),
            ("float16", "float32"),  # otherwise NumPy keeps aggregating too much error
        ],
    )
    @pytest.mark.parametrize("metric", ["bilinear", "mahalanobis"])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_curved(ndim, dtypes, metric, capability, stats_fixture):
        """Compares various SIMD kernels (like Bilinear Forms and Mahalanobis distances) for curved spaces
        with their NumPy or baseline counterparts, testing accuracy for IEEE standard floating-point types."""
    
        dtype, compute_dtype = dtypes
        if dtype == "float16" and is_running_under_qemu():
            pytest.skip("Testing low-precision math isn't reliable in QEMU")
    
        np.random.seed()
    
        # Let's generate some non-negative probability distributions
        a = np.abs(np.random.randn(ndim).astype(dtype))
        b = np.abs(np.random.randn(ndim).astype(dtype))
        a /= np.sum(a)
        b /= np.sum(b)
    
        # Let's compute the inverse of the covariance matrix, otherwise in the SciPy
        # implementation of the Mahalanobis we may face `sqrt` of a negative number.
        # We multiply the matrix by its transpose to get a positive-semi-definite matrix.
        c = np.abs(np.random.randn(ndim, ndim).astype(dtype))
        c = np.dot(c, c.T)
    
        keep_one_capability(capability)
        baseline_kernel, simd_kernel = name_to_kernels(metric)
 >       accurate_dt, accurate = profile(
            baseline_kernel,
            a.astype(np.float64),
            b.astype(np.float64),
            c.astype(np.float64),
        )

 a          = array([2.1533027e-02, 9.0264727e-04, 3.1828102e-02, 1.3926838e-03,
       2.4869470e-03, 3.2237680e-03, 1.3775007e-02,...3.8423440e-03,
       7.3279981e-03, 9.9575715e-03, 1.0968118e-02, 1.3213096e-03,
       8.9544710e-03], dtype=float32)
 b          = array([0.00121649, 0.01684183, 0.0178299 , 0.00034617, 0.01522439,
       0.00374493, 0.00071212, 0.00553896, 0.025993...5053,
       0.00384992, 0.01447484, 0.00994682, 0.00029212, 0.01454309,
       0.00888389, 0.00779052], dtype=float32)
 baseline_kernel = <function baseline_mahalanobis at 0x7ffff48ef4c0>
 c          = array([[83.78387 , 59.27108 , 63.52314 , ..., 73.092125, 73.26535 ,
        69.92523 ],
       [59.27108 , 75.52895 , ...    56.137836],
       [69.92523 , 68.90072 , 62.693134, ..., 49.377693, 56.137836,
        85.6153  ]], dtype=float32)
 capability = 'sve_bf16'
 compute_dtype = 'float32'
 dtype      = 'float32'
 dtypes     = ('float32', 'float32')
 metric     = 'mahalanobis'
 ndim       = 97
 simd_kernel = <built-in function mahalanobis>
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

 scripts/test.py:656: 
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
 scripts/test.py:179: in profile
    result = callable(*args, **kwargs)
        args       = (array([2.15330273e-02, 9.02647269e-04, 3.18281017e-02, 1.39268383e-03,
       2.48694699e-03, 3.22376797e-03, 1.37750...7 , 56.13783646],
       [69.92523193, 68.90071869, 62.69313431, ..., 49.37769318,
        56.13783646, 85.61530304]]))
        before     = 9844212675858
        callable   = <function baseline_mahalanobis at 0x7ffff48ef4c0>
        kwargs     = {}
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

 x = array([2.15330273e-02, 9.02647269e-04, 3.18281017e-02, 1.39268383e-03,
       2.48694699e-03, 3.22376797e-03, 1.377500...736e-03, 3.84234404e-03,
       7.32799806e-03, 9.95757151e-03, 1.09681180e-02, 1.32130960e-03,
       8.95447098e-03])
 y = array([0.00121649, 0.01684183, 0.0178299 , 0.00034617, 0.01522439,
       0.00374493, 0.00071212, 0.00553896, 0.025993...0119331, 0.00305053,
       0.00384992, 0.01447484, 0.00994682, 0.00029212, 0.01454309,
       0.00888389, 0.00779052])
 z = array([[83.78386688, 59.27108002, 63.52313995, ..., 73.09212494,
        73.26535034, 69.92523193],
       [59.2710800...37 , 56.13783646],
       [69.92523193, 68.90071869, 62.69313431, ..., 49.37769318,
        56.13783646, 85.61530304]])

    def baseline_mahalanobis(x, y, z):
        diff = x - y
 >       return np.sqrt(diff @ z @ diff)
 E       RuntimeWarning: invalid value encountered in sqrt

 diff       = array([ 2.03165365e-02, -1.59391853e-02,  1.39981974e-02,  1.04651559e-03,
       -1.27374418e-02, -5.21162990e-04,  1...3, -1.06324977e-02,
       -2.61881854e-03,  9.66545238e-03, -3.57497018e-03, -7.56258483e-03,
        1.16394879e-03])
 x          = array([2.15330273e-02, 9.02647269e-04, 3.18281017e-02, 1.39268383e-03,
       2.48694699e-03, 3.22376797e-03, 1.377500...736e-03, 3.84234404e-03,
       7.32799806e-03, 9.95757151e-03, 1.09681180e-02, 1.32130960e-03,
       8.95447098e-03])
 y          = array([0.00121649, 0.01684183, 0.0178299 , 0.00034617, 0.01522439,
       0.00374493, 0.00071212, 0.00553896, 0.025993...0119331, 0.00305053,
       0.00384992, 0.01447484, 0.00994682, 0.00029212, 0.01454309,
       0.00888389, 0.00779052])
 z          = array([[83.78386688, 59.27108002, 63.52313995, ..., 73.09212494,
        73.26535034, 69.92523193],
       [59.2710800...37 , 56.13783646],
       [69.92523193, 68.90071869, 62.69313431, ..., 49.37769318,
        56.13783646, 85.61530304]])

 scripts/test.py:152: RuntimeWarning
 ________________ test_curved[sve_i8-mahalanobis-dtypes1-11-1-5] ________________

 ndim = 11, dtypes = ('float32', 'float32'), metric = 'mahalanobis'
 capability = 'sve_i8'
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("ndim", [11, 97])
    @pytest.mark.parametrize(
        "dtypes",  # representation datatype and compute precision
        [
            ("float64", "float64"),
            ("float32", "float32"),
            ("float16", "float32"),  # otherwise NumPy keeps aggregating too much error
        ],
    )
    @pytest.mark.parametrize("metric", ["bilinear", "mahalanobis"])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_curved(ndim, dtypes, metric, capability, stats_fixture):
        """Compares various SIMD kernels (like Bilinear Forms and Mahalanobis distances) for curved spaces
        with their NumPy or baseline counterparts, testing accuracy for IEEE standard floating-point types."""
    
        dtype, compute_dtype = dtypes
        if dtype == "float16" and is_running_under_qemu():
            pytest.skip("Testing low-precision math isn't reliable in QEMU")
    
        np.random.seed()
    
        # Let's generate some non-negative probability distributions
        a = np.abs(np.random.randn(ndim).astype(dtype))
        b = np.abs(np.random.randn(ndim).astype(dtype))
        a /= np.sum(a)
        b /= np.sum(b)
    
        # Let's compute the inverse of the covariance matrix, otherwise in the SciPy
        # implementation of the Mahalanobis we may face `sqrt` of a negative number.
        # We multiply the matrix by its transpose to get a positive-semi-definite matrix.
        c = np.abs(np.random.randn(ndim, ndim).astype(dtype))
        c = np.dot(c, c.T)
    
        keep_one_capability(capability)
        baseline_kernel, simd_kernel = name_to_kernels(metric)
 >       accurate_dt, accurate = profile(
            baseline_kernel,
            a.astype(np.float64),
            b.astype(np.float64),
            c.astype(np.float64),
        )

 a          = array([0.19096065, 0.00526326, 0.11573447, 0.08472496, 0.12671551,
       0.09909475, 0.02733791, 0.24492739, 0.02740765, 0.01650291,
       0.06133049], dtype=float32)
 b          = array([0.0432984 , 0.03388502, 0.21997818, 0.25591516, 0.0274406 ,
       0.13738576, 0.15388982, 0.03249949, 0.03062573, 0.0487055 ,
       0.01637628], dtype=float32)
 baseline_kernel = <function baseline_mahalanobis at 0x7ffff48ef4c0>
 c          = array([[ 7.7454085,  8.629185 ,  4.2187376,  6.992317 ,  7.1778083,
         4.4187   ,  4.420358 ,  8.176778 ,  6.034...2,  2.4718688,
         1.9043695,  2.2422047,  2.2948205,  2.1273131,  2.4385598,
         2.6936703]], dtype=float32)
 capability = 'sve_i8'
 compute_dtype = 'float32'
 dtype      = 'float32'
 dtypes     = ('float32', 'float32')
 metric     = 'mahalanobis'
 ndim       = 11
 simd_kernel = <built-in function mahalanobis>
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

 scripts/test.py:656: 
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
 scripts/test.py:179: in profile
    result = callable(*args, **kwargs)
        args       = (array([0.19096065, 0.00526326, 0.11573447, 0.08472496, 0.12671551,
       0.09909475, 0.02733791, 0.24492739, 0.02740...07351923,  2.47186875,
         1.90436947,  2.24220467,  2.29482055,  2.12731314,  2.43855977,
         2.69367027]]))
        before     = 9846051887272
        callable   = <function baseline_mahalanobis at 0x7ffff48ef4c0>
        kwargs     = {}
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

 x = array([0.19096065, 0.00526326, 0.11573447, 0.08472496, 0.12671551,
       0.09909475, 0.02733791, 0.24492739, 0.02740765, 0.01650291,
       0.06133049])
 y = array([0.0432984 , 0.03388502, 0.21997818, 0.25591516, 0.0274406 ,
       0.13738576, 0.15388982, 0.03249949, 0.03062573, 0.0487055 ,
       0.01637628])
 z = array([[ 7.74540854,  8.62918472,  4.2187376 ,  6.9923172 ,  7.17780828,
         4.41870022,  4.42035818,  8.17677784....07351923,  2.47186875,
         1.90436947,  2.24220467,  2.29482055,  2.12731314,  2.43855977,
         2.69367027]])

    def baseline_mahalanobis(x, y, z):
        diff = x - y
 >       return np.sqrt(diff @ z @ diff)
 E       RuntimeWarning: invalid value encountered in sqrt

 diff       = array([ 0.14766224, -0.02862176, -0.10424371, -0.17119021,  0.09927491,
       -0.03829101, -0.12655191,  0.2124279 , -0.00321808, -0.03220258,
        0.0449542 ])
 x          = array([0.19096065, 0.00526326, 0.11573447, 0.08472496, 0.12671551,
       0.09909475, 0.02733791, 0.24492739, 0.02740765, 0.01650291,
       0.06133049])
 y          = array([0.0432984 , 0.03388502, 0.21997818, 0.25591516, 0.0274406 ,
       0.13738576, 0.15388982, 0.03249949, 0.03062573, 0.0487055 ,
       0.01637628])
 z          = array([[ 7.74540854,  8.62918472,  4.2187376 ,  6.9923172 ,  7.17780828,
         4.41870022,  4.42035818,  8.17677784....07351923,  2.47186875,
         1.90436947,  2.24220467,  2.29482055,  2.12731314,  2.43855977,
         2.69367027]])

 scripts/test.py:152: RuntimeWarning
 ________________ test_curved[sve_i8-mahalanobis-dtypes1-11-3-5] ________________

 ndim = 11, dtypes = ('float32', 'float32'), metric = 'mahalanobis'
 capability = 'sve_i8'
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("ndim", [11, 97])
    @pytest.mark.parametrize(
        "dtypes",  # representation datatype and compute precision
        [
            ("float64", "float64"),
            ("float32", "float32"),
            ("float16", "float32"),  # otherwise NumPy keeps aggregating too much error
        ],
    )
    @pytest.mark.parametrize("metric", ["bilinear", "mahalanobis"])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_curved(ndim, dtypes, metric, capability, stats_fixture):
        """Compares various SIMD kernels (like Bilinear Forms and Mahalanobis distances) for curved spaces
        with their NumPy or baseline counterparts, testing accuracy for IEEE standard floating-point types."""
    
        dtype, compute_dtype = dtypes
        if dtype == "float16" and is_running_under_qemu():
            pytest.skip("Testing low-precision math isn't reliable in QEMU")
    
        np.random.seed()
    
        # Let's generate some non-negative probability distributions
        a = np.abs(np.random.randn(ndim).astype(dtype))
        b = np.abs(np.random.randn(ndim).astype(dtype))
        a /= np.sum(a)
        b /= np.sum(b)
    
        # Let's compute the inverse of the covariance matrix, otherwise in the SciPy
        # implementation of the Mahalanobis we may face `sqrt` of a negative number.
        # We multiply the matrix by its transpose to get a positive-semi-definite matrix.
        c = np.abs(np.random.randn(ndim, ndim).astype(dtype))
        c = np.dot(c, c.T)
    
        keep_one_capability(capability)
        baseline_kernel, simd_kernel = name_to_kernels(metric)
 >       accurate_dt, accurate = profile(
            baseline_kernel,
            a.astype(np.float64),
            b.astype(np.float64),
            c.astype(np.float64),
        )

 a          = array([0.02134473, 0.17088294, 0.02070476, 0.1797113 , 0.14342932,
       0.02535673, 0.00600438, 0.14452413, 0.12226658, 0.00833328,
       0.1574419 ], dtype=float32)
 b          = array([0.02206616, 0.17520632, 0.08448462, 0.16246635, 0.01197341,
       0.07742451, 0.04386871, 0.13913773, 0.13106287, 0.08866476,
       0.06364459], dtype=float32)
 baseline_kernel = <function baseline_mahalanobis at 0x7ffff48ef4c0>
 c          = array([[10.239367 ,  4.6873007,  9.05207  ,  8.227271 ,  9.251494 ,
         7.4201117,  7.457942 , 13.32158  ,  3.933...6,  5.3243403,
         4.640505 ,  5.8028874,  8.404956 ,  8.853975 ,  7.1954737,
         5.1838937]], dtype=float32)
 capability = 'sve_i8'
 compute_dtype = 'float32'
 dtype      = 'float32'
 dtypes     = ('float32', 'float32')
 metric     = 'mahalanobis'
 ndim       = 11
 simd_kernel = <built-in function mahalanobis>
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

 scripts/test.py:656: 
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
 scripts/test.py:179: in profile
    result = callable(*args, **kwargs)
        args       = (array([0.02134473, 0.17088294, 0.02070476, 0.1797113 , 0.14342932,
       0.02535673, 0.00600438, 0.14452413, 0.12226...08873558,  5.32434034,
         4.64050484,  5.80288744,  8.40495586,  8.8539753 ,  7.19547367,
         5.18389368]]))
        before     = 9846657775226
        callable   = <function baseline_mahalanobis at 0x7ffff48ef4c0>
        kwargs     = {}
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

 x = array([0.02134473, 0.17088294, 0.02070476, 0.1797113 , 0.14342932,
       0.02535673, 0.00600438, 0.14452413, 0.12226658, 0.00833328,
       0.1574419 ])
 y = array([0.02206616, 0.17520632, 0.08448462, 0.16246635, 0.01197341,
       0.07742451, 0.04386871, 0.13913773, 0.13106287, 0.08866476,
       0.06364459])
 z = array([[10.23936653,  4.68730068,  9.05206966,  8.22727108,  9.25149441,
         7.42011166,  7.45794201, 13.32157993....08873558,  5.32434034,
         4.64050484,  5.80288744,  8.40495586,  8.8539753 ,  7.19547367,
         5.18389368]])

    def baseline_mahalanobis(x, y, z):
        diff = x - y
 >       return np.sqrt(diff @ z @ diff)
 E       RuntimeWarning: invalid value encountered in sqrt

 diff       = array([-0.00072142, -0.00432338, -0.06377986,  0.01724495,  0.13145592,
       -0.05206778, -0.03786433,  0.0053864 , -0.00879628, -0.08033148,
        0.09379731])
 x          = array([0.02134473, 0.17088294, 0.02070476, 0.1797113 , 0.14342932,
       0.02535673, 0.00600438, 0.14452413, 0.12226658, 0.00833328,
       0.1574419 ])
 y          = array([0.02206616, 0.17520632, 0.08448462, 0.16246635, 0.01197341,
       0.07742451, 0.04386871, 0.13913773, 0.13106287, 0.08866476,
       0.06364459])
 z          = array([[10.23936653,  4.68730068,  9.05206966,  8.22727108,  9.25149441,
         7.42011166,  7.45794201, 13.32157993....08873558,  5.32434034,
         4.64050484,  5.80288744,  8.40495586,  8.8539753 ,  7.19547367,
         5.18389368]])

 scripts/test.py:152: RuntimeWarning
 ________________ test_curved[sve_i8-mahalanobis-dtypes1-11-5-5] ________________

 ndim = 11, dtypes = ('float32', 'float32'), metric = 'mahalanobis'
 capability = 'sve_i8'
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("ndim", [11, 97])
    @pytest.mark.parametrize(
        "dtypes",  # representation datatype and compute precision
        [
            ("float64", "float64"),
            ("float32", "float32"),
            ("float16", "float32"),  # otherwise NumPy keeps aggregating too much error
        ],
    )
    @pytest.mark.parametrize("metric", ["bilinear", "mahalanobis"])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_curved(ndim, dtypes, metric, capability, stats_fixture):
        """Compares various SIMD kernels (like Bilinear Forms and Mahalanobis distances) for curved spaces
        with their NumPy or baseline counterparts, testing accuracy for IEEE standard floating-point types."""
    
        dtype, compute_dtype = dtypes
        if dtype == "float16" and is_running_under_qemu():
            pytest.skip("Testing low-precision math isn't reliable in QEMU")
    
        np.random.seed()
    
        # Let's generate some non-negative probability distributions
        a = np.abs(np.random.randn(ndim).astype(dtype))
        b = np.abs(np.random.randn(ndim).astype(dtype))
        a /= np.sum(a)
        b /= np.sum(b)
    
        # Let's compute the inverse of the covariance matrix, otherwise in the SciPy
        # implementation of the Mahalanobis we may face `sqrt` of a negative number.
        # We multiply the matrix by its transpose to get a positive-semi-definite matrix.
        c = np.abs(np.random.randn(ndim, ndim).astype(dtype))
        c = np.dot(c, c.T)
    
        keep_one_capability(capability)
        baseline_kernel, simd_kernel = name_to_kernels(metric)
 >       accurate_dt, accurate = profile(
            baseline_kernel,
            a.astype(np.float64),
            b.astype(np.float64),
            c.astype(np.float64),
        )

 a          = array([0.00229023, 0.10137389, 0.00735305, 0.11483254, 0.20596172,
       0.11647353, 0.06990387, 0.06622959, 0.14586082, 0.03872812,
       0.13099262], dtype=float32)
 b          = array([0.03184978, 0.24500012, 0.06002821, 0.01108921, 0.13274156,
       0.09381456, 0.05305798, 0.09494982, 0.13214257, 0.07846637,
       0.06685989], dtype=float32)
 baseline_kernel = <function baseline_mahalanobis at 0x7ffff48ef4c0>
 c          = array([[ 6.5364604,  6.7816663,  3.8010864,  5.897234 ,  3.7064707,
         5.7672   ,  9.901699 ,  6.9070044,  5.890...6,  7.124165 ,
         6.8793426,  6.390428 ,  6.431571 ,  7.015909 ,  5.984683 ,
         6.2815456]], dtype=float32)
 capability = 'sve_i8'
 compute_dtype = 'float32'
 dtype      = 'float32'
 dtypes     = ('float32', 'float32')
 metric     = 'mahalanobis'
 ndim       = 11
 simd_kernel = <built-in function mahalanobis>
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

 scripts/test.py:656: 
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
 scripts/test.py:179: in profile
    result = callable(*args, **kwargs)
        args       = (array([0.00229023, 0.10137389, 0.00735305, 0.11483254, 0.20596172,
       0.11647353, 0.06990387, 0.06622959, 0.14586...58953762,  7.12416506,
         6.87934256,  6.39042807,  6.43157101,  7.01590919,  5.98468304,
         6.28154564]]))
        before     = 9847329701018
        callable   = <function baseline_mahalanobis at 0x7ffff48ef4c0>
        kwargs     = {}
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

 x = array([0.00229023, 0.10137389, 0.00735305, 0.11483254, 0.20596172,
       0.11647353, 0.06990387, 0.06622959, 0.14586082, 0.03872812,
       0.13099262])
 y = array([0.03184978, 0.24500012, 0.06002821, 0.01108921, 0.13274156,
       0.09381456, 0.05305798, 0.09494982, 0.13214257, 0.07846637,
       0.06685989])
 z = array([[ 6.5364604 ,  6.78166628,  3.80108643,  5.89723396,  3.70647073,
         5.76719999,  9.90169907,  6.90700436....58953762,  7.12416506,
         6.87934256,  6.39042807,  6.43157101,  7.01590919,  5.98468304,
         6.28154564]])

    def baseline_mahalanobis(x, y, z):
        diff = x - y
 >       return np.sqrt(diff @ z @ diff)
 E       RuntimeWarning: invalid value encountered in sqrt

 diff       = array([-0.02955955, -0.14362624, -0.05267516,  0.10374333,  0.07322016,
        0.02265897,  0.01684589, -0.02872023,  0.01371825, -0.03973825,
        0.06413274])
 x          = array([0.00229023, 0.10137389, 0.00735305, 0.11483254, 0.20596172,
       0.11647353, 0.06990387, 0.06622959, 0.14586082, 0.03872812,
       0.13099262])
 y          = array([0.03184978, 0.24500012, 0.06002821, 0.01108921, 0.13274156,
       0.09381456, 0.05305798, 0.09494982, 0.13214257, 0.07846637,
       0.06685989])
 z          = array([[ 6.5364604 ,  6.78166628,  3.80108643,  5.89723396,  3.70647073,
         5.76719999,  9.90169907,  6.90700436....58953762,  7.12416506,
         6.87934256,  6.39042807,  6.43157101,  7.01590919,  5.98468304,
         6.28154564]])

 scripts/test.py:152: RuntimeWarning
 ________________ test_curved[sve_i8-mahalanobis-dtypes1-97-1-5] ________________

 ndim = 97, dtypes = ('float32', 'float32'), metric = 'mahalanobis'
 capability = 'sve_i8'
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("ndim", [11, 97])
    @pytest.mark.parametrize(
        "dtypes",  # representation datatype and compute precision
        [
            ("float64", "float64"),
            ("float32", "float32"),
            ("float16", "float32"),  # otherwise NumPy keeps aggregating too much error
        ],
    )
    @pytest.mark.parametrize("metric", ["bilinear", "mahalanobis"])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_curved(ndim, dtypes, metric, capability, stats_fixture):
        """Compares various SIMD kernels (like Bilinear Forms and Mahalanobis distances) for curved spaces
        with their NumPy or baseline counterparts, testing accuracy for IEEE standard floating-point types."""
    
        dtype, compute_dtype = dtypes
        if dtype == "float16" and is_running_under_qemu():
            pytest.skip("Testing low-precision math isn't reliable in QEMU")
    
        np.random.seed()
    
        # Let's generate some non-negative probability distributions
        a = np.abs(np.random.randn(ndim).astype(dtype))
        b = np.abs(np.random.randn(ndim).astype(dtype))
        a /= np.sum(a)
        b /= np.sum(b)
    
        # Let's compute the inverse of the covariance matrix, otherwise in the SciPy
        # implementation of the Mahalanobis we may face `sqrt` of a negative number.
        # We multiply the matrix by its transpose to get a positive-semi-definite matrix.
        c = np.abs(np.random.randn(ndim, ndim).astype(dtype))
        c = np.dot(c, c.T)
    
        keep_one_capability(capability)
        baseline_kernel, simd_kernel = name_to_kernels(metric)
 >       accurate_dt, accurate = profile(
            baseline_kernel,
            a.astype(np.float64),
            b.astype(np.float64),
            c.astype(np.float64),
        )

 a          = array([0.01273924, 0.00578753, 0.00597026, 0.00118851, 0.00522025,
       0.00909672, 0.01279081, 0.00182441, 0.009227...8509,
       0.00285059, 0.00295465, 0.00784399, 0.00490429, 0.0110454 ,
       0.01177512, 0.00212573], dtype=float32)
 b          = array([0.00861232, 0.00603244, 0.00224314, 0.00187969, 0.03346   ,
       0.00312444, 0.01020421, 0.0094191 , 0.016284...9634,
       0.01239941, 0.010604  , 0.01268323, 0.00359069, 0.00329981,
       0.00976993, 0.02030066], dtype=float32)
 baseline_kernel = <function baseline_mahalanobis at 0x7ffff48ef4c0>
 c          = array([[ 50.43951 ,  53.352993,  54.03103 , ...,  58.43641 ,  58.086174,
         59.700287],
       [ 53.352993,  68.....44554 ],
       [ 59.700287,  66.52929 ,  63.179615, ...,  67.585205,  73.44554 ,
        115.12158 ]], dtype=float32)
 capability = 'sve_i8'
 compute_dtype = 'float32'
 dtype      = 'float32'
 dtypes     = ('float32', 'float32')
 metric     = 'mahalanobis'
 ndim       = 97
 simd_kernel = <built-in function mahalanobis>
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

 scripts/test.py:656: 
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
 scripts/test.py:179: in profile
    result = callable(*args, **kwargs)
        args       = (array([0.01273924, 0.00578753, 0.00597026, 0.00118851, 0.00522025,
       0.00909672, 0.01279081, 0.00182441, 0.00922....44554138],
       [ 59.70028687,  66.52928925,  63.17961502, ...,  67.58520508,
         73.44554138, 115.12158203]]))
        before     = 9848002967911
        callable   = <function baseline_mahalanobis at 0x7ffff48ef4c0>
        kwargs     = {}
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

 x = array([0.01273924, 0.00578753, 0.00597026, 0.00118851, 0.00522025,
       0.00909672, 0.01279081, 0.00182441, 0.009227...0184347, 0.00958509,
       0.00285059, 0.00295465, 0.00784399, 0.00490429, 0.0110454 ,
       0.01177512, 0.00212573])
 y = array([0.00861232, 0.00603244, 0.00224314, 0.00187969, 0.03346   ,
       0.00312444, 0.01020421, 0.0094191 , 0.016284...0821166, 0.01579634,
       0.01239941, 0.010604  , 0.01268323, 0.00359069, 0.00329981,
       0.00976993, 0.02030066])
 z = array([[ 50.43951035,  53.35299301,  54.03102875, ...,  58.436409  ,
         58.08617401,  59.70028687],
       [ 53....3.44554138],
       [ 59.70028687,  66.52928925,  63.17961502, ...,  67.58520508,
         73.44554138, 115.12158203]])

    def baseline_mahalanobis(x, y, z):
        diff = x - y
 >       return np.sqrt(diff @ z @ diff)
 E       RuntimeWarning: invalid value encountered in sqrt

 diff       = array([ 0.00412692, -0.00024491,  0.00372712, -0.00069118, -0.02823975,
        0.00597227,  0.00258661, -0.00759469, ... -0.00621125,
       -0.00954882, -0.00764935, -0.00483924,  0.0013136 ,  0.00774559,
        0.00200519, -0.01817493])
 x          = array([0.01273924, 0.00578753, 0.00597026, 0.00118851, 0.00522025,
       0.00909672, 0.01279081, 0.00182441, 0.009227...0184347, 0.00958509,
       0.00285059, 0.00295465, 0.00784399, 0.00490429, 0.0110454 ,
       0.01177512, 0.00212573])
 y          = array([0.00861232, 0.00603244, 0.00224314, 0.00187969, 0.03346   ,
       0.00312444, 0.01020421, 0.0094191 , 0.016284...0821166, 0.01579634,
       0.01239941, 0.010604  , 0.01268323, 0.00359069, 0.00329981,
       0.00976993, 0.02030066])
 z          = array([[ 50.43951035,  53.35299301,  54.03102875, ...,  58.436409  ,
         58.08617401,  59.70028687],
       [ 53....3.44554138],
       [ 59.70028687,  66.52928925,  63.17961502, ...,  67.58520508,
         73.44554138, 115.12158203]])

 scripts/test.py:152: RuntimeWarning
 ________________ test_curved[sve_i8-mahalanobis-dtypes1-97-3-5] ________________

 ndim = 97, dtypes = ('float32', 'float32'), metric = 'mahalanobis'
 capability = 'sve_i8'
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("ndim", [11, 97])
    @pytest.mark.parametrize(
        "dtypes",  # representation datatype and compute precision
        [
            ("float64", "float64"),
            ("float32", "float32"),
            ("float16", "float32"),  # otherwise NumPy keeps aggregating too much error
        ],
    )
    @pytest.mark.parametrize("metric", ["bilinear", "mahalanobis"])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_curved(ndim, dtypes, metric, capability, stats_fixture):
        """Compares various SIMD kernels (like Bilinear Forms and Mahalanobis distances) for curved spaces
        with their NumPy or baseline counterparts, testing accuracy for IEEE standard floating-point types."""
    
        dtype, compute_dtype = dtypes
        if dtype == "float16" and is_running_under_qemu():
            pytest.skip("Testing low-precision math isn't reliable in QEMU")
    
        np.random.seed()
    
        # Let's generate some non-negative probability distributions
        a = np.abs(np.random.randn(ndim).astype(dtype))
        b = np.abs(np.random.randn(ndim).astype(dtype))
        a /= np.sum(a)
        b /= np.sum(b)
    
        # Let's compute the inverse of the covariance matrix, otherwise in the SciPy
        # implementation of the Mahalanobis we may face `sqrt` of a negative number.
        # We multiply the matrix by its transpose to get a positive-semi-definite matrix.
        c = np.abs(np.random.randn(ndim, ndim).astype(dtype))
        c = np.dot(c, c.T)
    
        keep_one_capability(capability)
        baseline_kernel, simd_kernel = name_to_kernels(metric)
 >       accurate_dt, accurate = profile(
            baseline_kernel,
            a.astype(np.float64),
            b.astype(np.float64),
            c.astype(np.float64),
        )

 a          = array([0.0137424 , 0.01531135, 0.02644389, 0.02095761, 0.0059608 ,
       0.00790102, 0.01285701, 0.00686216, 0.018892...4532,
       0.0102979 , 0.00256194, 0.01548835, 0.01106838, 0.00977083,
       0.01527428, 0.00758731], dtype=float32)
 b          = array([1.44603141e-02, 5.73056238e-03, 2.16517672e-02, 6.94837561e-03,
       1.31948208e-02, 1.77239105e-02, 1.707259...5217e-03,
       7.03300769e-03, 1.21783232e-02, 6.33386569e-03, 1.49035046e-03,
       4.15999163e-03], dtype=float32)
 baseline_kernel = <function baseline_mahalanobis at 0x7ffff48ef4c0>
 c          = array([[ 48.78445 ,  50.34693 ,  64.91033 , ...,  54.57214 ,  53.50093 ,
         58.368355],
       [ 50.34693 ,  59.....508106],
       [ 58.368355,  60.324722,  64.01256 , ...,  68.840294,  62.508106,
        108.50336 ]], dtype=float32)
 capability = 'sve_i8'
 compute_dtype = 'float32'
 dtype      = 'float32'
 dtypes     = ('float32', 'float32')
 metric     = 'mahalanobis'
 ndim       = 97
 simd_kernel = <built-in function mahalanobis>
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

 scripts/test.py:656: 
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
 scripts/test.py:179: in profile
    result = callable(*args, **kwargs)
        args       = (array([0.0137424 , 0.01531135, 0.02644389, 0.02095761, 0.0059608 ,
       0.00790102, 0.01285701, 0.00686216, 0.01889....50810623],
       [ 58.3683548 ,  60.32472229,  64.01255798, ...,  68.84029388,
         62.50810623, 108.50335693]]))
        before     = 9848712430500
        callable   = <function baseline_mahalanobis at 0x7ffff48ef4c0>
        kwargs     = {}
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

 x = array([0.0137424 , 0.01531135, 0.02644389, 0.02095761, 0.0059608 ,
       0.00790102, 0.01285701, 0.00686216, 0.018892...0158118, 0.01834532,
       0.0102979 , 0.00256194, 0.01548835, 0.01106838, 0.00977083,
       0.01527428, 0.00758731])
 y = array([1.44603141e-02, 5.73056238e-03, 2.16517672e-02, 6.94837561e-03,
       1.31948208e-02, 1.77239105e-02, 1.707259...236e-03, 5.79655217e-03,
       7.03300769e-03, 1.21783232e-02, 6.33386569e-03, 1.49035046e-03,
       4.15999163e-03])
 z = array([[ 48.78445053,  50.34693146,  64.91033173, ...,  54.57213974,
         53.50093079,  58.3683548 ],
       [ 50....2.50810623],
       [ 58.3683548 ,  60.32472229,  64.01255798, ...,  68.84029388,
         62.50810623, 108.50335693]])

    def baseline_mahalanobis(x, y, z):
        diff = x - y
 >       return np.sqrt(diff @ z @ diff)
 E       RuntimeWarning: invalid value encountered in sqrt

 diff       = array([-7.17915595e-04,  9.58079146e-03,  4.79211845e-03,  1.40092359e-02,
       -7.23402295e-03, -9.82289109e-03, -4...3, -3.23461206e-03,
        8.45533749e-03, -1.10994279e-03,  3.43696540e-03,  1.37839274e-02,
        3.42731411e-03])
 x          = array([0.0137424 , 0.01531135, 0.02644389, 0.02095761, 0.0059608 ,
       0.00790102, 0.01285701, 0.00686216, 0.018892...0158118, 0.01834532,
       0.0102979 , 0.00256194, 0.01548835, 0.01106838, 0.00977083,
       0.01527428, 0.00758731])
 y          = array([1.44603141e-02, 5.73056238e-03, 2.16517672e-02, 6.94837561e-03,
       1.31948208e-02, 1.77239105e-02, 1.707259...236e-03, 5.79655217e-03,
       7.03300769e-03, 1.21783232e-02, 6.33386569e-03, 1.49035046e-03,
       4.15999163e-03])
 z          = array([[ 48.78445053,  50.34693146,  64.91033173, ...,  54.57213974,
         53.50093079,  58.3683548 ],
       [ 50....2.50810623],
       [ 58.3683548 ,  60.32472229,  64.01255798, ...,  68.84029388,
         62.50810623, 108.50335693]])

 scripts/test.py:152: RuntimeWarning
 ________________ test_curved[sve_i8-mahalanobis-dtypes1-97-4-5] ________________

 ndim = 97, dtypes = ('float32', 'float32'), metric = 'mahalanobis'
 capability = 'sve_i8'
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("ndim", [11, 97])
    @pytest.mark.parametrize(
        "dtypes",  # representation datatype and compute precision
        [
            ("float64", "float64"),
            ("float32", "float32"),
            ("float16", "float32"),  # otherwise NumPy keeps aggregating too much error
        ],
    )
    @pytest.mark.parametrize("metric", ["bilinear", "mahalanobis"])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_curved(ndim, dtypes, metric, capability, stats_fixture):
        """Compares various SIMD kernels (like Bilinear Forms and Mahalanobis distances) for curved spaces
        with their NumPy or baseline counterparts, testing accuracy for IEEE standard floating-point types."""
    
        dtype, compute_dtype = dtypes
        if dtype == "float16" and is_running_under_qemu():
            pytest.skip("Testing low-precision math isn't reliable in QEMU")
    
        np.random.seed()
    
        # Let's generate some non-negative probability distributions
        a = np.abs(np.random.randn(ndim).astype(dtype))
        b = np.abs(np.random.randn(ndim).astype(dtype))
        a /= np.sum(a)
        b /= np.sum(b)
    
        # Let's compute the inverse of the covariance matrix, otherwise in the SciPy
        # implementation of the Mahalanobis we may face `sqrt` of a negative number.
        # We multiply the matrix by its transpose to get a positive-semi-definite matrix.
        c = np.abs(np.random.randn(ndim, ndim).astype(dtype))
        c = np.dot(c, c.T)
    
        keep_one_capability(capability)
        baseline_kernel, simd_kernel = name_to_kernels(metric)
 >       accurate_dt, accurate = profile(
            baseline_kernel,
            a.astype(np.float64),
            b.astype(np.float64),
            c.astype(np.float64),
        )

 a          = array([0.00567625, 0.01205613, 0.01147073, 0.01788832, 0.00262296,
       0.02627755, 0.00820947, 0.02222227, 0.004176...4837,
       0.00144045, 0.00900602, 0.00888284, 0.01303986, 0.00638132,
       0.01345382, 0.01059228], dtype=float32)
 b          = array([1.30478467e-03, 1.81152504e-02, 8.01333226e-03, 2.49990448e-03,
       3.96093130e-02, 5.21787396e-03, 4.339264...2871e-02,
       1.81437365e-03, 1.73998754e-02, 1.68137215e-02, 1.52610959e-02,
       1.25489496e-02], dtype=float32)
 baseline_kernel = <function baseline_mahalanobis at 0x7ffff48ef4c0>
 c          = array([[ 80.71837 ,  65.093414,  74.71641 , ...,  65.98503 ,  65.80701 ,
         72.40151 ],
       [ 65.093414,  59.....812996],
       [ 72.40151 ,  59.334488,  52.472702, ...,  68.98507 ,  67.812996,
        106.07909 ]], dtype=float32)
 capability = 'sve_i8'
 compute_dtype = 'float32'
 dtype      = 'float32'
 dtypes     = ('float32', 'float32')
 metric     = 'mahalanobis'
 ndim       = 97
 simd_kernel = <built-in function mahalanobis>
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

 scripts/test.py:656: 
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
 scripts/test.py:179: in profile
    result = callable(*args, **kwargs)
        args       = (array([0.00567625, 0.01205613, 0.01147073, 0.01788832, 0.00262296,
       0.02627755, 0.00820947, 0.02222227, 0.00417....81299591],
       [ 72.40151215,  59.33448792,  52.47270203, ...,  68.98506927,
         67.81299591, 106.0790863 ]]))
        before     = 9849344556747
        callable   = <function baseline_mahalanobis at 0x7ffff48ef4c0>
        kwargs     = {}
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

 x = array([0.00567625, 0.01205613, 0.01147073, 0.01788832, 0.00262296,
       0.02627755, 0.00820947, 0.02222227, 0.004176...0387254, 0.01054837,
       0.00144045, 0.00900602, 0.00888284, 0.01303986, 0.00638132,
       0.01345382, 0.01059228])
 y = array([1.30478467e-03, 1.81152504e-02, 8.01333226e-03, 2.49990448e-03,
       3.96093130e-02, 5.21787396e-03, 4.339264...414e-02, 2.93842871e-02,
       1.81437365e-03, 1.73998754e-02, 1.68137215e-02, 1.52610959e-02,
       1.25489496e-02])
 z = array([[ 80.71836853,  65.09341431,  74.71640778, ...,  65.98503113,
         65.80700684,  72.40151215],
       [ 65....7.81299591],
       [ 72.40151215,  59.33448792,  52.47270203, ...,  68.98506927,
         67.81299591, 106.0790863 ]])

    def baseline_mahalanobis(x, y, z):
        diff = x - y
 >       return np.sqrt(diff @ z @ diff)
 E       RuntimeWarning: invalid value encountered in sqrt

 diff       = array([ 4.37146157e-03, -6.05911762e-03,  3.45740002e-03,  1.53884180e-02,
       -3.69863533e-02,  2.10596719e-02,  7...2, -2.03782683e-02,
        7.06846185e-03, -4.36001085e-03, -1.04324040e-02, -1.80727802e-03,
       -1.95666775e-03])
 x          = array([0.00567625, 0.01205613, 0.01147073, 0.01788832, 0.00262296,
       0.02627755, 0.00820947, 0.02222227, 0.004176...0387254, 0.01054837,
       0.00144045, 0.00900602, 0.00888284, 0.01303986, 0.00638132,
       0.01345382, 0.01059228])
 y          = array([1.30478467e-03, 1.81152504e-02, 8.01333226e-03, 2.49990448e-03,
       3.96093130e-02, 5.21787396e-03, 4.339264...414e-02, 2.93842871e-02,
       1.81437365e-03, 1.73998754e-02, 1.68137215e-02, 1.52610959e-02,
       1.25489496e-02])
 z          = array([[ 80.71836853,  65.09341431,  74.71640778, ...,  65.98503113,
         65.80700684,  72.40151215],
       [ 65....7.81299591],
       [ 72.40151215,  59.33448792,  52.47270203, ...,  68.98506927,
         67.81299591, 106.0790863 ]])

 scripts/test.py:152: RuntimeWarning
 ________________ test_curved[sve_i8-mahalanobis-dtypes1-97-5-5] ________________

 ndim = 97, dtypes = ('float32', 'float32'), metric = 'mahalanobis'
 capability = 'sve_i8'
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("ndim", [11, 97])
    @pytest.mark.parametrize(
        "dtypes",  # representation datatype and compute precision
        [
            ("float64", "float64"),
            ("float32", "float32"),
            ("float16", "float32"),  # otherwise NumPy keeps aggregating too much error
        ],
    )
    @pytest.mark.parametrize("metric", ["bilinear", "mahalanobis"])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_curved(ndim, dtypes, metric, capability, stats_fixture):
        """Compares various SIMD kernels (like Bilinear Forms and Mahalanobis distances) for curved spaces
        with their NumPy or baseline counterparts, testing accuracy for IEEE standard floating-point types."""
    
        dtype, compute_dtype = dtypes
        if dtype == "float16" and is_running_under_qemu():
            pytest.skip("Testing low-precision math isn't reliable in QEMU")
    
        np.random.seed()
    
        # Let's generate some non-negative probability distributions
        a = np.abs(np.random.randn(ndim).astype(dtype))
        b = np.abs(np.random.randn(ndim).astype(dtype))
        a /= np.sum(a)
        b /= np.sum(b)
    
        # Let's compute the inverse of the covariance matrix, otherwise in the SciPy
        # implementation of the Mahalanobis we may face `sqrt` of a negative number.
        # We multiply the matrix by its transpose to get a positive-semi-definite matrix.
        c = np.abs(np.random.randn(ndim, ndim).astype(dtype))
        c = np.dot(c, c.T)
    
        keep_one_capability(capability)
        baseline_kernel, simd_kernel = name_to_kernels(metric)
 >       accurate_dt, accurate = profile(
            baseline_kernel,
            a.astype(np.float64),
            b.astype(np.float64),
            c.astype(np.float64),
        )

 a          = array([8.6851344e-03, 6.5441341e-03, 1.6384967e-02, 1.6904976e-02,
       2.2630934e-02, 7.9359990e-03, 1.6588589e-02,...1.3494906e-05,
       4.7633357e-04, 1.2472113e-02, 9.8339273e-03, 1.5780041e-02,
       1.3107338e-03], dtype=float32)
 b          = array([0.00211251, 0.00949532, 0.00909596, 0.02117606, 0.00542945,
       0.00867905, 0.01023987, 0.01596099, 0.015210...0821,
       0.00794972, 0.00317908, 0.01237506, 0.0031067 , 0.02549893,
       0.00820823, 0.00655788], dtype=float32)
 baseline_kernel = <function baseline_mahalanobis at 0x7ffff48ef4c0>
 c          = array([[72.41444 , 60.48242 , 68.68724 , ..., 61.752415, 63.156994,
        62.467934],
       [60.48242 , 51.14481 , ...    60.13458 ],
       [62.467934, 56.86639 , 62.51293 , ..., 63.037567, 60.13458 ,
        86.51693 ]], dtype=float32)
 capability = 'sve_i8'
 compute_dtype = 'float32'
 dtype      = 'float32'
 dtypes     = ('float32', 'float32')
 metric     = 'mahalanobis'
 ndim       = 97
 simd_kernel = <built-in function mahalanobis>
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

 scripts/test.py:656: 
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
 scripts/test.py:179: in profile
    result = callable(*args, **kwargs)
        args       = (array([8.68513435e-03, 6.54413411e-03, 1.63849667e-02, 1.69049762e-02,
       2.26309337e-02, 7.93599896e-03, 1.65885...36, 60.1345787 ],
       [62.46793365, 56.86639023, 62.51293182, ..., 63.03756714,
        60.1345787 , 86.51692963]]))
        before     = 9849961633417
        callable   = <function baseline_mahalanobis at 0x7ffff48ef4c0>
        kwargs     = {}
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

 x = array([8.68513435e-03, 6.54413411e-03, 1.63849667e-02, 1.69049762e-02,
       2.26309337e-02, 7.93599896e-03, 1.658858...696e-03, 1.34949059e-05,
       4.76333575e-04, 1.24721127e-02, 9.83392727e-03, 1.57800410e-02,
       1.31073385e-03])
 y = array([0.00211251, 0.00949532, 0.00909596, 0.02117606, 0.00542945,
       0.00867905, 0.01023987, 0.01596099, 0.015210...0082989, 0.00150821,
       0.00794972, 0.00317908, 0.01237506, 0.0031067 , 0.02549893,
       0.00820823, 0.00655788])
 z = array([[72.41443634, 60.48242188, 68.6872406 , ..., 61.7524147 ,
        63.15699387, 62.46793365],
       [60.4824218...536, 60.1345787 ],
       [62.46793365, 56.86639023, 62.51293182, ..., 63.03756714,
        60.1345787 , 86.51692963]])

    def baseline_mahalanobis(x, y, z):
        diff = x - y
 >       return np.sqrt(diff @ z @ diff)
 E       RuntimeWarning: invalid value encountered in sqrt

 diff       = array([ 6.57262816e-03, -2.95118475e-03,  7.28901103e-03, -4.27107885e-03,
        1.72014879e-02, -7.43046403e-04,  6...3, -3.16558448e-03,
       -1.18987297e-02,  9.36541683e-03, -1.56650068e-02,  7.57181458e-03,
       -5.24714659e-03])
 x          = array([8.68513435e-03, 6.54413411e-03, 1.63849667e-02, 1.69049762e-02,
       2.26309337e-02, 7.93599896e-03, 1.658858...696e-03, 1.34949059e-05,
       4.76333575e-04, 1.24721127e-02, 9.83392727e-03, 1.57800410e-02,
       1.31073385e-03])
 y          = array([0.00211251, 0.00949532, 0.00909596, 0.02117606, 0.00542945,
       0.00867905, 0.01023987, 0.01596099, 0.015210...0082989, 0.00150821,
       0.00794972, 0.00317908, 0.01237506, 0.0031067 , 0.02549893,
       0.00820823, 0.00655788])
 z          = array([[72.41443634, 60.48242188, 68.6872406 , ..., 61.7524147 ,
        63.15699387, 62.46793365],
       [60.4824218...536, 60.1345787 ],
       [62.46793365, 56.86639023, 62.51293182, ..., 63.03756714,
        60.1345787 , 86.51692963]])

 scripts/test.py:152: RuntimeWarning
 __________________ test_curved_bf16[neon-mahalanobis-11-2-5] ___________________

 ndim = 11, metric = 'mahalanobis', capability = 'neon'
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("ndim", [11, 16, 33])
    @pytest.mark.parametrize("metric", ["bilinear", "mahalanobis"])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_curved_bf16(ndim, metric, capability, stats_fixture):
        """Compares various SIMD kernels (like Bilinear Forms and Mahalanobis distances) for curved spaces
        with their NumPy or baseline counterparts, testing accuracy for the Brain-float format not
        natively supported by NumPy."""
    
        np.random.seed()
    
        # Let's generate some non-negative probability distributions
        a = np.abs(np.random.randn(ndim).astype(np.float32))
        b = np.abs(np.random.randn(ndim).astype(np.float32))
        a /= np.sum(a)
        b /= np.sum(b)
    
        # Let's compute the inverse of the covariance matrix, otherwise in the SciPy
        # implementation of the Mahalanobis we may face `sqrt` of a negative number.
        # We multiply the matrix by its transpose to get a positive-semi-definite matrix.
        c = np.abs(np.random.randn(ndim, ndim).astype(np.float32))
        c = np.dot(c, c.T)
    
        a_f32_rounded, a_bf16 = f32_downcast_to_bf16(a)
        b_f32_rounded, b_bf16 = f32_downcast_to_bf16(b)
        c_f32_rounded, c_bf16 = f32_downcast_to_bf16(c)
    
        keep_one_capability(capability)
        baseline_kernel, simd_kernel = name_to_kernels(metric)
 >       accurate_dt, accurate = profile(
            baseline_kernel,
            a_f32_rounded.astype(np.float64),
            b_f32_rounded.astype(np.float64),
            c_f32_rounded.astype(np.float64),
        )

 a          = array([0.15737216, 0.07362095, 0.17663386, 0.05938813, 0.15533644,
       0.01374134, 0.00344269, 0.0541715 , 0.04236267, 0.09039976,
       0.17353044], dtype=float32)
 a_bf16     = array([15905, 15767, 15925, 15731, 15903, 15457, 15202, 15710, 15662,
       15801, 15922], dtype=uint16)
 a_f32_rounded = array([0.15722656, 0.07373047, 0.17675781, 0.05932617, 0.15527344,
       0.01373291, 0.00344849, 0.05419922, 0.04248047, 0.09033203,
       0.17382812], dtype=float32)
 b          = array([0.08019404, 0.0546619 , 0.00213525, 0.05573627, 0.0515562 ,
       0.19658211, 0.16779397, 0.07494679, 0.2166836 , 0.05038694,
       0.04932301], dtype=float32)
 b_bf16     = array([15780, 15712, 15116, 15716, 15699, 15945, 15916, 15769, 15966,
       15694, 15690], dtype=uint16)
 b_f32_rounded = array([0.08007812, 0.0546875 , 0.00213623, 0.05566406, 0.05151367,
       0.19628906, 0.16796875, 0.07470703, 0.21679688, 0.05029297,
       0.04931641], dtype=float32)
 baseline_kernel = <function baseline_mahalanobis at 0x7ffff48ef4c0>
 c          = array([[ 9.138401 ,  8.2846575,  6.7119637,  4.929004 ,  9.105889 ,
         8.323881 ,  5.984715 ,  7.3160114,  6.400...3,  9.646407 ,
         7.193979 ,  6.5342426,  8.730336 ,  7.1245093,  4.9020634,
         6.286193 ]], dtype=float32)
 c_bf16     = array([[16658, 16645, 16599, 16542, 16658, 16645, 16576, 16618, 16589,
        16547, 16624],
       [16645, 16637, 16...8, 16541],
       [16624, 16662, 16616, 16546, 16666, 16614, 16593, 16652, 16612,
        16541, 16585]], dtype=uint16)
 c_f32_rounded = array([[ 9.125  ,  8.3125 ,  6.71875,  4.9375 ,  9.125  ,  8.3125 ,
         6.     ,  7.3125 ,  6.40625,  5.09375,  7...5  ,  7.25   ,  5.0625 ,  9.625  ,  7.1875 ,
         6.53125,  8.75   ,  7.125  ,  4.90625,  6.28125]], dtype=float32)
 capability = 'neon'
 metric     = 'mahalanobis'
 ndim       = 11
 simd_kernel = <built-in function mahalanobis>
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

 scripts/test.py:779: 
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
 scripts/test.py:179: in profile
    result = callable(*args, **kwargs)
        args       = (array([0.15722656, 0.07373047, 0.17675781, 0.05932617, 0.15527344,
       0.01373291, 0.00344849, 0.05419922, 0.04248...7.5    ,  9.375  ,  7.25   ,  5.0625 ,  9.625  ,  7.1875 ,
         6.53125,  8.75   ,  7.125  ,  4.90625,  6.28125]]))
        before     = 9865383511494
        callable   = <function baseline_mahalanobis at 0x7ffff48ef4c0>
        kwargs     = {}
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

 x = array([0.15722656, 0.07373047, 0.17675781, 0.05932617, 0.15527344,
       0.01373291, 0.00344849, 0.05419922, 0.04248047, 0.09033203,
       0.17382812])
 y = array([0.08007812, 0.0546875 , 0.00213623, 0.05566406, 0.05151367,
       0.19628906, 0.16796875, 0.07470703, 0.21679688, 0.05029297,
       0.04931641])
 z = array([[ 9.125  ,  8.3125 ,  6.71875,  4.9375 ,  9.125  ,  8.3125 ,
         6.     ,  7.3125 ,  6.40625,  5.09375,  7... 7.5    ,  9.375  ,  7.25   ,  5.0625 ,  9.625  ,  7.1875 ,
         6.53125,  8.75   ,  7.125  ,  4.90625,  6.28125]])

    def baseline_mahalanobis(x, y, z):
        diff = x - y
 >       return np.sqrt(diff @ z @ diff)
 E       RuntimeWarning: invalid value encountered in sqrt

 diff       = array([ 0.07714844,  0.01904297,  0.17462158,  0.00366211,  0.10375977,
       -0.18255615, -0.16452026, -0.02050781, -0.17431641,  0.04003906,
        0.12451172])
 x          = array([0.15722656, 0.07373047, 0.17675781, 0.05932617, 0.15527344,
       0.01373291, 0.00344849, 0.05419922, 0.04248047, 0.09033203,
       0.17382812])
 y          = array([0.08007812, 0.0546875 , 0.00213623, 0.05566406, 0.05151367,
       0.19628906, 0.16796875, 0.07470703, 0.21679688, 0.05029297,
       0.04931641])
 z          = array([[ 9.125  ,  8.3125 ,  6.71875,  4.9375 ,  9.125  ,  8.3125 ,
         6.     ,  7.3125 ,  6.40625,  5.09375,  7... 7.5    ,  9.375  ,  7.25   ,  5.0625 ,  9.625  ,  7.1875 ,
         6.53125,  8.75   ,  7.125  ,  4.90625,  6.28125]])

 scripts/test.py:152: RuntimeWarning
 __________________ test_curved_bf16[neon-mahalanobis-11-4-5] ___________________

 ndim = 11, metric = 'mahalanobis', capability = 'neon'
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("ndim", [11, 16, 33])
    @pytest.mark.parametrize("metric", ["bilinear", "mahalanobis"])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_curved_bf16(ndim, metric, capability, stats_fixture):
        """Compares various SIMD kernels (like Bilinear Forms and Mahalanobis distances) for curved spaces
        with their NumPy or baseline counterparts, testing accuracy for the Brain-float format not
        natively supported by NumPy."""
    
        np.random.seed()
    
        # Let's generate some non-negative probability distributions
        a = np.abs(np.random.randn(ndim).astype(np.float32))
        b = np.abs(np.random.randn(ndim).astype(np.float32))
        a /= np.sum(a)
        b /= np.sum(b)
    
        # Let's compute the inverse of the covariance matrix, otherwise in the SciPy
        # implementation of the Mahalanobis we may face `sqrt` of a negative number.
        # We multiply the matrix by its transpose to get a positive-semi-definite matrix.
        c = np.abs(np.random.randn(ndim, ndim).astype(np.float32))
        c = np.dot(c, c.T)
    
        a_f32_rounded, a_bf16 = f32_downcast_to_bf16(a)
        b_f32_rounded, b_bf16 = f32_downcast_to_bf16(b)
        c_f32_rounded, c_bf16 = f32_downcast_to_bf16(c)
    
        keep_one_capability(capability)
        baseline_kernel, simd_kernel = name_to_kernels(metric)
 >       accurate_dt, accurate = profile(
            baseline_kernel,
            a_f32_rounded.astype(np.float64),
            b_f32_rounded.astype(np.float64),
            c_f32_rounded.astype(np.float64),
        )

 a          = array([0.11449459, 0.19555509, 0.0575695 , 0.15924539, 0.04827316,
       0.0824703 , 0.02812681, 0.01318203, 0.10469493, 0.12264735,
       0.07374086], dtype=float32)
 a_bf16     = array([15850, 15944, 15724, 15907, 15686, 15785, 15590, 15448, 15830,
       15867, 15767], dtype=uint16)
 a_f32_rounded = array([0.11425781, 0.1953125 , 0.05761719, 0.15917969, 0.04833984,
       0.08251953, 0.02807617, 0.01318359, 0.10449219, 0.12255859,
       0.07373047], dtype=float32)
 b          = array([0.00801033, 0.07395937, 0.2712878 , 0.0349329 , 0.1178707 ,
       0.04699034, 0.07773169, 0.05445457, 0.20698565, 0.02831857,
       0.07945813], dtype=float32)
 b_bf16     = array([15363, 15767, 16011, 15631, 15857, 15680, 15775, 15711, 15956,
       15592, 15779], dtype=uint16)
 b_f32_rounded = array([0.00799561, 0.07373047, 0.27148438, 0.03491211, 0.11767578,
       0.046875  , 0.07763672, 0.05444336, 0.20703125, 0.02832031,
       0.07958984], dtype=float32)
 baseline_kernel = <function baseline_mahalanobis at 0x7ffff48ef4c0>
 c          = array([[ 4.947346 ,  4.5684915,  7.0727086,  2.3730888,  9.547079 ,
         6.7738657,  7.1226883,  9.755802 ,  6.263...6,  5.0622897,
         9.000133 ,  6.1354475,  4.9309874,  6.0099397,  4.828717 ,
         6.918633 ]], dtype=float32)
 c_bf16     = array([[16542, 16530, 16610, 16408, 16665, 16601, 16612, 16668, 16584,
        16478, 16556],
       [16530, 16662, 16...5, 16539],
       [16556, 16659, 16626, 16605, 16546, 16656, 16580, 16542, 16576,
        16539, 16605]], dtype=uint16)
 c_f32_rounded = array([[ 4.9375 ,  4.5625 ,  7.0625 ,  2.375  ,  9.5625 ,  6.78125,
         7.125  ,  9.75   ,  6.25   ,  3.46875,  5...75 ,  7.5625 ,  6.90625,  5.0625 ,  9.     ,
         6.125  ,  4.9375 ,  6.     ,  4.84375,  6.90625]], dtype=float32)
 capability = 'neon'
 metric     = 'mahalanobis'
 ndim       = 11
 simd_kernel = <built-in function mahalanobis>
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

 scripts/test.py:779: 
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
 scripts/test.py:179: in profile
    result = callable(*args, **kwargs)
        args       = (array([0.11425781, 0.1953125 , 0.05761719, 0.15917969, 0.04833984,
       0.08251953, 0.02807617, 0.01318359, 0.10449...5.375  ,  9.1875 ,  7.5625 ,  6.90625,  5.0625 ,  9.     ,
         6.125  ,  4.9375 ,  6.     ,  4.84375,  6.90625]]))
        before     = 9866091343251
        callable   = <function baseline_mahalanobis at 0x7ffff48ef4c0>
        kwargs     = {}
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

 x = array([0.11425781, 0.1953125 , 0.05761719, 0.15917969, 0.04833984,
       0.08251953, 0.02807617, 0.01318359, 0.10449219, 0.12255859,
       0.07373047])
 y = array([0.00799561, 0.07373047, 0.27148438, 0.03491211, 0.11767578,
       0.046875  , 0.07763672, 0.05444336, 0.20703125, 0.02832031,
       0.07958984])
 z = array([[ 4.9375 ,  4.5625 ,  7.0625 ,  2.375  ,  9.5625 ,  6.78125,
         7.125  ,  9.75   ,  6.25   ,  3.46875,  5... 5.375  ,  9.1875 ,  7.5625 ,  6.90625,  5.0625 ,  9.     ,
         6.125  ,  4.9375 ,  6.     ,  4.84375,  6.90625]])

    def baseline_mahalanobis(x, y, z):
        diff = x - y
 >       return np.sqrt(diff @ z @ diff)
 E       RuntimeWarning: invalid value encountered in sqrt

 diff       = array([ 0.10626221,  0.12158203, -0.21386719,  0.12426758, -0.06933594,
        0.03564453, -0.04956055, -0.04125977, -0.10253906,  0.09423828,
       -0.00585938])
 x          = array([0.11425781, 0.1953125 , 0.05761719, 0.15917969, 0.04833984,
       0.08251953, 0.02807617, 0.01318359, 0.10449219, 0.12255859,
       0.07373047])
 y          = array([0.00799561, 0.07373047, 0.27148438, 0.03491211, 0.11767578,
       0.046875  , 0.07763672, 0.05444336, 0.20703125, 0.02832031,
       0.07958984])
 z          = array([[ 4.9375 ,  4.5625 ,  7.0625 ,  2.375  ,  9.5625 ,  6.78125,
         7.125  ,  9.75   ,  6.25   ,  3.46875,  5... 5.375  ,  9.1875 ,  7.5625 ,  6.90625,  5.0625 ,  9.     ,
         6.125  ,  4.9375 ,  6.     ,  4.84375,  6.90625]])

 scripts/test.py:152: RuntimeWarning
 __________________ test_curved_bf16[neon-mahalanobis-11-5-5] ___________________

 ndim = 11, metric = 'mahalanobis', capability = 'neon'
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("ndim", [11, 16, 33])
    @pytest.mark.parametrize("metric", ["bilinear", "mahalanobis"])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_curved_bf16(ndim, metric, capability, stats_fixture):
        """Compares various SIMD kernels (like Bilinear Forms and Mahalanobis distances) for curved spaces
        with their NumPy or baseline counterparts, testing accuracy for the Brain-float format not
        natively supported by NumPy."""
    
        np.random.seed()
    
        # Let's generate some non-negative probability distributions
        a = np.abs(np.random.randn(ndim).astype(np.float32))
        b = np.abs(np.random.randn(ndim).astype(np.float32))
        a /= np.sum(a)
        b /= np.sum(b)
    
        # Let's compute the inverse of the covariance matrix, otherwise in the SciPy
        # implementation of the Mahalanobis we may face `sqrt` of a negative number.
        # We multiply the matrix by its transpose to get a positive-semi-definite matrix.
        c = np.abs(np.random.randn(ndim, ndim).astype(np.float32))
        c = np.dot(c, c.T)
    
        a_f32_rounded, a_bf16 = f32_downcast_to_bf16(a)
        b_f32_rounded, b_bf16 = f32_downcast_to_bf16(b)
        c_f32_rounded, c_bf16 = f32_downcast_to_bf16(c)
    
        keep_one_capability(capability)
        baseline_kernel, simd_kernel = name_to_kernels(metric)
 >       accurate_dt, accurate = profile(
            baseline_kernel,
            a_f32_rounded.astype(np.float64),
            b_f32_rounded.astype(np.float64),
            c_f32_rounded.astype(np.float64),
        )

 a          = array([0.08745242, 0.04586745, 0.08788959, 0.1451298 , 0.10514453,
       0.0692286 , 0.11472143, 0.07700668, 0.20208317, 0.0069267 ,
       0.05854966], dtype=float32)
 a_bf16     = array([15795, 15676, 15796, 15893, 15831, 15758, 15851, 15774, 15951,
       15331, 15728], dtype=uint16)
 a_f32_rounded = array([0.08740234, 0.04589844, 0.08789062, 0.14550781, 0.10498047,
       0.06933594, 0.11474609, 0.07714844, 0.20214844, 0.00692749,
       0.05859375], dtype=float32)
 b          = array([0.0607146 , 0.12903747, 0.02143746, 0.11841147, 0.04157934,
       0.0399508 , 0.19762956, 0.04258467, 0.07077934, 0.12743457,
       0.15044075], dtype=float32)
 b_bf16     = array([15737, 15876, 15536, 15859, 15658, 15652, 15946, 15662, 15761,
       15874, 15898], dtype=uint16)
 b_f32_rounded = array([0.06079102, 0.12890625, 0.02148438, 0.11865234, 0.04150391,
       0.04003906, 0.19726562, 0.04248047, 0.07080078, 0.12695312,
       0.15039062], dtype=float32)
 baseline_kernel = <function baseline_mahalanobis at 0x7ffff48ef4c0>
 c          = array([[ 6.8150883, 11.464583 ,  8.785762 ,  9.223482 , 13.810913 ,
         6.4658813,  7.7385287, 12.277863 ,  7.359...5,  6.2955275,
         7.6874814,  6.3246527,  7.6468797,  7.367076 ,  7.3772964,
         7.419015 ]], dtype=float32)
 c_bf16     = array([[16602, 16695, 16653, 16660, 16733, 16591, 16632, 16708, 16619,
        16687, 16673],
       [16695, 16698, 16...8, 16620],
       [16673, 16654, 16662, 16664, 16585, 16630, 16586, 16629, 16620,
        16620, 16621]], dtype=uint16)
 c_f32_rounded = array([[ 6.8125  , 11.4375  ,  8.8125  ,  9.25    , 13.8125  ,  6.46875 ,
         7.75    , 12.25    ,  7.34375 , 10....  9.5     ,  6.28125 ,  7.6875  ,
         6.3125  ,  7.65625 ,  7.375   ,  7.375   ,  7.40625 ]],
      dtype=float32)
 capability = 'neon'
 metric     = 'mahalanobis'
 ndim       = 11
 simd_kernel = <built-in function mahalanobis>
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

 scripts/test.py:779: 
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
 scripts/test.py:179: in profile
    result = callable(*args, **kwargs)
        args       = (array([0.08740234, 0.04589844, 0.08789062, 0.14550781, 0.10498047,
       0.06933594, 0.11474609, 0.07714844, 0.20214...8.875   ,  9.375   ,  9.5     ,  6.28125 ,  7.6875  ,
         6.3125  ,  7.65625 ,  7.375   ,  7.375   ,  7.40625 ]]))
        before     = 9866718362740
        callable   = <function baseline_mahalanobis at 0x7ffff48ef4c0>
        kwargs     = {}
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

 x = array([0.08740234, 0.04589844, 0.08789062, 0.14550781, 0.10498047,
       0.06933594, 0.11474609, 0.07714844, 0.20214844, 0.00692749,
       0.05859375])
 y = array([0.06079102, 0.12890625, 0.02148438, 0.11865234, 0.04150391,
       0.04003906, 0.19726562, 0.04248047, 0.07080078, 0.12695312,
       0.15039062])
 z = array([[ 6.8125  , 11.4375  ,  8.8125  ,  9.25    , 13.8125  ,  6.46875 ,
         7.75    , 12.25    ,  7.34375 , 10.... 8.875   ,  9.375   ,  9.5     ,  6.28125 ,  7.6875  ,
         6.3125  ,  7.65625 ,  7.375   ,  7.375   ,  7.40625 ]])

    def baseline_mahalanobis(x, y, z):
        diff = x - y
 >       return np.sqrt(diff @ z @ diff)
 E       RuntimeWarning: invalid value encountered in sqrt

 diff       = array([ 0.02661133, -0.08300781,  0.06640625,  0.02685547,  0.06347656,
        0.02929688, -0.08251953,  0.03466797,  0.13134766, -0.12002563,
       -0.09179688])
 x          = array([0.08740234, 0.04589844, 0.08789062, 0.14550781, 0.10498047,
       0.06933594, 0.11474609, 0.07714844, 0.20214844, 0.00692749,
       0.05859375])
 y          = array([0.06079102, 0.12890625, 0.02148438, 0.11865234, 0.04150391,
       0.04003906, 0.19726562, 0.04248047, 0.07080078, 0.12695312,
       0.15039062])
 z          = array([[ 6.8125  , 11.4375  ,  8.8125  ,  9.25    , 13.8125  ,  6.46875 ,
         7.75    , 12.25    ,  7.34375 , 10.... 8.875   ,  9.375   ,  9.5     ,  6.28125 ,  7.6875  ,
         6.3125  ,  7.65625 ,  7.375   ,  7.375   ,  7.40625 ]])

 scripts/test.py:152: RuntimeWarning
 __________________ test_curved_bf16[neon-mahalanobis-16-2-5] ___________________

 ndim = 16, metric = 'mahalanobis', capability = 'neon'
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("ndim", [11, 16, 33])
    @pytest.mark.parametrize("metric", ["bilinear", "mahalanobis"])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_curved_bf16(ndim, metric, capability, stats_fixture):
        """Compares various SIMD kernels (like Bilinear Forms and Mahalanobis distances) for curved spaces
        with their NumPy or baseline counterparts, testing accuracy for the Brain-float format not
        natively supported by NumPy."""
    
        np.random.seed()
    
        # Let's generate some non-negative probability distributions
        a = np.abs(np.random.randn(ndim).astype(np.float32))
        b = np.abs(np.random.randn(ndim).astype(np.float32))
        a /= np.sum(a)
        b /= np.sum(b)
    
        # Let's compute the inverse of the covariance matrix, otherwise in the SciPy
        # implementation of the Mahalanobis we may face `sqrt` of a negative number.
        # We multiply the matrix by its transpose to get a positive-semi-definite matrix.
        c = np.abs(np.random.randn(ndim, ndim).astype(np.float32))
        c = np.dot(c, c.T)
    
        a_f32_rounded, a_bf16 = f32_downcast_to_bf16(a)
        b_f32_rounded, b_bf16 = f32_downcast_to_bf16(b)
        c_f32_rounded, c_bf16 = f32_downcast_to_bf16(c)
    
        keep_one_capability(capability)
        baseline_kernel, simd_kernel = name_to_kernels(metric)
 >       accurate_dt, accurate = profile(
            baseline_kernel,
            a_f32_rounded.astype(np.float64),
            b_f32_rounded.astype(np.float64),
            c_f32_rounded.astype(np.float64),
        )

 a          = array([0.05183683, 0.05329163, 0.01034037, 0.03325741, 0.06395545,
       0.03609726, 0.1356409 , 0.06577931, 0.06588486, 0.01263307,
       0.01698652, 0.04546768, 0.18378589, 0.0476885 , 0.10812191,
       0.0692324 ], dtype=float32)
 a_bf16     = array([15700, 15706, 15401, 15624, 15747, 15636, 15883, 15751, 15751,
       15439, 15499, 15674, 15932, 15683, 15837, 15758], dtype=uint16)
 a_f32_rounded = array([0.05175781, 0.05322266, 0.01031494, 0.03320312, 0.06396484,
       0.03613281, 0.13574219, 0.06591797, 0.06591797, 0.01263428,
       0.01696777, 0.04541016, 0.18359375, 0.04760742, 0.10791016,
       0.06933594], dtype=float32)
 b          = array([0.03255955, 0.05799727, 0.03979054, 0.04762046, 0.07283721,
       0.13581307, 0.13949206, 0.04703102, 0.09597909, 0.04846545,
       0.03073335, 0.02011583, 0.05464631, 0.05310578, 0.00074896,
       0.12306405], dtype=float32)
 b_bf16     = array([15621, 15726, 15651, 15683, 15765, 15883, 15887, 15681, 15813,
       15687, 15612, 15525, 15712, 15706, 14916, 15868], dtype=uint16)
 b_f32_rounded = array([0.0324707 , 0.05810547, 0.03979492, 0.04760742, 0.07275391,
       0.13574219, 0.13964844, 0.04711914, 0.09619141, 0.04858398,
       0.03076172, 0.0201416 , 0.0546875 , 0.05322266, 0.00074768,
       0.12304688], dtype=float32)
 baseline_kernel = <function baseline_mahalanobis at 0x7ffff48ef4c0>
 c          = array([[17.33531  , 12.695505 , 11.118306 ,  6.240057 , 11.698747 ,
        11.070826 , 14.165183 , 13.014894 , 11.581... , 14.860448 ,
        12.447666 ,  7.478157 ,  7.7401   , 14.174294 ,  9.124172 ,
        16.06184  ]], dtype=float32)
 c_bf16     = array([[16779, 16715, 16690, 16584, 16699, 16689, 16739, 16720, 16697,
        16718, 16619, 16652, 16679, 16669, 1666...680, 16733, 16746, 16660, 16720, 16758, 16699,
        16750, 16711, 16623, 16632, 16739, 16658, 16768]], dtype=uint16)
 c_f32_rounded = array([[17.375  , 12.6875 , 11.125  ,  6.25   , 11.6875 , 11.0625 ,
        14.1875 , 13.     , 11.5625 , 12.875  ,  7...   , 15.375  , 11.6875 , 14.875  , 12.4375 ,  7.46875,
         7.75   , 14.1875 ,  9.125  , 16.     ]], dtype=float32)
 capability = 'neon'
 metric     = 'mahalanobis'
 ndim       = 16
 simd_kernel = <built-in function mahalanobis>
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

 scripts/test.py:779: 
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
 scripts/test.py:179: in profile
    result = callable(*args, **kwargs)
        args       = (array([0.05175781, 0.05322266, 0.01031494, 0.03320312, 0.06396484,
       0.03613281, 0.13574219, 0.06591797, 0.06591...
        13.     , 15.375  , 11.6875 , 14.875  , 12.4375 ,  7.46875,
         7.75   , 14.1875 ,  9.125  , 16.     ]]))
        before     = 9867370238465
        callable   = <function baseline_mahalanobis at 0x7ffff48ef4c0>
        kwargs     = {}
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

 x = array([0.05175781, 0.05322266, 0.01031494, 0.03320312, 0.06396484,
       0.03613281, 0.13574219, 0.06591797, 0.06591797, 0.01263428,
       0.01696777, 0.04541016, 0.18359375, 0.04760742, 0.10791016,
       0.06933594])
 y = array([0.0324707 , 0.05810547, 0.03979492, 0.04760742, 0.07275391,
       0.13574219, 0.13964844, 0.04711914, 0.09619141, 0.04858398,
       0.03076172, 0.0201416 , 0.0546875 , 0.05322266, 0.00074768,
       0.12304688])
 z = array([[17.375  , 12.6875 , 11.125  ,  6.25   , 11.6875 , 11.0625 ,
        14.1875 , 13.     , 11.5625 , 12.875  ,  7...,
        13.     , 15.375  , 11.6875 , 14.875  , 12.4375 ,  7.46875,
         7.75   , 14.1875 ,  9.125  , 16.     ]])

    def baseline_mahalanobis(x, y, z):
        diff = x - y
 >       return np.sqrt(diff @ z @ diff)
 E       RuntimeWarning: invalid value encountered in sqrt

 diff       = array([ 0.01928711, -0.00488281, -0.02947998, -0.0144043 , -0.00878906,
       -0.09960938, -0.00390625,  0.01879883, -0.03027344, -0.03594971,
       -0.01379395,  0.02526855,  0.12890625, -0.00561523,  0.10716248,
       -0.05371094])
 x          = array([0.05175781, 0.05322266, 0.01031494, 0.03320312, 0.06396484,
       0.03613281, 0.13574219, 0.06591797, 0.06591797, 0.01263428,
       0.01696777, 0.04541016, 0.18359375, 0.04760742, 0.10791016,
       0.06933594])
 y          = array([0.0324707 , 0.05810547, 0.03979492, 0.04760742, 0.07275391,
       0.13574219, 0.13964844, 0.04711914, 0.09619141, 0.04858398,
       0.03076172, 0.0201416 , 0.0546875 , 0.05322266, 0.00074768,
       0.12304688])
 z          = array([[17.375  , 12.6875 , 11.125  ,  6.25   , 11.6875 , 11.0625 ,
        14.1875 , 13.     , 11.5625 , 12.875  ,  7...,
        13.     , 15.375  , 11.6875 , 14.875  , 12.4375 ,  7.46875,
         7.75   , 14.1875 ,  9.125  , 16.     ]])

 scripts/test.py:152: RuntimeWarning
 __________________ test_curved_bf16[neon-mahalanobis-16-3-5] ___________________

 ndim = 16, metric = 'mahalanobis', capability = 'neon'
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("ndim", [11, 16, 33])
    @pytest.mark.parametrize("metric", ["bilinear", "mahalanobis"])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_curved_bf16(ndim, metric, capability, stats_fixture):
        """Compares various SIMD kernels (like Bilinear Forms and Mahalanobis distances) for curved spaces
        with their NumPy or baseline counterparts, testing accuracy for the Brain-float format not
        natively supported by NumPy."""
    
        np.random.seed()
    
        # Let's generate some non-negative probability distributions
        a = np.abs(np.random.randn(ndim).astype(np.float32))
        b = np.abs(np.random.randn(ndim).astype(np.float32))
        a /= np.sum(a)
        b /= np.sum(b)
    
        # Let's compute the inverse of the covariance matrix, otherwise in the SciPy
        # implementation of the Mahalanobis we may face `sqrt` of a negative number.
        # We multiply the matrix by its transpose to get a positive-semi-definite matrix.
        c = np.abs(np.random.randn(ndim, ndim).astype(np.float32))
        c = np.dot(c, c.T)
    
        a_f32_rounded, a_bf16 = f32_downcast_to_bf16(a)
        b_f32_rounded, b_bf16 = f32_downcast_to_bf16(b)
        c_f32_rounded, c_bf16 = f32_downcast_to_bf16(c)
    
        keep_one_capability(capability)
        baseline_kernel, simd_kernel = name_to_kernels(metric)
 >       accurate_dt, accurate = profile(
            baseline_kernel,
            a_f32_rounded.astype(np.float64),
            b_f32_rounded.astype(np.float64),
            c_f32_rounded.astype(np.float64),
        )

 a          = array([0.05913157, 0.00292312, 0.00207095, 0.13867807, 0.05677681,
       0.09225079, 0.06194059, 0.07912803, 0.04178503, 0.00351349,
       0.05326412, 0.07341746, 0.18337463, 0.07836896, 0.0677909 ,
       0.0055855 ], dtype=float32)
 a_bf16     = array([15730, 15168, 15112, 15886, 15721, 15805, 15742, 15778, 15659,
       15206, 15706, 15766, 15932, 15776, 15755, 15287], dtype=uint16)
 a_f32_rounded = array([0.05908203, 0.00292969, 0.0020752 , 0.13867188, 0.05688477,
       0.09228516, 0.06201172, 0.07910156, 0.04174805, 0.00350952,
       0.05322266, 0.07324219, 0.18359375, 0.078125  , 0.06787109,
       0.00558472], dtype=float32)
 b          = array([0.12802728, 0.08412331, 0.08800294, 0.06413439, 0.02525747,
       0.05046276, 0.06488016, 0.08743123, 0.06472802, 0.01927552,
       0.03286222, 0.02525537, 0.0665925 , 0.04587681, 0.07016247,
       0.08292751], dtype=float32)
 b_bf16     = array([15875, 15788, 15796, 15747, 15567, 15695, 15749, 15795, 15749,
       15518, 15623, 15567, 15752, 15676, 15760, 15786], dtype=uint16)
 b_f32_rounded = array([0.12792969, 0.08398438, 0.08789062, 0.06396484, 0.02526855,
       0.05053711, 0.06494141, 0.08740234, 0.06494141, 0.01928711,
       0.03295898, 0.02526855, 0.06640625, 0.04589844, 0.0703125 ,
       0.08300781], dtype=float32)
 baseline_kernel = <function baseline_mahalanobis at 0x7ffff48ef4c0>
 c          = array([[ 6.4047914, 12.385056 , 10.148139 , 11.792552 , 10.218336 ,
        17.296188 , 15.942404 , 10.417574 , 12.157... ,  6.8855577,
         5.4251018,  5.866832 ,  8.364968 , 10.78918  ,  9.315061 ,
         5.4821625]], dtype=float32)
 c_bf16     = array([[16589, 16710, 16674, 16701, 16675, 16778, 16767, 16679, 16707,
        16705, 16711, 16677, 16745, 16714, 1675...666, 16634, 16562, 16664, 16645, 16589, 16571,
        16604, 16558, 16572, 16646, 16685, 16661, 16559]], dtype=uint16)
 c_f32_rounded = array([[ 6.40625, 12.375  , 10.125  , 11.8125 , 10.1875 , 17.25   ,
        15.9375 , 10.4375 , 12.1875 , 12.0625 , 12...25 ,  6.40625,  5.84375,  6.875  ,  5.4375 ,  5.875  ,
         8.375  , 10.8125 ,  9.3125 ,  5.46875]], dtype=float32)
 capability = 'neon'
 metric     = 'mahalanobis'
 ndim       = 16
 simd_kernel = <built-in function mahalanobis>
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

 scripts/test.py:779: 
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
 scripts/test.py:179: in profile
    result = callable(*args, **kwargs)
        args       = (array([0.05908203, 0.00292969, 0.0020752 , 0.13867188, 0.05688477,
       0.09228516, 0.06201172, 0.07910156, 0.04174...
         8.3125 ,  6.40625,  5.84375,  6.875  ,  5.4375 ,  5.875  ,
         8.375  , 10.8125 ,  9.3125 ,  5.46875]]))
        before     = 9868036884935
        callable   = <function baseline_mahalanobis at 0x7ffff48ef4c0>
        kwargs     = {}
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

 x = array([0.05908203, 0.00292969, 0.0020752 , 0.13867188, 0.05688477,
       0.09228516, 0.06201172, 0.07910156, 0.04174805, 0.00350952,
       0.05322266, 0.07324219, 0.18359375, 0.078125  , 0.06787109,
       0.00558472])
 y = array([0.12792969, 0.08398438, 0.08789062, 0.06396484, 0.02526855,
       0.05053711, 0.06494141, 0.08740234, 0.06494141, 0.01928711,
       0.03295898, 0.02526855, 0.06640625, 0.04589844, 0.0703125 ,
       0.08300781])
 z = array([[ 6.40625, 12.375  , 10.125  , 11.8125 , 10.1875 , 17.25   ,
        15.9375 , 10.4375 , 12.1875 , 12.0625 , 12...,
         8.3125 ,  6.40625,  5.84375,  6.875  ,  5.4375 ,  5.875  ,
         8.375  , 10.8125 ,  9.3125 ,  5.46875]])

    def baseline_mahalanobis(x, y, z):
        diff = x - y
 >       return np.sqrt(diff @ z @ diff)
 E       RuntimeWarning: invalid value encountered in sqrt

 diff       = array([-0.06884766, -0.08105469, -0.08581543,  0.07470703,  0.03161621,
        0.04174805, -0.00292969, -0.00830078, -0.02319336, -0.01577759,
        0.02026367,  0.04797363,  0.1171875 ,  0.03222656, -0.00244141,
       -0.0774231 ])
 x          = array([0.05908203, 0.00292969, 0.0020752 , 0.13867188, 0.05688477,
       0.09228516, 0.06201172, 0.07910156, 0.04174805, 0.00350952,
       0.05322266, 0.07324219, 0.18359375, 0.078125  , 0.06787109,
       0.00558472])
 y          = array([0.12792969, 0.08398438, 0.08789062, 0.06396484, 0.02526855,
       0.05053711, 0.06494141, 0.08740234, 0.06494141, 0.01928711,
       0.03295898, 0.02526855, 0.06640625, 0.04589844, 0.0703125 ,
       0.08300781])
 z          = array([[ 6.40625, 12.375  , 10.125  , 11.8125 , 10.1875 , 17.25   ,
        15.9375 , 10.4375 , 12.1875 , 12.0625 , 12...,
         8.3125 ,  6.40625,  5.84375,  6.875  ,  5.4375 ,  5.875  ,
         8.375  , 10.8125 ,  9.3125 ,  5.46875]])

 scripts/test.py:152: RuntimeWarning
 __________________ test_curved_bf16[neon-mahalanobis-16-4-5] ___________________

 ndim = 16, metric = 'mahalanobis', capability = 'neon'
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("ndim", [11, 16, 33])
    @pytest.mark.parametrize("metric", ["bilinear", "mahalanobis"])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_curved_bf16(ndim, metric, capability, stats_fixture):
        """Compares various SIMD kernels (like Bilinear Forms and Mahalanobis distances) for curved spaces
        with their NumPy or baseline counterparts, testing accuracy for the Brain-float format not
        natively supported by NumPy."""
    
        np.random.seed()
    
        # Let's generate some non-negative probability distributions
        a = np.abs(np.random.randn(ndim).astype(np.float32))
        b = np.abs(np.random.randn(ndim).astype(np.float32))
        a /= np.sum(a)
        b /= np.sum(b)
    
        # Let's compute the inverse of the covariance matrix, otherwise in the SciPy
        # implementation of the Mahalanobis we may face `sqrt` of a negative number.
        # We multiply the matrix by its transpose to get a positive-semi-definite matrix.
        c = np.abs(np.random.randn(ndim, ndim).astype(np.float32))
        c = np.dot(c, c.T)
    
        a_f32_rounded, a_bf16 = f32_downcast_to_bf16(a)
        b_f32_rounded, b_bf16 = f32_downcast_to_bf16(b)
        c_f32_rounded, c_bf16 = f32_downcast_to_bf16(c)
    
        keep_one_capability(capability)
        baseline_kernel, simd_kernel = name_to_kernels(metric)
 >       accurate_dt, accurate = profile(
            baseline_kernel,
            a_f32_rounded.astype(np.float64),
            b_f32_rounded.astype(np.float64),
            c_f32_rounded.astype(np.float64),
        )

 a          = array([0.0872926 , 0.00985545, 0.01741584, 0.07877909, 0.01683093,
       0.14965929, 0.04298203, 0.05967734, 0.10704197, 0.14927663,
       0.01436996, 0.01983159, 0.10524773, 0.01597997, 0.06592687,
       0.05983274], dtype=float32)
 a_bf16     = array([15795, 15393, 15503, 15777, 15498, 15897, 15664, 15732, 15835,
       15897, 15467, 15522, 15832, 15491, 15751, 15733], dtype=uint16)
 a_f32_rounded = array([0.08740234, 0.00982666, 0.01745605, 0.07861328, 0.0168457 ,
       0.14941406, 0.04296875, 0.05957031, 0.10693359, 0.14941406,
       0.01434326, 0.01977539, 0.10546875, 0.01599121, 0.06591797,
       0.05981445], dtype=float32)
 b          = array([0.0411532 , 0.09340645, 0.09705404, 0.00758618, 0.084663  ,
       0.06598785, 0.14902118, 0.02768487, 0.05018002, 0.04849224,
       0.07945713, 0.07135048, 0.10975375, 0.02423587, 0.03656166,
       0.01341206], dtype=float32)
 b_bf16     = array([15657, 15807, 15815, 15353, 15789, 15751, 15897, 15587, 15694,
       15687, 15779, 15762, 15841, 15559, 15638, 15452], dtype=uint16)
 b_f32_rounded = array([0.04125977, 0.09326172, 0.09716797, 0.00759888, 0.08447266,
       0.06591797, 0.14941406, 0.02770996, 0.05029297, 0.04858398,
       0.07958984, 0.07128906, 0.10986328, 0.02429199, 0.03662109,
       0.01342773], dtype=float32)
 baseline_kernel = <function baseline_mahalanobis at 0x7ffff48ef4c0>
 c          = array([[14.689027 ,  8.026973 , 10.535763 ,  7.6702414, 13.736474 ,
        11.740794 , 13.902348 , 11.434159 ,  8.888... , 14.991835 ,
        21.361666 , 15.339611 , 11.106105 , 18.958961 , 15.765928 ,
        23.546219 ]], dtype=float32)
 c_bf16     = array([[16747, 16640, 16681, 16629, 16732, 16700, 16734, 16695, 16654,
        16679, 16715, 16725, 16648, 16641, 1664...675, 16784, 16781, 16683, 16770, 16778, 16753,
        16752, 16811, 16757, 16690, 16792, 16764, 16828]], dtype=uint16)
 c_f32_rounded = array([[14.6875 ,  8.     , 10.5625 ,  7.65625, 13.75   , 11.75   ,
        13.875  , 11.4375 ,  8.875  , 10.4375 , 12...   , 17.25   , 15.0625 , 15.     , 21.375  , 15.3125 ,
        11.125  , 19.     , 15.75   , 23.5    ]], dtype=float32)
 capability = 'neon'
 metric     = 'mahalanobis'
 ndim       = 16
 simd_kernel = <built-in function mahalanobis>
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

 scripts/test.py:779: 
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
 scripts/test.py:179: in profile
    result = callable(*args, **kwargs)
        args       = (array([0.08740234, 0.00982666, 0.01745605, 0.07861328, 0.0168457 ,
       0.14941406, 0.04296875, 0.05957031, 0.10693...
        16.25   , 17.25   , 15.0625 , 15.     , 21.375  , 15.3125 ,
        11.125  , 19.     , 15.75   , 23.5    ]]))
        before     = 9868695100379
        callable   = <function baseline_mahalanobis at 0x7ffff48ef4c0>
        kwargs     = {}
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

 x = array([0.08740234, 0.00982666, 0.01745605, 0.07861328, 0.0168457 ,
       0.14941406, 0.04296875, 0.05957031, 0.10693359, 0.14941406,
       0.01434326, 0.01977539, 0.10546875, 0.01599121, 0.06591797,
       0.05981445])
 y = array([0.04125977, 0.09326172, 0.09716797, 0.00759888, 0.08447266,
       0.06591797, 0.14941406, 0.02770996, 0.05029297, 0.04858398,
       0.07958984, 0.07128906, 0.10986328, 0.02429199, 0.03662109,
       0.01342773])
 z = array([[14.6875 ,  8.     , 10.5625 ,  7.65625, 13.75   , 11.75   ,
        13.875  , 11.4375 ,  8.875  , 10.4375 , 12...,
        16.25   , 17.25   , 15.0625 , 15.     , 21.375  , 15.3125 ,
        11.125  , 19.     , 15.75   , 23.5    ]])

    def baseline_mahalanobis(x, y, z):
        diff = x - y
 >       return np.sqrt(diff @ z @ diff)
 E       RuntimeWarning: invalid value encountered in sqrt

 diff       = array([ 0.04614258, -0.08343506, -0.07971191,  0.0710144 , -0.06762695,
        0.08349609, -0.10644531,  0.03186035,  0.05664062,  0.10083008,
       -0.06524658, -0.05151367, -0.00439453, -0.00830078,  0.02929688,
        0.04638672])
 x          = array([0.08740234, 0.00982666, 0.01745605, 0.07861328, 0.0168457 ,
       0.14941406, 0.04296875, 0.05957031, 0.10693359, 0.14941406,
       0.01434326, 0.01977539, 0.10546875, 0.01599121, 0.06591797,
       0.05981445])
 y          = array([0.04125977, 0.09326172, 0.09716797, 0.00759888, 0.08447266,
       0.06591797, 0.14941406, 0.02770996, 0.05029297, 0.04858398,
       0.07958984, 0.07128906, 0.10986328, 0.02429199, 0.03662109,
       0.01342773])
 z          = array([[14.6875 ,  8.     , 10.5625 ,  7.65625, 13.75   , 11.75   ,
        13.875  , 11.4375 ,  8.875  , 10.4375 , 12...,
        16.25   , 17.25   , 15.0625 , 15.     , 21.375  , 15.3125 ,
        11.125  , 19.     , 15.75   , 23.5    ]])

 scripts/test.py:152: RuntimeWarning
 __________________ test_curved_bf16[neon-mahalanobis-33-2-5] ___________________

 ndim = 33, metric = 'mahalanobis', capability = 'neon'
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("ndim", [11, 16, 33])
    @pytest.mark.parametrize("metric", ["bilinear", "mahalanobis"])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_curved_bf16(ndim, metric, capability, stats_fixture):
        """Compares various SIMD kernels (like Bilinear Forms and Mahalanobis distances) for curved spaces
        with their NumPy or baseline counterparts, testing accuracy for the Brain-float format not
        natively supported by NumPy."""
    
        np.random.seed()
    
        # Let's generate some non-negative probability distributions
        a = np.abs(np.random.randn(ndim).astype(np.float32))
        b = np.abs(np.random.randn(ndim).astype(np.float32))
        a /= np.sum(a)
        b /= np.sum(b)
    
        # Let's compute the inverse of the covariance matrix, otherwise in the SciPy
        # implementation of the Mahalanobis we may face `sqrt` of a negative number.
        # We multiply the matrix by its transpose to get a positive-semi-definite matrix.
        c = np.abs(np.random.randn(ndim, ndim).astype(np.float32))
        c = np.dot(c, c.T)
    
        a_f32_rounded, a_bf16 = f32_downcast_to_bf16(a)
        b_f32_rounded, b_bf16 = f32_downcast_to_bf16(b)
        c_f32_rounded, c_bf16 = f32_downcast_to_bf16(c)
    
        keep_one_capability(capability)
        baseline_kernel, simd_kernel = name_to_kernels(metric)
 >       accurate_dt, accurate = profile(
            baseline_kernel,
            a_f32_rounded.astype(np.float64),
            b_f32_rounded.astype(np.float64),
            c_f32_rounded.astype(np.float64),
        )

 a          = array([0.03540281, 0.00993484, 0.00022579, 0.02995291, 0.03050107,
       0.0066338 , 0.04019552, 0.03488199, 0.049223... 0.00603401, 0.06942448, 0.07744119, 0.05076193, 0.02362635,
       0.03672525, 0.03607661, 0.00587797], dtype=float32)
 a_bf16     = array([15633, 15395, 14701, 15605, 15610, 15321, 15653, 15631, 15690,
       15678, 15597, 15784, 15672, 15163, 15622,...14979, 15359, 15428, 15616, 15500, 15491, 15302, 15758,
       15775, 15696, 15554, 15638, 15636, 15297], dtype=uint16)
 a_f32_rounded = array([0.03540039, 0.00994873, 0.00022602, 0.02990723, 0.03051758,
       0.00662231, 0.0402832 , 0.03491211, 0.049316... 0.00604248, 0.06933594, 0.07763672, 0.05078125, 0.02368164,
       0.03662109, 0.03613281, 0.00588989], dtype=float32)
 b          = array([0.03703048, 0.03214178, 0.01813691, 0.05977879, 0.04672185,
       0.00504418, 0.01093503, 0.07843575, 0.044992... 0.02203535, 0.00879672, 0.06270665, 0.04260651, 0.02138894,
       0.00134449, 0.0022898 , 0.00896877], dtype=float32)
 b_bf16     = array([15640, 15620, 15509, 15733, 15679, 15269, 15411, 15777, 15672,
       15687, 15153, 15387, 15639, 15595, 15723,...15669, 15657, 15185, 15711, 15481, 15727, 15541, 15376,
       15744, 15663, 15535, 15024, 15126, 15379], dtype=uint16)
 b_f32_rounded = array([0.03710938, 0.03222656, 0.01818848, 0.05981445, 0.04663086,
       0.0050354 , 0.01092529, 0.07861328, 0.044921... 0.02209473, 0.00878906, 0.0625    , 0.04272461, 0.0213623 ,
       0.00134277, 0.00228882, 0.00897217], dtype=float32)
 baseline_kernel = <function baseline_mahalanobis at 0x7ffff48ef4c0>
 c          = array([[17.691227, 19.920671, 23.686909, ..., 21.670725, 27.799852,
        19.161392],
       [19.920671, 16.467949, ...    23.499474],
       [19.161392, 19.867712, 19.013178, ..., 25.527346, 23.499474,
        39.773933]], dtype=float32)
 c_bf16     = array([[16782, 16799, 16829, ..., 16813, 16862, 16793],
       [16799, 16772, 16825, ..., 16808, 16869, 16799],
      ...[16862, 16869, 16850, ..., 16782, 16760, 16828],
       [16793, 16799, 16792, ..., 16844, 16828, 16927]], dtype=uint16)
 c_f32_rounded = array([[17.75 , 19.875, 23.625, ..., 21.625, 27.75 , 19.125],
       [19.875, 16.5  , 23.125, ..., 21.   , 28.625, 19.....25 , ..., 17.75 , 15.5  , 23.5  ],
       [19.125, 19.875, 19.   , ..., 25.5  , 23.5  , 39.75 ]],
      dtype=float32)
 capability = 'neon'
 metric     = 'mahalanobis'
 ndim       = 33
 simd_kernel = <built-in function mahalanobis>
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

 scripts/test.py:779: 
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
 scripts/test.py:179: in profile
    result = callable(*args, **kwargs)
        args       = (array([0.03540039, 0.00994873, 0.00022602, 0.02990723, 0.03051758,
       0.00662231, 0.0402832 , 0.03491211, 0.04931... [27.75 , 28.625, 26.25 , ..., 17.75 , 15.5  , 23.5  ],
       [19.125, 19.875, 19.   , ..., 25.5  , 23.5  , 39.75 ]]))
        before     = 9869406488262
        callable   = <function baseline_mahalanobis at 0x7ffff48ef4c0>
        kwargs     = {}
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

 x = array([0.03540039, 0.00994873, 0.00022602, 0.02990723, 0.03051758,
       0.00662231, 0.0402832 , 0.03491211, 0.049316...1599121,
       0.00604248, 0.06933594, 0.07763672, 0.05078125, 0.02368164,
       0.03662109, 0.03613281, 0.00588989])
 y = array([0.03710938, 0.03222656, 0.01818848, 0.05981445, 0.04663086,
       0.0050354 , 0.01092529, 0.07861328, 0.044921...5834961,
       0.02209473, 0.00878906, 0.0625    , 0.04272461, 0.0213623 ,
       0.00134277, 0.00228882, 0.00897217])
 z = array([[17.75 , 19.875, 23.625, ..., 21.625, 27.75 , 19.125],
       [19.875, 16.5  , 23.125, ..., 21.   , 28.625, 19....  [27.75 , 28.625, 26.25 , ..., 17.75 , 15.5  , 23.5  ],
       [19.125, 19.875, 19.   , ..., 25.5  , 23.5  , 39.75 ]])

    def baseline_mahalanobis(x, y, z):
        diff = x - y
 >       return np.sqrt(diff @ z @ diff)
 E       RuntimeWarning: invalid value encountered in sqrt

 diff       = array([-0.00170898, -0.02227783, -0.01796246, -0.02990723, -0.01611328,
        0.00158691,  0.02935791, -0.04370117, ...
       -0.01605225,  0.06054688,  0.01513672,  0.00805664,  0.00231934,
        0.03527832,  0.03384399, -0.00308228])
 x          = array([0.03540039, 0.00994873, 0.00022602, 0.02990723, 0.03051758,
       0.00662231, 0.0402832 , 0.03491211, 0.049316...1599121,
       0.00604248, 0.06933594, 0.07763672, 0.05078125, 0.02368164,
       0.03662109, 0.03613281, 0.00588989])
 y          = array([0.03710938, 0.03222656, 0.01818848, 0.05981445, 0.04663086,
       0.0050354 , 0.01092529, 0.07861328, 0.044921...5834961,
       0.02209473, 0.00878906, 0.0625    , 0.04272461, 0.0213623 ,
       0.00134277, 0.00228882, 0.00897217])
 z          = array([[17.75 , 19.875, 23.625, ..., 21.625, 27.75 , 19.125],
       [19.875, 16.5  , 23.125, ..., 21.   , 28.625, 19....  [27.75 , 28.625, 26.25 , ..., 17.75 , 15.5  , 23.5  ],
       [19.125, 19.875, 19.   , ..., 25.5  , 23.5  , 39.75 ]])

 scripts/test.py:152: RuntimeWarning
 ________________ test_curved_bf16[neon_f16-mahalanobis-11-3-5] _________________

 ndim = 11, metric = 'mahalanobis', capability = 'neon_f16'
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("ndim", [11, 16, 33])
    @pytest.mark.parametrize("metric", ["bilinear", "mahalanobis"])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_curved_bf16(ndim, metric, capability, stats_fixture):
        """Compares various SIMD kernels (like Bilinear Forms and Mahalanobis distances) for curved spaces
        with their NumPy or baseline counterparts, testing accuracy for the Brain-float format not
        natively supported by NumPy."""
    
        np.random.seed()
    
        # Let's generate some non-negative probability distributions
        a = np.abs(np.random.randn(ndim).astype(np.float32))
        b = np.abs(np.random.randn(ndim).astype(np.float32))
        a /= np.sum(a)
        b /= np.sum(b)
    
        # Let's compute the inverse of the covariance matrix, otherwise in the SciPy
        # implementation of the Mahalanobis we may face `sqrt` of a negative number.
        # We multiply the matrix by its transpose to get a positive-semi-definite matrix.
        c = np.abs(np.random.randn(ndim, ndim).astype(np.float32))
        c = np.dot(c, c.T)
    
        a_f32_rounded, a_bf16 = f32_downcast_to_bf16(a)
        b_f32_rounded, b_bf16 = f32_downcast_to_bf16(b)
        c_f32_rounded, c_bf16 = f32_downcast_to_bf16(c)
    
        keep_one_capability(capability)
        baseline_kernel, simd_kernel = name_to_kernels(metric)
 >       accurate_dt, accurate = profile(
            baseline_kernel,
            a_f32_rounded.astype(np.float64),
            b_f32_rounded.astype(np.float64),
            c_f32_rounded.astype(np.float64),
        )

 a          = array([0.03623675, 0.1075649 , 0.06457897, 0.08994061, 0.04917687,
       0.06549812, 0.0609122 , 0.10095163, 0.10037415, 0.15568373,
       0.16908205], dtype=float32)
 a_bf16     = array([15636, 15836, 15748, 15800, 15689, 15750, 15737, 15823, 15822,
       15903, 15917], dtype=uint16)
 a_f32_rounded = array([0.03613281, 0.10742188, 0.06445312, 0.08984375, 0.04907227,
       0.06542969, 0.06079102, 0.10107422, 0.10058594, 0.15527344,
       0.16894531], dtype=float32)
 b          = array([0.05497362, 0.15438542, 0.0415131 , 0.02422095, 0.08834565,
       0.09366503, 0.07137044, 0.22715476, 0.15668277, 0.0121488 ,
       0.07553948], dtype=float32)
 b_bf16     = array([15713, 15902, 15658, 15558, 15797, 15808, 15762, 15977, 15904,
       15431, 15771], dtype=uint16)
 b_f32_rounded = array([0.05493164, 0.15429688, 0.04150391, 0.02416992, 0.08837891,
       0.09375   , 0.07128906, 0.22753906, 0.15625   , 0.012146  ,
       0.07568359], dtype=float32)
 baseline_kernel = <function baseline_mahalanobis at 0x7ffff48ef4c0>
 c          = array([[ 7.817663 ,  5.4203644,  8.029955 ,  8.285779 ,  6.1569657,
         7.7604337,  6.4321895,  8.86803  ,  7.965...2,  3.898123 ,
         5.958164 ,  7.4966826,  4.963253 ,  6.775997 ,  3.6787312,
         5.7920876]], dtype=float32)
 c_bf16     = array([[16634, 16557, 16640, 16645, 16581, 16632, 16590, 16654, 16639,
        16671, 16571],
       [16557, 16489, 16...9, 16491],
       [16571, 16486, 16530, 16597, 16505, 16575, 16624, 16543, 16601,
        16491, 16569]], dtype=uint16)
 c_f32_rounded = array([[ 7.8125  ,  5.40625 ,  8.      ,  8.3125  ,  6.15625 ,  7.75    ,
         6.4375  ,  8.875   ,  7.96875 ,  9....  6.65625 ,  3.890625,  5.96875 ,
         7.5     ,  4.96875 ,  6.78125 ,  3.671875,  5.78125 ]],
      dtype=float32)
 capability = 'neon_f16'
 metric     = 'mahalanobis'
 ndim       = 11
 simd_kernel = <built-in function mahalanobis>
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

 scripts/test.py:779: 
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
 scripts/test.py:179: in profile
    result = callable(*args, **kwargs)
        args       = (array([0.03613281, 0.10742188, 0.06445312, 0.08984375, 0.04907227,
       0.06542969, 0.06079102, 0.10107422, 0.10058...3.59375 ,  4.5625  ,  6.65625 ,  3.890625,  5.96875 ,
         7.5     ,  4.96875 ,  6.78125 ,  3.671875,  5.78125 ]]))
        before     = 9870402592778
        callable   = <function baseline_mahalanobis at 0x7ffff48ef4c0>
        kwargs     = {}
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

 x = array([0.03613281, 0.10742188, 0.06445312, 0.08984375, 0.04907227,
       0.06542969, 0.06079102, 0.10107422, 0.10058594, 0.15527344,
       0.16894531])
 y = array([0.05493164, 0.15429688, 0.04150391, 0.02416992, 0.08837891,
       0.09375   , 0.07128906, 0.22753906, 0.15625   , 0.012146  ,
       0.07568359])
 z = array([[ 7.8125  ,  5.40625 ,  8.      ,  8.3125  ,  6.15625 ,  7.75    ,
         6.4375  ,  8.875   ,  7.96875 ,  9.... 3.59375 ,  4.5625  ,  6.65625 ,  3.890625,  5.96875 ,
         7.5     ,  4.96875 ,  6.78125 ,  3.671875,  5.78125 ]])

    def baseline_mahalanobis(x, y, z):
        diff = x - y
 >       return np.sqrt(diff @ z @ diff)
 E       RuntimeWarning: invalid value encountered in sqrt

 diff       = array([-0.01879883, -0.046875  ,  0.02294922,  0.06567383, -0.03930664,
       -0.02832031, -0.01049805, -0.12646484, -0.05566406,  0.14312744,
        0.09326172])
 x          = array([0.03613281, 0.10742188, 0.06445312, 0.08984375, 0.04907227,
       0.06542969, 0.06079102, 0.10107422, 0.10058594, 0.15527344,
       0.16894531])
 y          = array([0.05493164, 0.15429688, 0.04150391, 0.02416992, 0.08837891,
       0.09375   , 0.07128906, 0.22753906, 0.15625   , 0.012146  ,
       0.07568359])
 z          = array([[ 7.8125  ,  5.40625 ,  8.      ,  8.3125  ,  6.15625 ,  7.75    ,
         6.4375  ,  8.875   ,  7.96875 ,  9.... 3.59375 ,  4.5625  ,  6.65625 ,  3.890625,  5.96875 ,
         7.5     ,  4.96875 ,  6.78125 ,  3.671875,  5.78125 ]])

 scripts/test.py:152: RuntimeWarning
 ________________ test_curved_bf16[neon_f16-mahalanobis-11-5-5] _________________

 ndim = 11, metric = 'mahalanobis', capability = 'neon_f16'
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("ndim", [11, 16, 33])
    @pytest.mark.parametrize("metric", ["bilinear", "mahalanobis"])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_curved_bf16(ndim, metric, capability, stats_fixture):
        """Compares various SIMD kernels (like Bilinear Forms and Mahalanobis distances) for curved spaces
        with their NumPy or baseline counterparts, testing accuracy for the Brain-float format not
        natively supported by NumPy."""
    
        np.random.seed()
    
        # Let's generate some non-negative probability distributions
        a = np.abs(np.random.randn(ndim).astype(np.float32))
        b = np.abs(np.random.randn(ndim).astype(np.float32))
        a /= np.sum(a)
        b /= np.sum(b)
    
        # Let's compute the inverse of the covariance matrix, otherwise in the SciPy
        # implementation of the Mahalanobis we may face `sqrt` of a negative number.
        # We multiply the matrix by its transpose to get a positive-semi-definite matrix.
        c = np.abs(np.random.randn(ndim, ndim).astype(np.float32))
        c = np.dot(c, c.T)
    
        a_f32_rounded, a_bf16 = f32_downcast_to_bf16(a)
        b_f32_rounded, b_bf16 = f32_downcast_to_bf16(b)
        c_f32_rounded, c_bf16 = f32_downcast_to_bf16(c)
    
        keep_one_capability(capability)
        baseline_kernel, simd_kernel = name_to_kernels(metric)
 >       accurate_dt, accurate = profile(
            baseline_kernel,
            a_f32_rounded.astype(np.float64),
            b_f32_rounded.astype(np.float64),
            c_f32_rounded.astype(np.float64),
        )

 a          = array([0.12660298, 0.08114947, 0.08007251, 0.115904  , 0.2153351 ,
       0.05907138, 0.02359538, 0.09957831, 0.03943907, 0.0782379 ,
       0.08101383], dtype=float32)
 a_bf16     = array([15874, 15782, 15780, 15853, 15965, 15730, 15553, 15820, 15650,
       15776, 15782], dtype=uint16)
 a_f32_rounded = array([0.12695312, 0.08105469, 0.08007812, 0.11572266, 0.21582031,
       0.05908203, 0.02355957, 0.09960938, 0.03955078, 0.078125  ,
       0.08105469], dtype=float32)
 b          = array([0.16647793, 0.11072951, 0.05530515, 0.04440652, 0.04506533,
       0.05310299, 0.09118676, 0.145397  , 0.09080222, 0.08254655,
       0.11498006], dtype=float32)
 b_bf16     = array([15914, 15843, 15715, 15670, 15673, 15706, 15803, 15893, 15802,
       15785, 15851], dtype=uint16)
 b_f32_rounded = array([0.16601562, 0.11083984, 0.05541992, 0.04443359, 0.04516602,
       0.05322266, 0.09130859, 0.14550781, 0.09082031, 0.08251953,
       0.11474609], dtype=float32)
 baseline_kernel = <function baseline_mahalanobis at 0x7ffff48ef4c0>
 c          = array([[ 6.9817986, 12.20738  ,  8.849764 ,  5.7134676,  6.459654 ,
         8.388977 ,  7.9466085,  5.715294 ,  6.953... , 10.541095 ,
        11.068749 , 10.489439 ,  7.5722394, 12.647684 , 13.608071 ,
        10.3798685]], dtype=float32)
 c_bf16     = array([[16607, 16707, 16654, 16567, 16591, 16646, 16638, 16567, 16607,
        16666, 16700],
       [16707, 16673, 16...9, 16730],
       [16700, 16648, 16694, 16678, 16681, 16689, 16680, 16626, 16714,
        16730, 16678]], dtype=uint16)
 c_f32_rounded = array([[ 6.96875 , 12.1875  ,  8.875   ,  5.71875 ,  6.46875 ,  8.375   ,
         7.9375  ,  5.71875 ,  6.96875 ,  9.... 10.375   , 10.5625  , 11.0625  ,
        10.5     ,  7.5625  , 12.625   , 13.625   , 10.375   ]],
      dtype=float32)
 capability = 'neon_f16'
 metric     = 'mahalanobis'
 ndim       = 11
 simd_kernel = <built-in function mahalanobis>
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

 scripts/test.py:779: 
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
 scripts/test.py:179: in profile
    result = callable(*args, **kwargs)
        args       = (array([0.12695312, 0.08105469, 0.08007812, 0.11572266, 0.21582031,
       0.05908203, 0.02355957, 0.09960938, 0.03955...8.5     , 11.375   , 10.375   , 10.5625  , 11.0625  ,
        10.5     ,  7.5625  , 12.625   , 13.625   , 10.375   ]]))
        before     = 9871047550521
        callable   = <function baseline_mahalanobis at 0x7ffff48ef4c0>
        kwargs     = {}
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

 x = array([0.12695312, 0.08105469, 0.08007812, 0.11572266, 0.21582031,
       0.05908203, 0.02355957, 0.09960938, 0.03955078, 0.078125  ,
       0.08105469])
 y = array([0.16601562, 0.11083984, 0.05541992, 0.04443359, 0.04516602,
       0.05322266, 0.09130859, 0.14550781, 0.09082031, 0.08251953,
       0.11474609])
 z = array([[ 6.96875 , 12.1875  ,  8.875   ,  5.71875 ,  6.46875 ,  8.375   ,
         7.9375  ,  5.71875 ,  6.96875 ,  9.... 8.5     , 11.375   , 10.375   , 10.5625  , 11.0625  ,
        10.5     ,  7.5625  , 12.625   , 13.625   , 10.375   ]])

    def baseline_mahalanobis(x, y, z):
        diff = x - y
 >       return np.sqrt(diff @ z @ diff)
 E       RuntimeWarning: invalid value encountered in sqrt

 diff       = array([-0.0390625 , -0.02978516,  0.0246582 ,  0.07128906,  0.1706543 ,
        0.00585938, -0.06774902, -0.04589844, -0.05126953, -0.00439453,
       -0.03369141])
 x          = array([0.12695312, 0.08105469, 0.08007812, 0.11572266, 0.21582031,
       0.05908203, 0.02355957, 0.09960938, 0.03955078, 0.078125  ,
       0.08105469])
 y          = array([0.16601562, 0.11083984, 0.05541992, 0.04443359, 0.04516602,
       0.05322266, 0.09130859, 0.14550781, 0.09082031, 0.08251953,
       0.11474609])
 z          = array([[ 6.96875 , 12.1875  ,  8.875   ,  5.71875 ,  6.46875 ,  8.375   ,
         7.9375  ,  5.71875 ,  6.96875 ,  9.... 8.5     , 11.375   , 10.375   , 10.5625  , 11.0625  ,
        10.5     ,  7.5625  , 12.625   , 13.625   , 10.375   ]])

 scripts/test.py:152: RuntimeWarning
 ________________ test_curved_bf16[neon_f16-mahalanobis-16-2-5] _________________

 ndim = 16, metric = 'mahalanobis', capability = 'neon_f16'
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("ndim", [11, 16, 33])
    @pytest.mark.parametrize("metric", ["bilinear", "mahalanobis"])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_curved_bf16(ndim, metric, capability, stats_fixture):
        """Compares various SIMD kernels (like Bilinear Forms and Mahalanobis distances) for curved spaces
        with their NumPy or baseline counterparts, testing accuracy for the Brain-float format not
        natively supported by NumPy."""
    
        np.random.seed()
    
        # Let's generate some non-negative probability distributions
        a = np.abs(np.random.randn(ndim).astype(np.float32))
        b = np.abs(np.random.randn(ndim).astype(np.float32))
        a /= np.sum(a)
        b /= np.sum(b)
    
        # Let's compute the inverse of the covariance matrix, otherwise in the SciPy
        # implementation of the Mahalanobis we may face `sqrt` of a negative number.
        # We multiply the matrix by its transpose to get a positive-semi-definite matrix.
        c = np.abs(np.random.randn(ndim, ndim).astype(np.float32))
        c = np.dot(c, c.T)
    
        a_f32_rounded, a_bf16 = f32_downcast_to_bf16(a)
        b_f32_rounded, b_bf16 = f32_downcast_to_bf16(b)
        c_f32_rounded, c_bf16 = f32_downcast_to_bf16(c)
    
        keep_one_capability(capability)
        baseline_kernel, simd_kernel = name_to_kernels(metric)
 >       accurate_dt, accurate = profile(
            baseline_kernel,
            a_f32_rounded.astype(np.float64),
            b_f32_rounded.astype(np.float64),
            c_f32_rounded.astype(np.float64),
        )

 a          = array([0.0187652 , 0.02798309, 0.06458955, 0.06815584, 0.086556  ,
       0.10264703, 0.11966526, 0.08724401, 0.02591435, 0.05829881,
       0.03974538, 0.02560033, 0.09297265, 0.04335269, 0.0158213 ,
       0.12268848], dtype=float32)
 a_bf16     = array([15514, 15589, 15748, 15756, 15793, 15826, 15861, 15795, 15572,
       15727, 15651, 15570, 15806, 15666, 15490, 15867], dtype=uint16)
 a_f32_rounded = array([0.01879883, 0.0279541 , 0.06445312, 0.06835938, 0.08642578,
       0.10253906, 0.11962891, 0.08740234, 0.02587891, 0.05834961,
       0.03979492, 0.02563477, 0.09277344, 0.04345703, 0.01586914,
       0.12255859], dtype=float32)
 b          = array([0.15081042, 0.01463901, 0.09932765, 0.07558102, 0.07797746,
       0.11263864, 0.05485262, 0.0239128 , 0.05304088, 0.02663763,
       0.08695968, 0.08268384, 0.06237492, 0.01835205, 0.04134476,
       0.01886665], dtype=float32)
 b_bf16     = array([15898, 15472, 15819, 15771, 15776, 15847, 15713, 15556, 15705,
       15578, 15794, 15785, 15743, 15510, 15657, 15515], dtype=uint16)
 b_f32_rounded = array([0.15039062, 0.01464844, 0.09912109, 0.07568359, 0.078125  ,
       0.11279297, 0.05493164, 0.02392578, 0.05297852, 0.02661133,
       0.08691406, 0.08251953, 0.06225586, 0.01831055, 0.04125977,
       0.0189209 ], dtype=float32)
 baseline_kernel = <function baseline_mahalanobis at 0x7ffff48ef4c0>
 c          = array([[10.039975 , 13.598471 , 11.985338 , 12.139287 , 13.866672 ,
         7.3827286, 11.465838 , 12.21033  , 10.533... ,  7.9961386,
        10.943364 ,  7.276928 ,  9.091882 , 14.926211 , 14.608421 ,
        11.069676 ]], dtype=float32)
 c_bf16     = array([[16673, 16730, 16704, 16706, 16734, 16620, 16695, 16707, 16681,
        16701, 16716, 16594, 16680, 16794, 1672...645, 16720, 16710, 16652, 16670, 16675, 16707,
        16640, 16687, 16617, 16657, 16751, 16746, 16689]], dtype=uint16)
 c_f32_rounded = array([[10.0625 , 13.625  , 12.     , 12.125  , 13.875  ,  7.375  ,
        11.4375 , 12.1875 , 10.5625 , 11.8125 , 12...5  , 10.1875 , 12.1875 ,  8.     , 10.9375 ,  7.28125,
         9.0625 , 14.9375 , 14.625  , 11.0625 ]], dtype=float32)
 capability = 'neon_f16'
 metric     = 'mahalanobis'
 ndim       = 16
 simd_kernel = <built-in function mahalanobis>
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

 scripts/test.py:779: 
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
 scripts/test.py:179: in profile
    result = callable(*args, **kwargs)
        args       = (array([0.01879883, 0.0279541 , 0.06445312, 0.06835938, 0.08642578,
       0.10253906, 0.11962891, 0.08740234, 0.02587...
         9.875  , 10.1875 , 12.1875 ,  8.     , 10.9375 ,  7.28125,
         9.0625 , 14.9375 , 14.625  , 11.0625 ]]))
        before     = 9871700309199
        callable   = <function baseline_mahalanobis at 0x7ffff48ef4c0>
        kwargs     = {}
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

 x = array([0.01879883, 0.0279541 , 0.06445312, 0.06835938, 0.08642578,
       0.10253906, 0.11962891, 0.08740234, 0.02587891, 0.05834961,
       0.03979492, 0.02563477, 0.09277344, 0.04345703, 0.01586914,
       0.12255859])
 y = array([0.15039062, 0.01464844, 0.09912109, 0.07568359, 0.078125  ,
       0.11279297, 0.05493164, 0.02392578, 0.05297852, 0.02661133,
       0.08691406, 0.08251953, 0.06225586, 0.01831055, 0.04125977,
       0.0189209 ])
 z = array([[10.0625 , 13.625  , 12.     , 12.125  , 13.875  ,  7.375  ,
        11.4375 , 12.1875 , 10.5625 , 11.8125 , 12...,
         9.875  , 10.1875 , 12.1875 ,  8.     , 10.9375 ,  7.28125,
         9.0625 , 14.9375 , 14.625  , 11.0625 ]])

    def baseline_mahalanobis(x, y, z):
        diff = x - y
 >       return np.sqrt(diff @ z @ diff)
 E       RuntimeWarning: invalid value encountered in sqrt

 diff       = array([-0.1315918 ,  0.01330566, -0.03466797, -0.00732422,  0.00830078,
       -0.01025391,  0.06469727,  0.06347656, -0.02709961,  0.03173828,
       -0.04711914, -0.05688477,  0.03051758,  0.02514648, -0.02539062,
        0.1036377 ])
 x          = array([0.01879883, 0.0279541 , 0.06445312, 0.06835938, 0.08642578,
       0.10253906, 0.11962891, 0.08740234, 0.02587891, 0.05834961,
       0.03979492, 0.02563477, 0.09277344, 0.04345703, 0.01586914,
       0.12255859])
 y          = array([0.15039062, 0.01464844, 0.09912109, 0.07568359, 0.078125  ,
       0.11279297, 0.05493164, 0.02392578, 0.05297852, 0.02661133,
       0.08691406, 0.08251953, 0.06225586, 0.01831055, 0.04125977,
       0.0189209 ])
 z          = array([[10.0625 , 13.625  , 12.     , 12.125  , 13.875  ,  7.375  ,
        11.4375 , 12.1875 , 10.5625 , 11.8125 , 12...,
         9.875  , 10.1875 , 12.1875 ,  8.     , 10.9375 ,  7.28125,
         9.0625 , 14.9375 , 14.625  , 11.0625 ]])

 scripts/test.py:152: RuntimeWarning
 ________________ test_curved_bf16[neon_f16-mahalanobis-16-4-5] _________________

 ndim = 16, metric = 'mahalanobis', capability = 'neon_f16'
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("ndim", [11, 16, 33])
    @pytest.mark.parametrize("metric", ["bilinear", "mahalanobis"])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_curved_bf16(ndim, metric, capability, stats_fixture):
        """Compares various SIMD kernels (like Bilinear Forms and Mahalanobis distances) for curved spaces
        with their NumPy or baseline counterparts, testing accuracy for the Brain-float format not
        natively supported by NumPy."""
    
        np.random.seed()
    
        # Let's generate some non-negative probability distributions
        a = np.abs(np.random.randn(ndim).astype(np.float32))
        b = np.abs(np.random.randn(ndim).astype(np.float32))
        a /= np.sum(a)
        b /= np.sum(b)
    
        # Let's compute the inverse of the covariance matrix, otherwise in the SciPy
        # implementation of the Mahalanobis we may face `sqrt` of a negative number.
        # We multiply the matrix by its transpose to get a positive-semi-definite matrix.
        c = np.abs(np.random.randn(ndim, ndim).astype(np.float32))
        c = np.dot(c, c.T)
    
        a_f32_rounded, a_bf16 = f32_downcast_to_bf16(a)
        b_f32_rounded, b_bf16 = f32_downcast_to_bf16(b)
        c_f32_rounded, c_bf16 = f32_downcast_to_bf16(c)
    
        keep_one_capability(capability)
        baseline_kernel, simd_kernel = name_to_kernels(metric)
 >       accurate_dt, accurate = profile(
            baseline_kernel,
            a_f32_rounded.astype(np.float64),
            b_f32_rounded.astype(np.float64),
            c_f32_rounded.astype(np.float64),
        )

 a          = array([0.04482234, 0.03185978, 0.00234989, 0.18771383, 0.10329487,
       0.176091  , 0.02708258, 0.05448305, 0.02829141, 0.02019442,
       0.05695251, 0.08344387, 0.05610312, 0.10940359, 0.00598629,
       0.01192744], dtype=float32)
 a_bf16     = array([15672, 15618, 15130, 15936, 15828, 15924, 15582, 15711, 15592,
       15525, 15721, 15787, 15718, 15840, 15300, 15427], dtype=uint16)
 a_f32_rounded = array([0.04492188, 0.03173828, 0.00234985, 0.1875    , 0.10351562,
       0.17578125, 0.02709961, 0.05444336, 0.02832031, 0.0201416 ,
       0.05688477, 0.08349609, 0.05615234, 0.109375  , 0.00598145,
       0.01190186], dtype=float32)
 b          = array([0.05994695, 0.14019841, 0.00578102, 0.06275216, 0.02639994,
       0.173251  , 0.00403191, 0.03815669, 0.03410202, 0.06304766,
       0.04263454, 0.07754312, 0.10605574, 0.0437653 , 0.03493923,
       0.08739435], dtype=float32)
 b_bf16     = array([15734, 15888, 15293, 15745, 15576, 15921, 15236, 15644, 15628,
       15745, 15663, 15775, 15833, 15667, 15631, 15795], dtype=uint16)
 b_f32_rounded = array([0.06005859, 0.140625  , 0.00576782, 0.06298828, 0.02636719,
       0.17285156, 0.00402832, 0.03808594, 0.03417969, 0.06298828,
       0.04272461, 0.07763672, 0.10595703, 0.04370117, 0.03491211,
       0.08740234], dtype=float32)
 baseline_kernel = <function baseline_mahalanobis at 0x7ffff48ef4c0>
 c          = array([[ 6.1935654,  6.233251 ,  5.9464636,  9.545506 ,  9.607566 ,
         9.904306 ,  5.9225225,  6.4627686,  8.707... , 14.535968 ,
        13.095964 , 16.718227 , 10.963706 , 19.458944 , 11.356109 ,
        13.145994 ]], dtype=float32)
 c_bf16     = array([[16582, 16583, 16574, 16665, 16666, 16670, 16574, 16591, 16651,
        16583, 16667, 16688, 16534, 16643, 1668...670, 16746, 16647, 16648, 16659, 16712, 16704,
        16745, 16722, 16774, 16687, 16796, 16694, 16722]], dtype=uint16)
 c_f32_rounded = array([[ 6.1875 ,  6.21875,  5.9375 ,  9.5625 ,  9.625  ,  9.875  ,
         5.9375 ,  6.46875,  8.6875 ,  6.21875,  9...75 , 12.5    , 12.     , 14.5625 , 13.125  , 16.75   ,
        10.9375 , 19.5    , 11.375  , 13.125  ]], dtype=float32)
 capability = 'neon_f16'
 metric     = 'mahalanobis'
 ndim       = 16
 simd_kernel = <built-in function mahalanobis>
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

 scripts/test.py:779: 
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
 scripts/test.py:179: in profile
    result = callable(*args, **kwargs)
        args       = (array([0.04492188, 0.03173828, 0.00234985, 0.1875    , 0.10351562,
       0.17578125, 0.02709961, 0.05444336, 0.02832...
         9.1875 , 12.5    , 12.     , 14.5625 , 13.125  , 16.75   ,
        10.9375 , 19.5    , 11.375  , 13.125  ]]))
        before     = 9872387520017
        callable   = <function baseline_mahalanobis at 0x7ffff48ef4c0>
        kwargs     = {}
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

 x = array([0.04492188, 0.03173828, 0.00234985, 0.1875    , 0.10351562,
       0.17578125, 0.02709961, 0.05444336, 0.02832031, 0.0201416 ,
       0.05688477, 0.08349609, 0.05615234, 0.109375  , 0.00598145,
       0.01190186])
 y = array([0.06005859, 0.140625  , 0.00576782, 0.06298828, 0.02636719,
       0.17285156, 0.00402832, 0.03808594, 0.03417969, 0.06298828,
       0.04272461, 0.07763672, 0.10595703, 0.04370117, 0.03491211,
       0.08740234])
 z = array([[ 6.1875 ,  6.21875,  5.9375 ,  9.5625 ,  9.625  ,  9.875  ,
         5.9375 ,  6.46875,  8.6875 ,  6.21875,  9...,
         9.1875 , 12.5    , 12.     , 14.5625 , 13.125  , 16.75   ,
        10.9375 , 19.5    , 11.375  , 13.125  ]])

    def baseline_mahalanobis(x, y, z):
        diff = x - y
 >       return np.sqrt(diff @ z @ diff)
 E       RuntimeWarning: invalid value encountered in sqrt

 diff       = array([-0.01513672, -0.10888672, -0.00341797,  0.12451172,  0.07714844,
        0.00292969,  0.02307129,  0.01635742, -0.00585938, -0.04284668,
        0.01416016,  0.00585938, -0.04980469,  0.06567383, -0.02893066,
       -0.07550049])
 x          = array([0.04492188, 0.03173828, 0.00234985, 0.1875    , 0.10351562,
       0.17578125, 0.02709961, 0.05444336, 0.02832031, 0.0201416 ,
       0.05688477, 0.08349609, 0.05615234, 0.109375  , 0.00598145,
       0.01190186])
 y          = array([0.06005859, 0.140625  , 0.00576782, 0.06298828, 0.02636719,
       0.17285156, 0.00402832, 0.03808594, 0.03417969, 0.06298828,
       0.04272461, 0.07763672, 0.10595703, 0.04370117, 0.03491211,
       0.08740234])
 z          = array([[ 6.1875 ,  6.21875,  5.9375 ,  9.5625 ,  9.625  ,  9.875  ,
         5.9375 ,  6.46875,  8.6875 ,  6.21875,  9...,
         9.1875 , 12.5    , 12.     , 14.5625 , 13.125  , 16.75   ,
        10.9375 , 19.5    , 11.375  , 13.125  ]])

 scripts/test.py:152: RuntimeWarning
 ________________ test_curved_bf16[neon_f16-mahalanobis-16-5-5] _________________

 ndim = 16, metric = 'mahalanobis', capability = 'neon_f16'
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("ndim", [11, 16, 33])
    @pytest.mark.parametrize("metric", ["bilinear", "mahalanobis"])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_curved_bf16(ndim, metric, capability, stats_fixture):
        """Compares various SIMD kernels (like Bilinear Forms and Mahalanobis distances) for curved spaces
        with their NumPy or baseline counterparts, testing accuracy for the Brain-float format not
        natively supported by NumPy."""
    
        np.random.seed()
    
        # Let's generate some non-negative probability distributions
        a = np.abs(np.random.randn(ndim).astype(np.float32))
        b = np.abs(np.random.randn(ndim).astype(np.float32))
        a /= np.sum(a)
        b /= np.sum(b)
    
        # Let's compute the inverse of the covariance matrix, otherwise in the SciPy
        # implementation of the Mahalanobis we may face `sqrt` of a negative number.
        # We multiply the matrix by its transpose to get a positive-semi-definite matrix.
        c = np.abs(np.random.randn(ndim, ndim).astype(np.float32))
        c = np.dot(c, c.T)
    
        a_f32_rounded, a_bf16 = f32_downcast_to_bf16(a)
        b_f32_rounded, b_bf16 = f32_downcast_to_bf16(b)
        c_f32_rounded, c_bf16 = f32_downcast_to_bf16(c)
    
        keep_one_capability(capability)
        baseline_kernel, simd_kernel = name_to_kernels(metric)
 >       accurate_dt, accurate = profile(
            baseline_kernel,
            a_f32_rounded.astype(np.float64),
            b_f32_rounded.astype(np.float64),
            c_f32_rounded.astype(np.float64),
        )

 a          = array([0.00321337, 0.00178822, 0.01222622, 0.02739578, 0.1285299 ,
       0.11758783, 0.04064427, 0.04234722, 0.06563295, 0.12499657,
       0.06940846, 0.00934492, 0.06865911, 0.06687667, 0.13597651,
       0.08537199], dtype=float32)
 a_bf16     = array([15187, 15082, 15432, 15584, 15876, 15857, 15654, 15661, 15750,
       15872, 15758, 15385, 15757, 15753, 15883, 15791], dtype=uint16)
 a_f32_rounded = array([0.0032196 , 0.00178528, 0.01220703, 0.02734375, 0.12890625,
       0.11767578, 0.04052734, 0.04223633, 0.06542969, 0.125     ,
       0.06933594, 0.00933838, 0.06884766, 0.06689453, 0.13574219,
       0.08544922], dtype=float32)
 b          = array([0.07463898, 0.17106412, 0.00617968, 0.05423074, 0.0214652 ,
       0.1660327 , 0.0016577 , 0.04512201, 0.03972566, 0.04433282,
       0.03280424, 0.05164053, 0.02586653, 0.12204687, 0.0194609 ,
       0.12373134], dtype=float32)
 b_bf16     = array([15769, 15919, 15306, 15710, 15536, 15914, 15065, 15673, 15651,
       15670, 15622, 15700, 15572, 15866, 15519, 15869], dtype=uint16)
 b_f32_rounded = array([0.07470703, 0.17089844, 0.00616455, 0.05419922, 0.02148438,
       0.16601562, 0.00165558, 0.04516602, 0.03979492, 0.04443359,
       0.03271484, 0.05175781, 0.02587891, 0.12207031, 0.01940918,
       0.12353516], dtype=float32)
 baseline_kernel = <function baseline_mahalanobis at 0x7ffff48ef4c0>
 c          = array([[ 9.854318 , 10.6485615, 10.57991  , 10.470633 , 10.919385 ,
         9.815948 , 13.691738 , 13.836289 ,  9.936... ,  9.231002 ,
        10.839449 ,  8.942866 ,  9.847292 ,  8.876825 ,  7.9924493,
        11.498605 ]], dtype=float32)
 c_bf16     = array([[16670, 16682, 16681, 16680, 16687, 16669, 16731, 16733, 16671,
        16721, 16665, 16683, 16656, 16529, 1662...645, 16711, 16672, 16639, 16682, 16702, 16655,
        16660, 16685, 16655, 16670, 16654, 16640, 16696]], dtype=uint16)
 c_f32_rounded = array([[ 9.875   , 10.625   , 10.5625  , 10.5     , 10.9375  ,  9.8125  ,
        13.6875  , 13.8125  ,  9.9375  , 13....75   ,  8.9375  ,  9.25    , 10.8125  ,  8.9375  ,
         9.875   ,  8.875   ,  8.      , 11.5     ]], dtype=float32)
 capability = 'neon_f16'
 metric     = 'mahalanobis'
 ndim       = 16
 simd_kernel = <built-in function mahalanobis>
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

 scripts/test.py:779: 
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
 scripts/test.py:179: in profile
    result = callable(*args, **kwargs)
        args       = (array([0.0032196 , 0.00178528, 0.01220703, 0.02734375, 0.12890625,
       0.11767578, 0.04052734, 0.04223633, 0.06542...0.625   , 11.875   ,  8.9375  ,  9.25    , 10.8125  ,  8.9375  ,
         9.875   ,  8.875   ,  8.      , 11.5     ]]))
        before     = 9873056412368
        callable   = <function baseline_mahalanobis at 0x7ffff48ef4c0>
        kwargs     = {}
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

 x = array([0.0032196 , 0.00178528, 0.01220703, 0.02734375, 0.12890625,
       0.11767578, 0.04052734, 0.04223633, 0.06542969, 0.125     ,
       0.06933594, 0.00933838, 0.06884766, 0.06689453, 0.13574219,
       0.08544922])
 y = array([0.07470703, 0.17089844, 0.00616455, 0.05419922, 0.02148438,
       0.16601562, 0.00165558, 0.04516602, 0.03979492, 0.04443359,
       0.03271484, 0.05175781, 0.02587891, 0.12207031, 0.01940918,
       0.12353516])
 z = array([[ 9.875   , 10.625   , 10.5625  , 10.5     , 10.9375  ,  9.8125  ,
        13.6875  , 13.8125  ,  9.9375  , 13....10.625   , 11.875   ,  8.9375  ,  9.25    , 10.8125  ,  8.9375  ,
         9.875   ,  8.875   ,  8.      , 11.5     ]])

    def baseline_mahalanobis(x, y, z):
        diff = x - y
 >       return np.sqrt(diff @ z @ diff)
 E       RuntimeWarning: invalid value encountered in sqrt

 diff       = array([-0.07148743, -0.16911316,  0.00604248, -0.02685547,  0.10742188,
       -0.04833984,  0.03887177, -0.00292969,  0.02563477,  0.08056641,
        0.03662109, -0.04241943,  0.04296875, -0.05517578,  0.11633301,
       -0.03808594])
 x          = array([0.0032196 , 0.00178528, 0.01220703, 0.02734375, 0.12890625,
       0.11767578, 0.04052734, 0.04223633, 0.06542969, 0.125     ,
       0.06933594, 0.00933838, 0.06884766, 0.06689453, 0.13574219,
       0.08544922])
 y          = array([0.07470703, 0.17089844, 0.00616455, 0.05419922, 0.02148438,
       0.16601562, 0.00165558, 0.04516602, 0.03979492, 0.04443359,
       0.03271484, 0.05175781, 0.02587891, 0.12207031, 0.01940918,
       0.12353516])
 z          = array([[ 9.875   , 10.625   , 10.5625  , 10.5     , 10.9375  ,  9.8125  ,
        13.6875  , 13.8125  ,  9.9375  , 13....10.625   , 11.875   ,  8.9375  ,  9.25    , 10.8125  ,  8.9375  ,
         9.875   ,  8.875   ,  8.      , 11.5     ]])

 scripts/test.py:152: RuntimeWarning
 ________________ test_curved_bf16[neon_f16-mahalanobis-33-3-5] _________________

 ndim = 33, metric = 'mahalanobis', capability = 'neon_f16'
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("ndim", [11, 16, 33])
    @pytest.mark.parametrize("metric", ["bilinear", "mahalanobis"])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_curved_bf16(ndim, metric, capability, stats_fixture):
        """Compares various SIMD kernels (like Bilinear Forms and Mahalanobis distances) for curved spaces
        with their NumPy or baseline counterparts, testing accuracy for the Brain-float format not
        natively supported by NumPy."""
    
        np.random.seed()
    
        # Let's generate some non-negative probability distributions
        a = np.abs(np.random.randn(ndim).astype(np.float32))
        b = np.abs(np.random.randn(ndim).astype(np.float32))
        a /= np.sum(a)
        b /= np.sum(b)
    
        # Let's compute the inverse of the covariance matrix, otherwise in the SciPy
        # implementation of the Mahalanobis we may face `sqrt` of a negative number.
        # We multiply the matrix by its transpose to get a positive-semi-definite matrix.
        c = np.abs(np.random.randn(ndim, ndim).astype(np.float32))
        c = np.dot(c, c.T)
    
        a_f32_rounded, a_bf16 = f32_downcast_to_bf16(a)
        b_f32_rounded, b_bf16 = f32_downcast_to_bf16(b)
        c_f32_rounded, c_bf16 = f32_downcast_to_bf16(c)
    
        keep_one_capability(capability)
        baseline_kernel, simd_kernel = name_to_kernels(metric)
 >       accurate_dt, accurate = profile(
            baseline_kernel,
            a_f32_rounded.astype(np.float64),
            b_f32_rounded.astype(np.float64),
            c_f32_rounded.astype(np.float64),
        )

 a          = array([0.02777223, 0.05310622, 0.05148476, 0.02330123, 0.00013496,
       0.04256925, 0.02332911, 0.02987235, 0.048136... 0.02026566, 0.00974322, 0.02171614, 0.02917806, 0.0726046 ,
       0.03370481, 0.01796835, 0.00958635], dtype=float32)
 a_bf16     = array([15588, 15706, 15699, 15551, 14606, 15662, 15551, 15605, 15685,
       15379, 15654, 15706, 15522, 15509, 15535,...15669, 15507, 15687, 15545, 15637, 15294, 15526, 15392,
       15538, 15599, 15765, 15626, 15507, 15389], dtype=uint16)
 a_f32_rounded = array([0.02783203, 0.05322266, 0.05151367, 0.02331543, 0.00013542,
       0.04248047, 0.02331543, 0.02990723, 0.048095... 0.02026367, 0.00976562, 0.02172852, 0.0291748 , 0.07275391,
       0.03369141, 0.01794434, 0.00958252], dtype=float32)
 b          = array([0.05022253, 0.02710875, 0.00150181, 0.00068361, 0.04717482,
       0.02628071, 0.06262916, 0.03145095, 0.084899... 0.01074896, 0.01571573, 0.01656697, 0.0389164 , 0.04878472,
       0.00698008, 0.01872955, 0.0275298 ], dtype=float32)
 b_bf16     = array([15694, 15582, 15045, 14899, 15681, 15575, 15744, 15617, 15790,
       15479, 15530, 15813, 15562, 15368, 15106,...15409, 15711, 15620, 15691, 15556, 15666, 15408, 15489,
       15496, 15647, 15688, 15333, 15513, 15586], dtype=uint16)
 b_f32_rounded = array([0.05029297, 0.02709961, 0.00150299, 0.00068283, 0.04711914,
       0.02624512, 0.0625    , 0.03149414, 0.084960... 0.01074219, 0.01574707, 0.01660156, 0.03881836, 0.04882812,
       0.00698853, 0.01867676, 0.02758789], dtype=float32)
 baseline_kernel = <function baseline_mahalanobis at 0x7ffff48ef4c0>
 c          = array([[17.31424 , 19.406195, 22.463463, ..., 16.932058, 20.984873,
        21.098936],
       [19.406195, 26.135996, ...    17.742523],
       [21.098936, 20.412914, 20.082333, ..., 16.12054 , 17.742523,
        28.222355]], dtype=float32)
 c_bf16     = array([[16779, 16795, 16820, ..., 16775, 16808, 16809],
       [16795, 16849, 16858, ..., 16808, 16804, 16803],
      ...[16808, 16804, 16823, ..., 16829, 16826, 16782],
       [16809, 16803, 16801, ..., 16769, 16782, 16866]], dtype=uint16)
 c_f32_rounded = array([[17.375, 19.375, 22.5  , ..., 16.875, 21.   , 21.125],
       [19.375, 26.125, 27.25 , ..., 21.   , 20.5  , 20.....875, ..., 23.625, 23.25 , 17.75 ],
       [21.125, 20.375, 20.125, ..., 16.125, 17.75 , 28.25 ]],
      dtype=float32)
 capability = 'neon_f16'
 metric     = 'mahalanobis'
 ndim       = 33
 simd_kernel = <built-in function mahalanobis>
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

 scripts/test.py:779: 
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
 scripts/test.py:179: in profile
    result = callable(*args, **kwargs)
        args       = (array([0.02783203, 0.05322266, 0.05151367, 0.02331543, 0.00013542,
       0.04248047, 0.02331543, 0.02990723, 0.04809... [21.   , 20.5  , 22.875, ..., 23.625, 23.25 , 17.75 ],
       [21.125, 20.375, 20.125, ..., 16.125, 17.75 , 28.25 ]]))
        before     = 9873785680899
        callable   = <function baseline_mahalanobis at 0x7ffff48ef4c0>
        kwargs     = {}
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

 x = array([0.02783203, 0.05322266, 0.05151367, 0.02331543, 0.00013542,
       0.04248047, 0.02331543, 0.02990723, 0.048095...0579834,
       0.02026367, 0.00976562, 0.02172852, 0.0291748 , 0.07275391,
       0.03369141, 0.01794434, 0.00958252])
 y = array([0.05029297, 0.02709961, 0.00150299, 0.00068283, 0.04711914,
       0.02624512, 0.0625    , 0.03149414, 0.084960...4345703,
       0.01074219, 0.01574707, 0.01660156, 0.03881836, 0.04882812,
       0.00698853, 0.01867676, 0.02758789])
 z = array([[17.375, 19.375, 22.5  , ..., 16.875, 21.   , 21.125],
       [19.375, 26.125, 27.25 , ..., 21.   , 20.5  , 20....  [21.   , 20.5  , 22.875, ..., 23.625, 23.25 , 17.75 ],
       [21.125, 20.375, 20.125, ..., 16.125, 17.75 , 28.25 ]])

    def baseline_mahalanobis(x, y, z):
        diff = x - y
 >       return np.sqrt(diff @ z @ diff)
 E       RuntimeWarning: invalid value encountered in sqrt

 diff       = array([-0.02246094,  0.02612305,  0.05001068,  0.0226326 , -0.04698372,
        0.01623535, -0.03918457, -0.00158691, ...
        0.00952148, -0.00598145,  0.00512695, -0.00964355,  0.02392578,
        0.02670288, -0.00073242, -0.01800537])
 x          = array([0.02783203, 0.05322266, 0.05151367, 0.02331543, 0.00013542,
       0.04248047, 0.02331543, 0.02990723, 0.048095...0579834,
       0.02026367, 0.00976562, 0.02172852, 0.0291748 , 0.07275391,
       0.03369141, 0.01794434, 0.00958252])
 y          = array([0.05029297, 0.02709961, 0.00150299, 0.00068283, 0.04711914,
       0.02624512, 0.0625    , 0.03149414, 0.084960...4345703,
       0.01074219, 0.01574707, 0.01660156, 0.03881836, 0.04882812,
       0.00698853, 0.01867676, 0.02758789])
 z          = array([[17.375, 19.375, 22.5  , ..., 16.875, 21.   , 21.125],
       [19.375, 26.125, 27.25 , ..., 21.   , 20.5  , 20....  [21.   , 20.5  , 22.875, ..., 23.625, 23.25 , 17.75 ],
       [21.125, 20.375, 20.125, ..., 16.125, 17.75 , 28.25 ]])

 scripts/test.py:152: RuntimeWarning
 ________________ test_curved_bf16[neon_f16-mahalanobis-33-4-5] _________________

 ndim = 33, metric = 'mahalanobis', capability = 'neon_f16'
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("ndim", [11, 16, 33])
    @pytest.mark.parametrize("metric", ["bilinear", "mahalanobis"])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_curved_bf16(ndim, metric, capability, stats_fixture):
        """Compares various SIMD kernels (like Bilinear Forms and Mahalanobis distances) for curved spaces
        with their NumPy or baseline counterparts, testing accuracy for the Brain-float format not
        natively supported by NumPy."""
    
        np.random.seed()
    
        # Let's generate some non-negative probability distributions
        a = np.abs(np.random.randn(ndim).astype(np.float32))
        b = np.abs(np.random.randn(ndim).astype(np.float32))
        a /= np.sum(a)
        b /= np.sum(b)
    
        # Let's compute the inverse of the covariance matrix, otherwise in the SciPy
        # implementation of the Mahalanobis we may face `sqrt` of a negative number.
        # We multiply the matrix by its transpose to get a positive-semi-definite matrix.
        c = np.abs(np.random.randn(ndim, ndim).astype(np.float32))
        c = np.dot(c, c.T)
    
        a_f32_rounded, a_bf16 = f32_downcast_to_bf16(a)
        b_f32_rounded, b_bf16 = f32_downcast_to_bf16(b)
        c_f32_rounded, c_bf16 = f32_downcast_to_bf16(c)
    
        keep_one_capability(capability)
        baseline_kernel, simd_kernel = name_to_kernels(metric)
 >       accurate_dt, accurate = profile(
            baseline_kernel,
            a_f32_rounded.astype(np.float64),
            b_f32_rounded.astype(np.float64),
            c_f32_rounded.astype(np.float64),
        )

 a          = array([0.04750396, 0.03301255, 0.03774773, 0.03483185, 0.00123688,
       0.06084875, 0.00840154, 0.00936551, 0.050628... 0.08346719, 0.03994536, 0.01348785, 0.07025464, 0.00187356,
       0.01795703, 0.04231109, 0.06983557], dtype=float32)
 a_bf16     = array([15683, 15623, 15643, 15631, 15010, 15737, 15370, 15385, 15695,
       15710, 15187, 15788, 15505, 15491, 15421,...15110, 15086, 15146, 15585, 15549, 15361, 15787, 15652,
       15453, 15760, 15094, 15507, 15661, 15759], dtype=uint16)
 a_f32_rounded = array([0.04760742, 0.03295898, 0.0378418 , 0.03491211, 0.00123596,
       0.06079102, 0.00842285, 0.00933838, 0.050537... 0.08349609, 0.04003906, 0.01348877, 0.0703125 , 0.00187683,
       0.01794434, 0.04223633, 0.06982422], dtype=float32)
 b          = array([0.04417752, 0.02525324, 0.02315663, 0.02428544, 0.0229874 ,
       0.01903893, 0.03425672, 0.02193894, 0.004125... 0.02451756, 0.06177796, 0.01518871, 0.06182572, 0.05034459,
       0.01835296, 0.05674228, 0.01239985], dtype=float32)
 b_bf16     = array([15669, 15567, 15550, 15559, 15548, 15516, 15628, 15540, 15239,
       15657, 15704, 15621, 15695, 15625, 15720,...15535, 15737, 15042, 15476, 15415, 15526, 15561, 15741,
       15481, 15741, 15694, 15510, 15720, 15435], dtype=uint16)
 b_f32_rounded = array([0.04418945, 0.02526855, 0.02319336, 0.02429199, 0.02294922,
       0.01904297, 0.03417969, 0.02197266, 0.004119... 0.02453613, 0.06176758, 0.01519775, 0.06176758, 0.05029297,
       0.01831055, 0.05664062, 0.01239014], dtype=float32)
 baseline_kernel = <function baseline_mahalanobis at 0x7ffff48ef4c0>
 c          = array([[18.468346, 17.003525, 16.206911, ..., 18.416735, 18.777397,
        13.206338],
       [17.003525, 19.736534, ...    21.590797],
       [13.206338, 18.900831, 15.591153, ..., 26.614489, 21.590797,
        37.56935 ]], dtype=float32)
 c_bf16     = array([[16788, 16776, 16770, ..., 16787, 16790, 16723],
       [16776, 16798, 16725, ..., 16806, 16832, 16791],
      ...[16790, 16832, 16807, ..., 16806, 16747, 16813],
       [16723, 16791, 16761, ..., 16853, 16813, 16918]], dtype=uint16)
 c_f32_rounded = array([[18.5   , 17.    , 16.25  , ..., 18.375 , 18.75  , 13.1875],
       [17.    , 19.75  , 13.3125, ..., 20.75  , 2..., 20.75  , 14.6875, 21.625 ],
       [13.1875, 18.875 , 15.5625, ..., 26.625 , 21.625 , 37.5   ]],
      dtype=float32)
 capability = 'neon_f16'
 metric     = 'mahalanobis'
 ndim       = 33
 simd_kernel = <built-in function mahalanobis>
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

 scripts/test.py:779: 
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
 scripts/test.py:179: in profile
    result = callable(*args, **kwargs)
        args       = (array([0.04760742, 0.03295898, 0.0378418 , 0.03491211, 0.00123596,
       0.06079102, 0.00842285, 0.00933838, 0.05053...4.    , 20.875 , ..., 20.75  , 14.6875, 21.625 ],
       [13.1875, 18.875 , 15.5625, ..., 26.625 , 21.625 , 37.5   ]]))
        before     = 9874401803965
        callable   = <function baseline_mahalanobis at 0x7ffff48ef4c0>
        kwargs     = {}
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

 x = array([0.04760742, 0.03295898, 0.0378418 , 0.03491211, 0.00123596,
       0.06079102, 0.00842285, 0.00933838, 0.050537...0787354,
       0.08349609, 0.04003906, 0.01348877, 0.0703125 , 0.00187683,
       0.01794434, 0.04223633, 0.06982422])
 y = array([0.04418945, 0.02526855, 0.02319336, 0.02429199, 0.02294922,
       0.01904297, 0.03417969, 0.02197266, 0.004119...2026367,
       0.02453613, 0.06176758, 0.01519775, 0.06176758, 0.05029297,
       0.01831055, 0.05664062, 0.01239014])
 z = array([[18.5   , 17.    , 16.25  , ..., 18.375 , 18.75  , 13.1875],
       [17.    , 19.75  , 13.3125, ..., 20.75  , 2...24.    , 20.875 , ..., 20.75  , 14.6875, 21.625 ],
       [13.1875, 18.875 , 15.5625, ..., 26.625 , 21.625 , 37.5   ]])

    def baseline_mahalanobis(x, y, z):
        diff = x - y
 >       return np.sqrt(diff @ z @ diff)
 E       RuntimeWarning: invalid value encountered in sqrt

 diff       = array([ 0.00341797,  0.00769043,  0.01464844,  0.01062012, -0.02171326,
        0.04174805, -0.02575684, -0.01263428, ...
        0.05895996, -0.02172852, -0.00170898,  0.00854492, -0.04841614,
       -0.00036621, -0.0144043 ,  0.05743408])
 x          = array([0.04760742, 0.03295898, 0.0378418 , 0.03491211, 0.00123596,
       0.06079102, 0.00842285, 0.00933838, 0.050537...0787354,
       0.08349609, 0.04003906, 0.01348877, 0.0703125 , 0.00187683,
       0.01794434, 0.04223633, 0.06982422])
 y          = array([0.04418945, 0.02526855, 0.02319336, 0.02429199, 0.02294922,
       0.01904297, 0.03417969, 0.02197266, 0.004119...2026367,
       0.02453613, 0.06176758, 0.01519775, 0.06176758, 0.05029297,
       0.01831055, 0.05664062, 0.01239014])
 z          = array([[18.5   , 17.    , 16.25  , ..., 18.375 , 18.75  , 13.1875],
       [17.    , 19.75  , 13.3125, ..., 20.75  , 2...24.    , 20.875 , ..., 20.75  , 14.6875, 21.625 ],
       [13.1875, 18.875 , 15.5625, ..., 26.625 , 21.625 , 37.5   ]])

 scripts/test.py:152: RuntimeWarning
 ________________ test_curved_bf16[neon_f16-mahalanobis-33-5-5] _________________

 ndim = 33, metric = 'mahalanobis', capability = 'neon_f16'
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("ndim", [11, 16, 33])
    @pytest.mark.parametrize("metric", ["bilinear", "mahalanobis"])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_curved_bf16(ndim, metric, capability, stats_fixture):
        """Compares various SIMD kernels (like Bilinear Forms and Mahalanobis distances) for curved spaces
        with their NumPy or baseline counterparts, testing accuracy for the Brain-float format not
        natively supported by NumPy."""
    
        np.random.seed()
    
        # Let's generate some non-negative probability distributions
        a = np.abs(np.random.randn(ndim).astype(np.float32))
        b = np.abs(np.random.randn(ndim).astype(np.float32))
        a /= np.sum(a)
        b /= np.sum(b)
    
        # Let's compute the inverse of the covariance matrix, otherwise in the SciPy
        # implementation of the Mahalanobis we may face `sqrt` of a negative number.
        # We multiply the matrix by its transpose to get a positive-semi-definite matrix.
        c = np.abs(np.random.randn(ndim, ndim).astype(np.float32))
        c = np.dot(c, c.T)
    
        a_f32_rounded, a_bf16 = f32_downcast_to_bf16(a)
        b_f32_rounded, b_bf16 = f32_downcast_to_bf16(b)
        c_f32_rounded, c_bf16 = f32_downcast_to_bf16(c)
    
        keep_one_capability(capability)
        baseline_kernel, simd_kernel = name_to_kernels(metric)
 >       accurate_dt, accurate = profile(
            baseline_kernel,
            a_f32_rounded.astype(np.float64),
            b_f32_rounded.astype(np.float64),
            c_f32_rounded.astype(np.float64),
        )

 a          = array([0.0110965 , 0.0076372 , 0.01143819, 0.01062276, 0.02670714,
       0.00097212, 0.03133371, 0.01331803, 0.008536... 0.03469085, 0.05778635, 0.00327137, 0.02760297, 0.04381743,
       0.01902995, 0.0662445 , 0.00644273], dtype=float32)
 a_bf16     = array([15414, 15354, 15419, 15406, 15579, 14975, 15616, 15450, 15372,
       15151, 15380, 15716, 15436, 15722, 15418,...15348, 15808, 15505, 15486, 15598, 15592, 15630, 15725,
       15190, 15586, 15667, 15516, 15752, 15315], dtype=uint16)
 a_f32_rounded = array([0.0111084 , 0.00762939, 0.01141357, 0.01062012, 0.0267334 ,
       0.00097275, 0.03125   , 0.01330566, 0.008544... 0.03466797, 0.05786133, 0.00326538, 0.02758789, 0.04370117,
       0.01904297, 0.06640625, 0.00643921], dtype=float32)
 b          = array([0.0145238 , 0.02365239, 0.03758197, 0.01718188, 0.01710484,
       0.07939181, 0.03574132, 0.02797304, 0.028115... 0.06450266, 0.00597   , 0.01174098, 0.01691502, 0.0244682 ,
       0.01522176, 0.03071585, 0.05447682], dtype=float32)
 b_bf16     = array([15470, 15554, 15642, 15501, 15500, 15779, 15634, 15589, 15590,
       15440, 15653, 15648, 15514, 15776, 15352,...15563, 15391, 15560, 15117, 15542, 15521, 15748, 15300,
       15424, 15499, 15560, 15481, 15612, 15711], dtype=uint16)
 b_f32_rounded = array([0.01452637, 0.02368164, 0.03759766, 0.01721191, 0.01708984,
       0.07958984, 0.03564453, 0.0279541 , 0.028076... 0.06445312, 0.00598145, 0.01171875, 0.01696777, 0.02441406,
       0.01519775, 0.03076172, 0.05444336], dtype=float32)
 baseline_kernel = <function baseline_mahalanobis at 0x7ffff48ef4c0>
 c          = array([[14.321381, 17.463158, 13.509762, ..., 17.593084, 16.241516,
        13.558319],
       [17.463158, 22.320063, ...    17.79762 ],
       [13.558319, 17.397844, 15.883398, ..., 15.349727, 17.79762 ,
        23.522417]], dtype=float32)
 c_bf16     = array([[16741, 16780, 16728, ..., 16781, 16770, 16729],
       [16780, 16819, 16787, ..., 16807, 16775, 16779],
      ...[16770, 16775, 16832, ..., 16826, 16834, 16782],
       [16729, 16779, 16766, ..., 16758, 16782, 16828]], dtype=uint16)
 c_f32_rounded = array([[14.3125, 17.5   , 13.5   , ..., 17.625 , 16.25  , 13.5625],
       [17.5   , 22.375 , 18.375 , ..., 20.875 , 1..., 23.25  , 24.25  , 17.75  ],
       [13.5625, 17.375 , 15.875 , ..., 15.375 , 17.75  , 23.5   ]],
      dtype=float32)
 capability = 'neon_f16'
 metric     = 'mahalanobis'
 ndim       = 33
 simd_kernel = <built-in function mahalanobis>
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

 scripts/test.py:779: 
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
 scripts/test.py:179: in profile
    result = callable(*args, **kwargs)
        args       = (array([0.0111084 , 0.00762939, 0.01141357, 0.01062012, 0.0267334 ,
       0.00097275, 0.03125   , 0.01330566, 0.00854...6.875 , 24.    , ..., 23.25  , 24.25  , 17.75  ],
       [13.5625, 17.375 , 15.875 , ..., 15.375 , 17.75  , 23.5   ]]))
        before     = 9875015550546
        callable   = <function baseline_mahalanobis at 0x7ffff48ef4c0>
        kwargs     = {}
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

 x = array([0.0111084 , 0.00762939, 0.01141357, 0.01062012, 0.0267334 ,
       0.00097275, 0.03125   , 0.01330566, 0.008544...2832031,
       0.03466797, 0.05786133, 0.00326538, 0.02758789, 0.04370117,
       0.01904297, 0.06640625, 0.00643921])
 y = array([0.01452637, 0.02368164, 0.03759766, 0.01721191, 0.01708984,
       0.07958984, 0.03564453, 0.0279541 , 0.028076...1965332,
       0.06445312, 0.00598145, 0.01171875, 0.01696777, 0.02441406,
       0.01519775, 0.03076172, 0.05444336])
 z = array([[14.3125, 17.5   , 13.5   , ..., 17.625 , 16.25  , 13.5625],
       [17.5   , 22.375 , 18.375 , ..., 20.875 , 1...16.875 , 24.    , ..., 23.25  , 24.25  , 17.75  ],
       [13.5625, 17.375 , 15.875 , ..., 15.375 , 17.75  , 23.5   ]])

    def baseline_mahalanobis(x, y, z):
        diff = x - y
 >       return np.sqrt(diff @ z @ diff)
 E       RuntimeWarning: invalid value encountered in sqrt

 diff       = array([-0.00341797, -0.01605225, -0.02618408, -0.0065918 ,  0.00964355,
       -0.0786171 , -0.00439453, -0.01464844, ...
       -0.02978516,  0.05187988, -0.00845337,  0.01062012,  0.01928711,
        0.00384521,  0.03564453, -0.04800415])
 x          = array([0.0111084 , 0.00762939, 0.01141357, 0.01062012, 0.0267334 ,
       0.00097275, 0.03125   , 0.01330566, 0.008544...2832031,
       0.03466797, 0.05786133, 0.00326538, 0.02758789, 0.04370117,
       0.01904297, 0.06640625, 0.00643921])
 y          = array([0.01452637, 0.02368164, 0.03759766, 0.01721191, 0.01708984,
       0.07958984, 0.03564453, 0.0279541 , 0.028076...1965332,
       0.06445312, 0.00598145, 0.01171875, 0.01696777, 0.02441406,
       0.01519775, 0.03076172, 0.05444336])
 z          = array([[14.3125, 17.5   , 13.5   , ..., 17.625 , 16.25  , 13.5625],
       [17.5   , 22.375 , 18.375 , ..., 20.875 , 1...16.875 , 24.    , ..., 23.25  , 24.25  , 17.75  ],
       [13.5625, 17.375 , 15.875 , ..., 15.375 , 17.75  , 23.5   ]])

 scripts/test.py:152: RuntimeWarning
 ________________ test_curved_bf16[neon_bf16-mahalanobis-11-2-5] ________________

 ndim = 11, metric = 'mahalanobis', capability = 'neon_bf16'
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("ndim", [11, 16, 33])
    @pytest.mark.parametrize("metric", ["bilinear", "mahalanobis"])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_curved_bf16(ndim, metric, capability, stats_fixture):
        """Compares various SIMD kernels (like Bilinear Forms and Mahalanobis distances) for curved spaces
        with their NumPy or baseline counterparts, testing accuracy for the Brain-float format not
        natively supported by NumPy."""
    
        np.random.seed()
    
        # Let's generate some non-negative probability distributions
        a = np.abs(np.random.randn(ndim).astype(np.float32))
        b = np.abs(np.random.randn(ndim).astype(np.float32))
        a /= np.sum(a)
        b /= np.sum(b)
    
        # Let's compute the inverse of the covariance matrix, otherwise in the SciPy
        # implementation of the Mahalanobis we may face `sqrt` of a negative number.
        # We multiply the matrix by its transpose to get a positive-semi-definite matrix.
        c = np.abs(np.random.randn(ndim, ndim).astype(np.float32))
        c = np.dot(c, c.T)
    
        a_f32_rounded, a_bf16 = f32_downcast_to_bf16(a)
        b_f32_rounded, b_bf16 = f32_downcast_to_bf16(b)
        c_f32_rounded, c_bf16 = f32_downcast_to_bf16(c)
    
        keep_one_capability(capability)
        baseline_kernel, simd_kernel = name_to_kernels(metric)
 >       accurate_dt, accurate = profile(
            baseline_kernel,
            a_f32_rounded.astype(np.float64),
            b_f32_rounded.astype(np.float64),
            c_f32_rounded.astype(np.float64),
        )

 a          = array([0.12444483, 0.16363576, 0.04936424, 0.02344064, 0.07401826,
       0.1598549 , 0.04739544, 0.02037084, 0.08001196, 0.17489049,
       0.08257265], dtype=float32)
 a_bf16     = array([15871, 15912, 15690, 15552, 15768, 15908, 15682, 15527, 15780,
       15923, 15785], dtype=uint16)
 a_f32_rounded = array([0.12451172, 0.1640625 , 0.04931641, 0.0234375 , 0.07421875,
       0.16015625, 0.04736328, 0.02038574, 0.08007812, 0.17480469,
       0.08251953], dtype=float32)
 b          = array([0.03348864, 0.01470203, 0.07019281, 0.0513414 , 0.12153635,
       0.20668827, 0.0422055 , 0.10373543, 0.06305775, 0.15296541,
       0.14008649], dtype=float32)
 b_bf16     = array([15625, 15473, 15760, 15698, 15865, 15956, 15661, 15828, 15745,
       15901, 15887], dtype=uint16)
 b_f32_rounded = array([0.03344727, 0.01470947, 0.0703125 , 0.05126953, 0.12158203,
       0.20703125, 0.04223633, 0.10351562, 0.06298828, 0.15332031,
       0.13964844], dtype=float32)
 baseline_kernel = <function baseline_mahalanobis at 0x7ffff48ef4c0>
 c          = array([[10.852449 ,  9.883165 , 14.5437355, 14.079592 ,  7.5837593,
        10.9680805, 10.343234 ,  8.328876 , 13.049... ,  5.348032 ,
         5.2513585,  5.444852 ,  6.3534193,  5.6364903, 10.238767 ,
         4.8202224]], dtype=float32)
 c_bf16     = array([[16686, 16670, 16745, 16737, 16627, 16687, 16677, 16645, 16721,
        16693, 16598],
       [16670, 16716, 16...8, 16676],
       [16598, 16641, 16550, 16566, 16555, 16552, 16558, 16587, 16564,
        16676, 16538]], dtype=uint16)
 c_f32_rounded = array([[10.875  ,  9.875  , 14.5625 , 14.0625 ,  7.59375, 10.9375 ,
        10.3125 ,  8.3125 , 13.0625 , 11.3125 ,  6...25 ,  5.1875 ,  5.6875 ,  5.34375,  5.25   ,
         5.4375 ,  6.34375,  5.625  , 10.25   ,  4.8125 ]], dtype=float32)
 capability = 'neon_bf16'
 metric     = 'mahalanobis'
 ndim       = 11
 simd_kernel = <built-in function mahalanobis>
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

 scripts/test.py:779: 
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
 scripts/test.py:179: in profile
    result = callable(*args, **kwargs)
        args       = (array([0.12451172, 0.1640625 , 0.04931641, 0.0234375 , 0.07421875,
       0.16015625, 0.04736328, 0.02038574, 0.08007...6.6875 ,  8.0625 ,  5.1875 ,  5.6875 ,  5.34375,  5.25   ,
         5.4375 ,  6.34375,  5.625  , 10.25   ,  4.8125 ]]))
        before     = 9875924038487
        callable   = <function baseline_mahalanobis at 0x7ffff48ef4c0>
        kwargs     = {}
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

 x = array([0.12451172, 0.1640625 , 0.04931641, 0.0234375 , 0.07421875,
       0.16015625, 0.04736328, 0.02038574, 0.08007812, 0.17480469,
       0.08251953])
 y = array([0.03344727, 0.01470947, 0.0703125 , 0.05126953, 0.12158203,
       0.20703125, 0.04223633, 0.10351562, 0.06298828, 0.15332031,
       0.13964844])
 z = array([[10.875  ,  9.875  , 14.5625 , 14.0625 ,  7.59375, 10.9375 ,
        10.3125 ,  8.3125 , 13.0625 , 11.3125 ,  6... 6.6875 ,  8.0625 ,  5.1875 ,  5.6875 ,  5.34375,  5.25   ,
         5.4375 ,  6.34375,  5.625  , 10.25   ,  4.8125 ]])

    def baseline_mahalanobis(x, y, z):
        diff = x - y
 >       return np.sqrt(diff @ z @ diff)
 E       RuntimeWarning: invalid value encountered in sqrt

 diff       = array([ 0.09106445,  0.14935303, -0.02099609, -0.02783203, -0.04736328,
       -0.046875  ,  0.00512695, -0.08312988,  0.01708984,  0.02148438,
       -0.05712891])
 x          = array([0.12451172, 0.1640625 , 0.04931641, 0.0234375 , 0.07421875,
       0.16015625, 0.04736328, 0.02038574, 0.08007812, 0.17480469,
       0.08251953])
 y          = array([0.03344727, 0.01470947, 0.0703125 , 0.05126953, 0.12158203,
       0.20703125, 0.04223633, 0.10351562, 0.06298828, 0.15332031,
       0.13964844])
 z          = array([[10.875  ,  9.875  , 14.5625 , 14.0625 ,  7.59375, 10.9375 ,
        10.3125 ,  8.3125 , 13.0625 , 11.3125 ,  6... 6.6875 ,  8.0625 ,  5.1875 ,  5.6875 ,  5.34375,  5.25   ,
         5.4375 ,  6.34375,  5.625  , 10.25   ,  4.8125 ]])

 scripts/test.py:152: RuntimeWarning
 ________________ test_curved_bf16[neon_bf16-mahalanobis-11-3-5] ________________

 ndim = 11, metric = 'mahalanobis', capability = 'neon_bf16'
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("ndim", [11, 16, 33])
    @pytest.mark.parametrize("metric", ["bilinear", "mahalanobis"])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_curved_bf16(ndim, metric, capability, stats_fixture):
        """Compares various SIMD kernels (like Bilinear Forms and Mahalanobis distances) for curved spaces
        with their NumPy or baseline counterparts, testing accuracy for the Brain-float format not
        natively supported by NumPy."""
    
        np.random.seed()
    
        # Let's generate some non-negative probability distributions
        a = np.abs(np.random.randn(ndim).astype(np.float32))
        b = np.abs(np.random.randn(ndim).astype(np.float32))
        a /= np.sum(a)
        b /= np.sum(b)
    
        # Let's compute the inverse of the covariance matrix, otherwise in the SciPy
        # implementation of the Mahalanobis we may face `sqrt` of a negative number.
        # We multiply the matrix by its transpose to get a positive-semi-definite matrix.
        c = np.abs(np.random.randn(ndim, ndim).astype(np.float32))
        c = np.dot(c, c.T)
    
        a_f32_rounded, a_bf16 = f32_downcast_to_bf16(a)
        b_f32_rounded, b_bf16 = f32_downcast_to_bf16(b)
        c_f32_rounded, c_bf16 = f32_downcast_to_bf16(c)
    
        keep_one_capability(capability)
        baseline_kernel, simd_kernel = name_to_kernels(metric)
 >       accurate_dt, accurate = profile(
            baseline_kernel,
            a_f32_rounded.astype(np.float64),
            b_f32_rounded.astype(np.float64),
            c_f32_rounded.astype(np.float64),
        )

 a          = array([0.13549937, 0.04836488, 0.05690934, 0.05291481, 0.10942064,
       0.15044764, 0.16776882, 0.04352165, 0.01045666, 0.12165812,
       0.10303805], dtype=float32)
 a_bf16     = array([15883, 15686, 15721, 15705, 15840, 15898, 15916, 15666, 15403,
       15865, 15827], dtype=uint16)
 a_f32_rounded = array([0.13574219, 0.04833984, 0.05688477, 0.05297852, 0.109375  ,
       0.15039062, 0.16796875, 0.04345703, 0.01043701, 0.12158203,
       0.10302734], dtype=float32)
 b          = array([0.13553213, 0.01628355, 0.01442712, 0.07736495, 0.02490107,
       0.06881473, 0.08837897, 0.13901609, 0.2544206 , 0.04179898,
       0.13906178], dtype=float32)
 b_bf16     = array([15883, 15493, 15468, 15774, 15564, 15757, 15797, 15886, 16002,
       15659, 15886], dtype=uint16)
 b_f32_rounded = array([0.13574219, 0.01623535, 0.0144043 , 0.07714844, 0.02490234,
       0.06884766, 0.08837891, 0.13867188, 0.25390625, 0.04174805,
       0.13867188], dtype=float32)
 baseline_kernel = <function baseline_mahalanobis at 0x7ffff48ef4c0>
 c          = array([[ 9.719926 , 12.909989 ,  7.0255666,  7.8741727,  6.0212674,
         9.792926 ,  5.681994 ,  9.05898  ,  8.338...3,  6.9040966,
         7.76728  ,  5.6298556,  4.7018995,  4.0904346,  6.8353844,
         9.0298195]], dtype=float32)
 c_bf16     = array([[16668, 16719, 16609, 16636, 16577, 16669, 16566, 16657, 16645,
        16641, 16603],
       [16719, 16698, 16...3, 16603],
       [16603, 16553, 16562, 16589, 16605, 16633, 16564, 16534, 16515,
        16603, 16656]], dtype=uint16)
 c_f32_rounded = array([[ 9.75    , 12.9375  ,  7.03125 ,  7.875   ,  6.03125 ,  9.8125  ,
         5.6875  ,  9.0625  ,  8.3125  ,  8....  6.40625 ,  6.90625 ,  7.78125 ,
         5.625   ,  4.6875  ,  4.09375 ,  6.84375 ,  9.      ]],
      dtype=float32)
 capability = 'neon_bf16'
 metric     = 'mahalanobis'
 ndim       = 11
 simd_kernel = <built-in function mahalanobis>
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

 scripts/test.py:779: 
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
 scripts/test.py:179: in profile
    result = callable(*args, **kwargs)
        args       = (array([0.13574219, 0.04833984, 0.05688477, 0.05297852, 0.109375  ,
       0.15039062, 0.16796875, 0.04345703, 0.01043...5.28125 ,  5.5625  ,  6.40625 ,  6.90625 ,  7.78125 ,
         5.625   ,  4.6875  ,  4.09375 ,  6.84375 ,  9.      ]]))
        before     = 9876548929859
        callable   = <function baseline_mahalanobis at 0x7ffff48ef4c0>
        kwargs     = {}
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

 x = array([0.13574219, 0.04833984, 0.05688477, 0.05297852, 0.109375  ,
       0.15039062, 0.16796875, 0.04345703, 0.01043701, 0.12158203,
       0.10302734])
 y = array([0.13574219, 0.01623535, 0.0144043 , 0.07714844, 0.02490234,
       0.06884766, 0.08837891, 0.13867188, 0.25390625, 0.04174805,
       0.13867188])
 z = array([[ 9.75    , 12.9375  ,  7.03125 ,  7.875   ,  6.03125 ,  9.8125  ,
         5.6875  ,  9.0625  ,  8.3125  ,  8.... 5.28125 ,  5.5625  ,  6.40625 ,  6.90625 ,  7.78125 ,
         5.625   ,  4.6875  ,  4.09375 ,  6.84375 ,  9.      ]])

    def baseline_mahalanobis(x, y, z):
        diff = x - y
 >       return np.sqrt(diff @ z @ diff)
 E       RuntimeWarning: invalid value encountered in sqrt

 diff       = array([ 0.        ,  0.03210449,  0.04248047, -0.02416992,  0.08447266,
        0.08154297,  0.07958984, -0.09521484, -0.24346924,  0.07983398,
       -0.03564453])
 x          = array([0.13574219, 0.04833984, 0.05688477, 0.05297852, 0.109375  ,
       0.15039062, 0.16796875, 0.04345703, 0.01043701, 0.12158203,
       0.10302734])
 y          = array([0.13574219, 0.01623535, 0.0144043 , 0.07714844, 0.02490234,
       0.06884766, 0.08837891, 0.13867188, 0.25390625, 0.04174805,
       0.13867188])
 z          = array([[ 9.75    , 12.9375  ,  7.03125 ,  7.875   ,  6.03125 ,  9.8125  ,
         5.6875  ,  9.0625  ,  8.3125  ,  8.... 5.28125 ,  5.5625  ,  6.40625 ,  6.90625 ,  7.78125 ,
         5.625   ,  4.6875  ,  4.09375 ,  6.84375 ,  9.      ]])

 scripts/test.py:152: RuntimeWarning
 ________________ test_curved_bf16[neon_bf16-mahalanobis-11-4-5] ________________

 ndim = 11, metric = 'mahalanobis', capability = 'neon_bf16'
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("ndim", [11, 16, 33])
    @pytest.mark.parametrize("metric", ["bilinear", "mahalanobis"])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_curved_bf16(ndim, metric, capability, stats_fixture):
        """Compares various SIMD kernels (like Bilinear Forms and Mahalanobis distances) for curved spaces
        with their NumPy or baseline counterparts, testing accuracy for the Brain-float format not
        natively supported by NumPy."""
    
        np.random.seed()
    
        # Let's generate some non-negative probability distributions
        a = np.abs(np.random.randn(ndim).astype(np.float32))
        b = np.abs(np.random.randn(ndim).astype(np.float32))
        a /= np.sum(a)
        b /= np.sum(b)
    
        # Let's compute the inverse of the covariance matrix, otherwise in the SciPy
        # implementation of the Mahalanobis we may face `sqrt` of a negative number.
        # We multiply the matrix by its transpose to get a positive-semi-definite matrix.
        c = np.abs(np.random.randn(ndim, ndim).astype(np.float32))
        c = np.dot(c, c.T)
    
        a_f32_rounded, a_bf16 = f32_downcast_to_bf16(a)
        b_f32_rounded, b_bf16 = f32_downcast_to_bf16(b)
        c_f32_rounded, c_bf16 = f32_downcast_to_bf16(c)
    
        keep_one_capability(capability)
        baseline_kernel, simd_kernel = name_to_kernels(metric)
 >       accurate_dt, accurate = profile(
            baseline_kernel,
            a_f32_rounded.astype(np.float64),
            b_f32_rounded.astype(np.float64),
            c_f32_rounded.astype(np.float64),
        )

 a          = array([0.03352097, 0.03991029, 0.10008309, 0.16201249, 0.02251969,
       0.02027413, 0.11847238, 0.27737138, 0.12490706, 0.06485045,
       0.03607809], dtype=float32)
 a_bf16     = array([15625, 15651, 15821, 15910, 15544, 15526, 15859, 16014, 15872,
       15749, 15636], dtype=uint16)
 a_f32_rounded = array([0.03344727, 0.03979492, 0.10009766, 0.16210938, 0.02246094,
       0.02026367, 0.11865234, 0.27734375, 0.125     , 0.06494141,
       0.03613281], dtype=float32)
 b          = array([0.09316526, 0.08617064, 0.23027349, 0.12497938, 0.06561078,
       0.12282931, 0.06679186, 0.03675097, 0.04377365, 0.03586133,
       0.09379335], dtype=float32)
 b_bf16     = array([15807, 15792, 15980, 15872, 15750, 15868, 15753, 15639, 15667,
       15635, 15808], dtype=uint16)
 b_f32_rounded = array([0.09326172, 0.0859375 , 0.23046875, 0.125     , 0.06542969,
       0.12304688, 0.06689453, 0.03686523, 0.04370117, 0.03588867,
       0.09375   ], dtype=float32)
 baseline_kernel = <function baseline_mahalanobis at 0x7ffff48ef4c0>
 c          = array([[ 5.3616076,  3.3963552,  5.1804085,  3.6821842,  6.1361313,
         3.9196255,  6.652673 ,  6.71683  ,  3.729...7, 10.15418  ,
        10.426258 ,  7.203807 ,  9.873688 , 10.035185 ,  5.5024314,
         6.0840855]], dtype=float32)
 c_bf16     = array([[16556, 16473, 16550, 16492, 16580, 16507, 16597, 16599, 16495,
        16420, 16538],
       [16473, 16420, 16...9, 16560],
       [16538, 16603, 16575, 16561, 16674, 16679, 16615, 16670, 16673,
        16560, 16579]], dtype=uint16)
 c_f32_rounded = array([[ 5.375   ,  3.390625,  5.1875  ,  3.6875  ,  6.125   ,  3.921875,
         6.65625 ,  6.71875 ,  3.734375,  2....  5.53125 , 10.125   , 10.4375  ,
         7.21875 ,  9.875   , 10.0625  ,  5.5     ,  6.09375 ]],
      dtype=float32)
 capability = 'neon_bf16'
 metric     = 'mahalanobis'
 ndim       = 11
 simd_kernel = <built-in function mahalanobis>
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

 scripts/test.py:779: 
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
 scripts/test.py:179: in profile
    result = callable(*args, **kwargs)
        args       = (array([0.03344727, 0.03979492, 0.10009766, 0.16210938, 0.02246094,
       0.02026367, 0.11865234, 0.27734375, 0.125  ...6.84375 ,  5.96875 ,  5.53125 , 10.125   , 10.4375  ,
         7.21875 ,  9.875   , 10.0625  ,  5.5     ,  6.09375 ]]))
        before     = 9877167192658
        callable   = <function baseline_mahalanobis at 0x7ffff48ef4c0>
        kwargs     = {}
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

 x = array([0.03344727, 0.03979492, 0.10009766, 0.16210938, 0.02246094,
       0.02026367, 0.11865234, 0.27734375, 0.125     , 0.06494141,
       0.03613281])
 y = array([0.09326172, 0.0859375 , 0.23046875, 0.125     , 0.06542969,
       0.12304688, 0.06689453, 0.03686523, 0.04370117, 0.03588867,
       0.09375   ])
 z = array([[ 5.375   ,  3.390625,  5.1875  ,  3.6875  ,  6.125   ,  3.921875,
         6.65625 ,  6.71875 ,  3.734375,  2.... 6.84375 ,  5.96875 ,  5.53125 , 10.125   , 10.4375  ,
         7.21875 ,  9.875   , 10.0625  ,  5.5     ,  6.09375 ]])

    def baseline_mahalanobis(x, y, z):
        diff = x - y
 >       return np.sqrt(diff @ z @ diff)
 E       RuntimeWarning: invalid value encountered in sqrt

 diff       = array([-0.05981445, -0.04614258, -0.13037109,  0.03710938, -0.04296875,
       -0.1027832 ,  0.05175781,  0.24047852,  0.08129883,  0.02905273,
       -0.05761719])
 x          = array([0.03344727, 0.03979492, 0.10009766, 0.16210938, 0.02246094,
       0.02026367, 0.11865234, 0.27734375, 0.125     , 0.06494141,
       0.03613281])
 y          = array([0.09326172, 0.0859375 , 0.23046875, 0.125     , 0.06542969,
       0.12304688, 0.06689453, 0.03686523, 0.04370117, 0.03588867,
       0.09375   ])
 z          = array([[ 5.375   ,  3.390625,  5.1875  ,  3.6875  ,  6.125   ,  3.921875,
         6.65625 ,  6.71875 ,  3.734375,  2.... 6.84375 ,  5.96875 ,  5.53125 , 10.125   , 10.4375  ,
         7.21875 ,  9.875   , 10.0625  ,  5.5     ,  6.09375 ]])

 scripts/test.py:152: RuntimeWarning
 ________________ test_curved_bf16[neon_bf16-mahalanobis-16-4-5] ________________

 ndim = 16, metric = 'mahalanobis', capability = 'neon_bf16'
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("ndim", [11, 16, 33])
    @pytest.mark.parametrize("metric", ["bilinear", "mahalanobis"])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_curved_bf16(ndim, metric, capability, stats_fixture):
        """Compares various SIMD kernels (like Bilinear Forms and Mahalanobis distances) for curved spaces
        with their NumPy or baseline counterparts, testing accuracy for the Brain-float format not
        natively supported by NumPy."""
    
        np.random.seed()
    
        # Let's generate some non-negative probability distributions
        a = np.abs(np.random.randn(ndim).astype(np.float32))
        b = np.abs(np.random.randn(ndim).astype(np.float32))
        a /= np.sum(a)
        b /= np.sum(b)
    
        # Let's compute the inverse of the covariance matrix, otherwise in the SciPy
        # implementation of the Mahalanobis we may face `sqrt` of a negative number.
        # We multiply the matrix by its transpose to get a positive-semi-definite matrix.
        c = np.abs(np.random.randn(ndim, ndim).astype(np.float32))
        c = np.dot(c, c.T)
    
        a_f32_rounded, a_bf16 = f32_downcast_to_bf16(a)
        b_f32_rounded, b_bf16 = f32_downcast_to_bf16(b)
        c_f32_rounded, c_bf16 = f32_downcast_to_bf16(c)
    
        keep_one_capability(capability)
        baseline_kernel, simd_kernel = name_to_kernels(metric)
 >       accurate_dt, accurate = profile(
            baseline_kernel,
            a_f32_rounded.astype(np.float64),
            b_f32_rounded.astype(np.float64),
            c_f32_rounded.astype(np.float64),
        )

 a          = array([0.02337428, 0.07533833, 0.00681584, 0.0064529 , 0.0484837 ,
       0.07843176, 0.0935915 , 0.06110071, 0.03811973, 0.05382021,
       0.07056218, 0.07917359, 0.17387296, 0.00067207, 0.03828999,
       0.15190017], dtype=float32)
 a_bf16     = array([15551, 15770, 15327, 15315, 15687, 15777, 15808, 15738, 15644,
       15708, 15761, 15778, 15922, 14896, 15645, 15900], dtype=uint16)
 a_f32_rounded = array([0.02331543, 0.07519531, 0.00680542, 0.00643921, 0.04858398,
       0.07861328, 0.09375   , 0.06103516, 0.03808594, 0.05371094,
       0.07080078, 0.07910156, 0.17382812, 0.00067139, 0.03833008,
       0.15234375], dtype=float32)
 b          = array([0.08280549, 0.02420391, 0.11117893, 0.04726281, 0.00080889,
       0.05303811, 0.03805136, 0.04602144, 0.16601773, 0.01305763,
       0.12872238, 0.0017438 , 0.13989772, 0.10970775, 0.02688507,
       0.01059699], dtype=float32)
 b_bf16     = array([15786, 15558, 15844, 15682, 14932, 15705, 15644, 15677, 15914,
       15446, 15876, 15077, 15887, 15841, 15580, 15406], dtype=uint16)
 b_f32_rounded = array([0.08300781, 0.02416992, 0.11132812, 0.04736328, 0.00080872,
       0.05297852, 0.03808594, 0.04614258, 0.16601562, 0.01306152,
       0.12890625, 0.00174713, 0.13964844, 0.10986328, 0.02685547,
       0.01062012], dtype=float32)
 baseline_kernel = <function baseline_mahalanobis at 0x7ffff48ef4c0>
 c          = array([[ 6.134648 ,  9.334522 ,  8.520364 ,  8.119631 ,  7.591453 ,
         8.643665 ,  9.293434 ,  7.573728 ,  7.715...6,  4.1876874,
         5.600498 ,  4.311328 ,  8.576055 ,  8.0982895,  6.182906 ,
         7.6732025]], dtype=float32)
 c_bf16     = array([[16580, 16661, 16648, 16642, 16627, 16650, 16661, 16626, 16631,
        16641, 16640, 16618, 16572, 16569, 1666...629, 16594, 16476, 16561, 16593, 16568, 16637,
        16518, 16563, 16522, 16649, 16642, 16582, 16630]], dtype=uint16)
 c_f32_rounded = array([[ 6.125  ,  9.3125 ,  8.5    ,  8.125  ,  7.59375,  8.625  ,
         9.3125 ,  7.5625 ,  7.71875,  8.0625 ,  8...125,  5.75   ,  7.90625,  4.1875 ,  5.59375,  4.3125 ,
         8.5625 ,  8.125  ,  6.1875 ,  7.6875 ]], dtype=float32)
 capability = 'neon_bf16'
 metric     = 'mahalanobis'
 ndim       = 16
 simd_kernel = <built-in function mahalanobis>
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

 scripts/test.py:779: 
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
 scripts/test.py:179: in profile
    result = callable(*args, **kwargs)
        args       = (array([0.02331543, 0.07519531, 0.00680542, 0.00643921, 0.04858398,
       0.07861328, 0.09375   , 0.06103516, 0.03808...
         6.53125,  5.75   ,  7.90625,  4.1875 ,  5.59375,  4.3125 ,
         8.5625 ,  8.125  ,  6.1875 ,  7.6875 ]]))
        before     = 9877851457662
        callable   = <function baseline_mahalanobis at 0x7ffff48ef4c0>
        kwargs     = {}
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

 x = array([0.02331543, 0.07519531, 0.00680542, 0.00643921, 0.04858398,
       0.07861328, 0.09375   , 0.06103516, 0.03808594, 0.05371094,
       0.07080078, 0.07910156, 0.17382812, 0.00067139, 0.03833008,
       0.15234375])
 y = array([0.08300781, 0.02416992, 0.11132812, 0.04736328, 0.00080872,
       0.05297852, 0.03808594, 0.04614258, 0.16601562, 0.01306152,
       0.12890625, 0.00174713, 0.13964844, 0.10986328, 0.02685547,
       0.01062012])
 z = array([[ 6.125  ,  9.3125 ,  8.5    ,  8.125  ,  7.59375,  8.625  ,
         9.3125 ,  7.5625 ,  7.71875,  8.0625 ,  8...,
         6.53125,  5.75   ,  7.90625,  4.1875 ,  5.59375,  4.3125 ,
         8.5625 ,  8.125  ,  6.1875 ,  7.6875 ]])

    def baseline_mahalanobis(x, y, z):
        diff = x - y
 >       return np.sqrt(diff @ z @ diff)
 E       RuntimeWarning: invalid value encountered in sqrt

 diff       = array([-0.05969238,  0.05102539, -0.10452271, -0.04092407,  0.04777527,
        0.02563477,  0.05566406,  0.01489258, -0.12792969,  0.04064941,
       -0.05810547,  0.07735443,  0.03417969, -0.10919189,  0.01147461,
        0.14172363])
 x          = array([0.02331543, 0.07519531, 0.00680542, 0.00643921, 0.04858398,
       0.07861328, 0.09375   , 0.06103516, 0.03808594, 0.05371094,
       0.07080078, 0.07910156, 0.17382812, 0.00067139, 0.03833008,
       0.15234375])
 y          = array([0.08300781, 0.02416992, 0.11132812, 0.04736328, 0.00080872,
       0.05297852, 0.03808594, 0.04614258, 0.16601562, 0.01306152,
       0.12890625, 0.00174713, 0.13964844, 0.10986328, 0.02685547,
       0.01062012])
 z          = array([[ 6.125  ,  9.3125 ,  8.5    ,  8.125  ,  7.59375,  8.625  ,
         9.3125 ,  7.5625 ,  7.71875,  8.0625 ,  8...,
         6.53125,  5.75   ,  7.90625,  4.1875 ,  5.59375,  4.3125 ,
         8.5625 ,  8.125  ,  6.1875 ,  7.6875 ]])

 scripts/test.py:152: RuntimeWarning
 ________________ test_curved_bf16[neon_bf16-mahalanobis-16-5-5] ________________

 ndim = 16, metric = 'mahalanobis', capability = 'neon_bf16'
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("ndim", [11, 16, 33])
    @pytest.mark.parametrize("metric", ["bilinear", "mahalanobis"])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_curved_bf16(ndim, metric, capability, stats_fixture):
        """Compares various SIMD kernels (like Bilinear Forms and Mahalanobis distances) for curved spaces
        with their NumPy or baseline counterparts, testing accuracy for the Brain-float format not
        natively supported by NumPy."""
    
        np.random.seed()
    
        # Let's generate some non-negative probability distributions
        a = np.abs(np.random.randn(ndim).astype(np.float32))
        b = np.abs(np.random.randn(ndim).astype(np.float32))
        a /= np.sum(a)
        b /= np.sum(b)
    
        # Let's compute the inverse of the covariance matrix, otherwise in the SciPy
        # implementation of the Mahalanobis we may face `sqrt` of a negative number.
        # We multiply the matrix by its transpose to get a positive-semi-definite matrix.
        c = np.abs(np.random.randn(ndim, ndim).astype(np.float32))
        c = np.dot(c, c.T)
    
        a_f32_rounded, a_bf16 = f32_downcast_to_bf16(a)
        b_f32_rounded, b_bf16 = f32_downcast_to_bf16(b)
        c_f32_rounded, c_bf16 = f32_downcast_to_bf16(c)
    
        keep_one_capability(capability)
        baseline_kernel, simd_kernel = name_to_kernels(metric)
 >       accurate_dt, accurate = profile(
            baseline_kernel,
            a_f32_rounded.astype(np.float64),
            b_f32_rounded.astype(np.float64),
            c_f32_rounded.astype(np.float64),
        )

 a          = array([0.03500987, 0.05402828, 0.04544835, 0.0752445 , 0.02774613,
       0.09703256, 0.07768188, 0.26136142, 0.00075037, 0.07955396,
       0.06836019, 0.02466174, 0.00890622, 0.12651353, 0.00081634,
       0.0168847 ], dtype=float32)
 a_bf16     = array([15631, 15709, 15674, 15770, 15587, 15815, 15775, 16006, 14917,
       15779, 15756, 15562, 15378, 15874, 14934, 15498], dtype=uint16)
 a_f32_rounded = array([0.03491211, 0.05395508, 0.04541016, 0.07519531, 0.02770996,
       0.09716797, 0.07763672, 0.26171875, 0.0007515 , 0.07958984,
       0.06835938, 0.0246582 , 0.00891113, 0.12695312, 0.00081635,
       0.0168457 ], dtype=float32)
 b          = array([0.03028795, 0.08425495, 0.05402244, 0.02330264, 0.02435431,
       0.13691372, 0.14805679, 0.03231295, 0.00346606, 0.03671833,
       0.03022857, 0.07227668, 0.00669509, 0.01712879, 0.09812911,
       0.20185167], dtype=float32)
 b_bf16     = array([15608, 15789, 15709, 15551, 15560, 15884, 15896, 15620, 15203,
       15638, 15608, 15764, 15323, 15500, 15817, 15951], dtype=uint16)
 b_f32_rounded = array([0.03027344, 0.08447266, 0.05395508, 0.02331543, 0.02441406,
       0.13671875, 0.1484375 , 0.03222656, 0.00346375, 0.03662109,
       0.03027344, 0.07226562, 0.00668335, 0.01708984, 0.09814453,
       0.20214844], dtype=float32)
 baseline_kernel = <function baseline_mahalanobis at 0x7ffff48ef4c0>
 c          = array([[15.215328 , 13.978329 , 12.998938 ,  8.552043 , 12.336058 ,
        15.450526 , 12.873751 , 12.626179 , 11.646... , 12.452763 ,
        15.807171 , 14.584166 , 20.777214 , 17.419899 , 16.521275 ,
        18.965944 ]], dtype=float32)
 c_bf16     = array([[16755, 16736, 16720, 16649, 16709, 16759, 16718, 16714, 16698,
        16563, 16770, 16687, 16662, 16624, 1674...609, 16730, 16759, 16675, 16684, 16744, 16773,
        16711, 16765, 16745, 16806, 16779, 16772, 16792]], dtype=uint16)
 c_f32_rounded = array([[15.1875 , 14.     , 13.     ,  8.5625 , 12.3125 , 15.4375 ,
        12.875  , 12.625  , 11.625  ,  5.59375, 16...   , 14.5    , 16.625  , 12.4375 , 15.8125 , 14.5625 ,
        20.75   , 17.375  , 16.5    , 19.     ]], dtype=float32)
 capability = 'neon_bf16'
 metric     = 'mahalanobis'
 ndim       = 16
 simd_kernel = <built-in function mahalanobis>
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

 scripts/test.py:779: 
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
 scripts/test.py:179: in profile
    result = callable(*args, **kwargs)
        args       = (array([0.03491211, 0.05395508, 0.04541016, 0.07519531, 0.02770996,
       0.09716797, 0.07763672, 0.26171875, 0.00075...
        10.75   , 14.5    , 16.625  , 12.4375 , 15.8125 , 14.5625 ,
        20.75   , 17.375  , 16.5    , 19.     ]]))
        before     = 9878510176727
        callable   = <function baseline_mahalanobis at 0x7ffff48ef4c0>
        kwargs     = {}
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

 x = array([0.03491211, 0.05395508, 0.04541016, 0.07519531, 0.02770996,
       0.09716797, 0.07763672, 0.26171875, 0.0007515 , 0.07958984,
       0.06835938, 0.0246582 , 0.00891113, 0.12695312, 0.00081635,
       0.0168457 ])
 y = array([0.03027344, 0.08447266, 0.05395508, 0.02331543, 0.02441406,
       0.13671875, 0.1484375 , 0.03222656, 0.00346375, 0.03662109,
       0.03027344, 0.07226562, 0.00668335, 0.01708984, 0.09814453,
       0.20214844])
 z = array([[15.1875 , 14.     , 13.     ,  8.5625 , 12.3125 , 15.4375 ,
        12.875  , 12.625  , 11.625  ,  5.59375, 16...,
        10.75   , 14.5    , 16.625  , 12.4375 , 15.8125 , 14.5625 ,
        20.75   , 17.375  , 16.5    , 19.     ]])

    def baseline_mahalanobis(x, y, z):
        diff = x - y
 >       return np.sqrt(diff @ z @ diff)
 E       RuntimeWarning: invalid value encountered in sqrt

 diff       = array([ 0.00463867, -0.03051758, -0.00854492,  0.05187988,  0.0032959 ,
       -0.03955078, -0.07080078,  0.22949219, -0.00271225,  0.04296875,
        0.03808594, -0.04760742,  0.00222778,  0.10986328, -0.09732819,
       -0.18530273])
 x          = array([0.03491211, 0.05395508, 0.04541016, 0.07519531, 0.02770996,
       0.09716797, 0.07763672, 0.26171875, 0.0007515 , 0.07958984,
       0.06835938, 0.0246582 , 0.00891113, 0.12695312, 0.00081635,
       0.0168457 ])
 y          = array([0.03027344, 0.08447266, 0.05395508, 0.02331543, 0.02441406,
       0.13671875, 0.1484375 , 0.03222656, 0.00346375, 0.03662109,
       0.03027344, 0.07226562, 0.00668335, 0.01708984, 0.09814453,
       0.20214844])
 z          = array([[15.1875 , 14.     , 13.     ,  8.5625 , 12.3125 , 15.4375 ,
        12.875  , 12.625  , 11.625  ,  5.59375, 16...,
        10.75   , 14.5    , 16.625  , 12.4375 , 15.8125 , 14.5625 ,
        20.75   , 17.375  , 16.5    , 19.     ]])

 scripts/test.py:152: RuntimeWarning
 ________________ test_curved_bf16[neon_bf16-mahalanobis-33-1-5] ________________

 ndim = 33, metric = 'mahalanobis', capability = 'neon_bf16'
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("ndim", [11, 16, 33])
    @pytest.mark.parametrize("metric", ["bilinear", "mahalanobis"])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_curved_bf16(ndim, metric, capability, stats_fixture):
        """Compares various SIMD kernels (like Bilinear Forms and Mahalanobis distances) for curved spaces
        with their NumPy or baseline counterparts, testing accuracy for the Brain-float format not
        natively supported by NumPy."""
    
        np.random.seed()
    
        # Let's generate some non-negative probability distributions
        a = np.abs(np.random.randn(ndim).astype(np.float32))
        b = np.abs(np.random.randn(ndim).astype(np.float32))
        a /= np.sum(a)
        b /= np.sum(b)
    
        # Let's compute the inverse of the covariance matrix, otherwise in the SciPy
        # implementation of the Mahalanobis we may face `sqrt` of a negative number.
        # We multiply the matrix by its transpose to get a positive-semi-definite matrix.
        c = np.abs(np.random.randn(ndim, ndim).astype(np.float32))
        c = np.dot(c, c.T)
    
        a_f32_rounded, a_bf16 = f32_downcast_to_bf16(a)
        b_f32_rounded, b_bf16 = f32_downcast_to_bf16(b)
        c_f32_rounded, c_bf16 = f32_downcast_to_bf16(c)
    
        keep_one_capability(capability)
        baseline_kernel, simd_kernel = name_to_kernels(metric)
 >       accurate_dt, accurate = profile(
            baseline_kernel,
            a_f32_rounded.astype(np.float64),
            b_f32_rounded.astype(np.float64),
            c_f32_rounded.astype(np.float64),
        )

 a          = array([0.03759143, 0.04507597, 0.06221219, 0.02451427, 0.00286409,
       0.06663123, 0.00072974, 0.04810535, 0.029984... 0.06243014, 0.04953372, 0.01923179, 0.03550375, 0.00434402,
       0.00152147, 0.01422164, 0.08487976], dtype=float32)
 a_bf16     = array([15642, 15673, 15743, 15561, 15164, 15752, 14911, 15685, 15606,
       15619, 15575, 15643, 15414, 15291, 15678,...14966, 15754, 15423, 15721, 15561, 15393, 15744, 15691,
       15518, 15633, 15246, 15047, 15465, 15790], dtype=uint16)
 a_f32_rounded = array([0.03759766, 0.04516602, 0.06225586, 0.02453613, 0.00286865,
       0.06640625, 0.00072861, 0.0480957 , 0.030029... 0.0625    , 0.04956055, 0.01928711, 0.03540039, 0.0043335 ,
       0.00151825, 0.01422119, 0.08496094], dtype=float32)
 b          = array([0.03791765, 0.08399886, 0.03398993, 0.01493451, 0.02215652,
       0.03815432, 0.04458741, 0.03126188, 0.019273... 0.04326338, 0.02531599, 0.01344232, 0.03913211, 0.03963288,
       0.01549392, 0.04936967, 0.04549859], dtype=float32)
 b_bf16     = array([15643, 15788, 15627, 15477, 15542, 15644, 15671, 15616, 15518,
       15506, 15035, 15551, 15445, 15255, 15464,...15511, 15583, 15691, 15483, 15753, 15787, 15665, 15567,
       15452, 15648, 15650, 15486, 15690, 15674], dtype=uint16)
 b_f32_rounded = array([0.0378418 , 0.08398438, 0.03393555, 0.01495361, 0.0222168 ,
       0.03808594, 0.04467773, 0.03125   , 0.019287... 0.04321289, 0.02526855, 0.01342773, 0.0390625 , 0.03955078,
       0.01550293, 0.04931641, 0.04541016], dtype=float32)
 baseline_kernel = <function baseline_mahalanobis at 0x7ffff48ef4c0>
 c          = array([[23.416561, 28.505322, 30.152279, ..., 20.356342, 28.786318,
        24.749125],
       [28.505322, 23.957037, ...    29.003687],
       [24.749125, 22.984364, 18.374277, ..., 31.16929 , 29.003687,
        58.79127 ]], dtype=float32)
 c_bf16     = array([[16827, 16868, 16881, ..., 16803, 16870, 16838],
       [16868, 16832, 16847, ..., 16798, 16811, 16824],
      ...[16870, 16811, 16771, ..., 16790, 16787, 16872],
       [16838, 16824, 16787, ..., 16889, 16872, 17003]], dtype=uint16)
 c_f32_rounded = array([[23.375, 28.5  , 30.125, ..., 20.375, 28.75 , 24.75 ],
       [28.5  , 24.   , 25.875, ..., 19.75 , 21.375, 23.....375, ..., 18.75 , 18.375, 29.   ],
       [24.75 , 23.   , 18.375, ..., 31.125, 29.   , 58.75 ]],
      dtype=float32)
 capability = 'neon_bf16'
 metric     = 'mahalanobis'
 ndim       = 33
 simd_kernel = <built-in function mahalanobis>
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

 scripts/test.py:779: 
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
 scripts/test.py:179: in profile
    result = callable(*args, **kwargs)
        args       = (array([0.03759766, 0.04516602, 0.06225586, 0.02453613, 0.00286865,
       0.06640625, 0.00072861, 0.0480957 , 0.03002... [28.75 , 21.375, 16.375, ..., 18.75 , 18.375, 29.   ],
       [24.75 , 23.   , 18.375, ..., 31.125, 29.   , 58.75 ]]))
        before     = 9879173095088
        callable   = <function baseline_mahalanobis at 0x7ffff48ef4c0>
        kwargs     = {}
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

 x = array([0.03759766, 0.04516602, 0.06225586, 0.02453613, 0.00286865,
       0.06640625, 0.00072861, 0.0480957 , 0.030029...0982666,
       0.0625    , 0.04956055, 0.01928711, 0.03540039, 0.0043335 ,
       0.00151825, 0.01422119, 0.08496094])
 y = array([0.0378418 , 0.08398438, 0.03393555, 0.01495361, 0.0222168 ,
       0.03808594, 0.04467773, 0.03125   , 0.019287...8349609,
       0.04321289, 0.02526855, 0.01342773, 0.0390625 , 0.03955078,
       0.01550293, 0.04931641, 0.04541016])
 z = array([[23.375, 28.5  , 30.125, ..., 20.375, 28.75 , 24.75 ],
       [28.5  , 24.   , 25.875, ..., 19.75 , 21.375, 23....  [28.75 , 21.375, 16.375, ..., 18.75 , 18.375, 29.   ],
       [24.75 , 23.   , 18.375, ..., 31.125, 29.   , 58.75 ]])

    def baseline_mahalanobis(x, y, z):
        diff = x - y
 >       return np.sqrt(diff @ z @ diff)
 E       RuntimeWarning: invalid value encountered in sqrt

 diff       = array([-2.44140625e-04, -3.88183594e-02,  2.83203125e-02,  9.58251953e-03,
       -1.93481445e-02,  2.83203125e-02, -4...2,  5.85937500e-03,
       -3.66210938e-03, -3.52172852e-02, -1.39846802e-02, -3.50952148e-02,
        3.95507812e-02])
 x          = array([0.03759766, 0.04516602, 0.06225586, 0.02453613, 0.00286865,
       0.06640625, 0.00072861, 0.0480957 , 0.030029...0982666,
       0.0625    , 0.04956055, 0.01928711, 0.03540039, 0.0043335 ,
       0.00151825, 0.01422119, 0.08496094])
 y          = array([0.0378418 , 0.08398438, 0.03393555, 0.01495361, 0.0222168 ,
       0.03808594, 0.04467773, 0.03125   , 0.019287...8349609,
       0.04321289, 0.02526855, 0.01342773, 0.0390625 , 0.03955078,
       0.01550293, 0.04931641, 0.04541016])
 z          = array([[23.375, 28.5  , 30.125, ..., 20.375, 28.75 , 24.75 ],
       [28.5  , 24.   , 25.875, ..., 19.75 , 21.375, 23....  [28.75 , 21.375, 16.375, ..., 18.75 , 18.375, 29.   ],
       [24.75 , 23.   , 18.375, ..., 31.125, 29.   , 58.75 ]])

 scripts/test.py:152: RuntimeWarning
 ________________ test_curved_bf16[neon_bf16-mahalanobis-33-5-5] ________________

 ndim = 33, metric = 'mahalanobis', capability = 'neon_bf16'
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("ndim", [11, 16, 33])
    @pytest.mark.parametrize("metric", ["bilinear", "mahalanobis"])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_curved_bf16(ndim, metric, capability, stats_fixture):
        """Compares various SIMD kernels (like Bilinear Forms and Mahalanobis distances) for curved spaces
        with their NumPy or baseline counterparts, testing accuracy for the Brain-float format not
        natively supported by NumPy."""
    
        np.random.seed()
    
        # Let's generate some non-negative probability distributions
        a = np.abs(np.random.randn(ndim).astype(np.float32))
        b = np.abs(np.random.randn(ndim).astype(np.float32))
        a /= np.sum(a)
        b /= np.sum(b)
    
        # Let's compute the inverse of the covariance matrix, otherwise in the SciPy
        # implementation of the Mahalanobis we may face `sqrt` of a negative number.
        # We multiply the matrix by its transpose to get a positive-semi-definite matrix.
        c = np.abs(np.random.randn(ndim, ndim).astype(np.float32))
        c = np.dot(c, c.T)
    
        a_f32_rounded, a_bf16 = f32_downcast_to_bf16(a)
        b_f32_rounded, b_bf16 = f32_downcast_to_bf16(b)
        c_f32_rounded, c_bf16 = f32_downcast_to_bf16(c)
    
        keep_one_capability(capability)
        baseline_kernel, simd_kernel = name_to_kernels(metric)
 >       accurate_dt, accurate = profile(
            baseline_kernel,
            a_f32_rounded.astype(np.float64),
            b_f32_rounded.astype(np.float64),
            c_f32_rounded.astype(np.float64),
        )

 a          = array([0.02548368, 0.02304076, 0.02163803, 0.01581331, 0.02032062,
       0.06777374, 0.02448171, 0.01387164, 0.061259... 0.05077266, 0.01539691, 0.04030243, 0.01103134, 0.00522961,
       0.01160258, 0.02340525, 0.03211085], dtype=float32)
 a_bf16     = array([15569, 15549, 15537, 15490, 15526, 15755, 15561, 15459, 15739,
       15732, 15666, 15566, 15495, 15536, 15765,...14941, 15568, 15687, 15312, 15559, 15639, 15696, 15484,
       15653, 15413, 15275, 15422, 15552, 15620], dtype=uint16)
 a_f32_rounded = array([0.0255127 , 0.02307129, 0.02160645, 0.01586914, 0.02026367,
       0.06787109, 0.02453613, 0.01385498, 0.061279... 0.05078125, 0.01538086, 0.0402832 , 0.01104736, 0.00521851,
       0.01159668, 0.0234375 , 0.03222656], dtype=float32)
 b          = array([0.06594564, 0.01171315, 0.06478352, 0.04494037, 0.03824661,
       0.06186473, 0.00505846, 0.0058718 , 0.067501... 0.02032287, 0.02965601, 0.02288375, 0.00612104, 0.01436543,
       0.01073536, 0.0260749 , 0.03002767], dtype=float32)
 b_bf16     = array([15751, 15424, 15749, 15672, 15645, 15741, 15270, 15296, 15754,
       15830, 15798, 15631, 15368, 15479, 15534,...15171, 15325, 15647, 15456, 15466, 15358, 15526, 15603,
       15547, 15305, 15467, 15408, 15574, 15606], dtype=uint16)
 b_f32_rounded = array([0.06591797, 0.01171875, 0.06494141, 0.04492188, 0.03833008,
       0.06176758, 0.00506592, 0.00585938, 0.067382... 0.02026367, 0.02966309, 0.02282715, 0.00613403, 0.01434326,
       0.01074219, 0.02612305, 0.0300293 ], dtype=float32)
 baseline_kernel = <function baseline_mahalanobis at 0x7ffff48ef4c0>
 c          = array([[26.448719, 19.743904, 20.182337, ..., 17.983122, 25.641468,
        17.009567],
       [19.743904, 25.298843, ...    21.533241],
       [17.009567, 22.96305 , 19.409868, ..., 26.858765, 21.533241,
        40.723488]], dtype=float32)
 c_bf16     = array([[16852, 16798, 16801, ..., 16784, 16845, 16776],
       [16798, 16842, 16858, ..., 16815, 16910, 16824],
      ...[16845, 16910, 16853, ..., 16889, 16869, 16812],
       [16776, 16824, 16795, ..., 16855, 16812, 16931]], dtype=uint16)
 c_f32_rounded = array([[26.5  , 19.75 , 20.125, ..., 18.   , 25.625, 17.   ],
       [19.75 , 25.25 , 27.25 , ..., 21.875, 35.5  , 23.....625, ..., 31.125, 28.625, 21.5  ],
       [17.   , 23.   , 19.375, ..., 26.875, 21.5  , 40.75 ]],
      dtype=float32)
 capability = 'neon_bf16'
 metric     = 'mahalanobis'
 ndim       = 33
 simd_kernel = <built-in function mahalanobis>
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

 scripts/test.py:779: 
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
 scripts/test.py:179: in profile
    result = callable(*args, **kwargs)
        args       = (array([0.0255127 , 0.02307129, 0.02160645, 0.01586914, 0.02026367,
       0.06787109, 0.02453613, 0.01385498, 0.06127... [25.625, 35.5  , 26.625, ..., 31.125, 28.625, 21.5  ],
       [17.   , 23.   , 19.375, ..., 26.875, 21.5  , 40.75 ]]))
        before     = 9879862922368
        callable   = <function baseline_mahalanobis at 0x7ffff48ef4c0>
        kwargs     = {}
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

 x = array([0.0255127 , 0.02307129, 0.02160645, 0.01586914, 0.02026367,
       0.06787109, 0.02453613, 0.01385498, 0.061279...3686523,
       0.05078125, 0.01538086, 0.0402832 , 0.01104736, 0.00521851,
       0.01159668, 0.0234375 , 0.03222656])
 y = array([0.06591797, 0.01171875, 0.06494141, 0.04492188, 0.03833008,
       0.06176758, 0.00506592, 0.00585938, 0.067382...0775146,
       0.02026367, 0.02966309, 0.02282715, 0.00613403, 0.01434326,
       0.01074219, 0.02612305, 0.0300293 ])
 z = array([[26.5  , 19.75 , 20.125, ..., 18.   , 25.625, 17.   ],
       [19.75 , 25.25 , 27.25 , ..., 21.875, 35.5  , 23....  [25.625, 35.5  , 26.625, ..., 31.125, 28.625, 21.5  ],
       [17.   , 23.   , 19.375, ..., 26.875, 21.5  , 40.75 ]])

    def baseline_mahalanobis(x, y, z):
        diff = x - y
 >       return np.sqrt(diff @ z @ diff)
 E       RuntimeWarning: invalid value encountered in sqrt

 diff       = array([-0.04040527,  0.01135254, -0.04333496, -0.02905273, -0.01806641,
        0.00610352,  0.01947021,  0.00799561, ...
        0.03051758, -0.01428223,  0.01745605,  0.00491333, -0.00912476,
        0.00085449, -0.00268555,  0.00219727])
 x          = array([0.0255127 , 0.02307129, 0.02160645, 0.01586914, 0.02026367,
       0.06787109, 0.02453613, 0.01385498, 0.061279...3686523,
       0.05078125, 0.01538086, 0.0402832 , 0.01104736, 0.00521851,
       0.01159668, 0.0234375 , 0.03222656])
 y          = array([0.06591797, 0.01171875, 0.06494141, 0.04492188, 0.03833008,
       0.06176758, 0.00506592, 0.00585938, 0.067382...0775146,
       0.02026367, 0.02966309, 0.02282715, 0.00613403, 0.01434326,
       0.01074219, 0.02612305, 0.0300293 ])
 z          = array([[26.5  , 19.75 , 20.125, ..., 18.   , 25.625, 17.   ],
       [19.75 , 25.25 , 27.25 , ..., 21.875, 35.5  , 23....  [25.625, 35.5  , 26.625, ..., 31.125, 28.625, 21.5  ],
       [17.   , 23.   , 19.375, ..., 26.875, 21.5  , 40.75 ]])

 scripts/test.py:152: RuntimeWarning
 _________________ test_curved_bf16[neon_i8-mahalanobis-11-1-5] _________________

 ndim = 11, metric = 'mahalanobis', capability = 'neon_i8'
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("ndim", [11, 16, 33])
    @pytest.mark.parametrize("metric", ["bilinear", "mahalanobis"])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_curved_bf16(ndim, metric, capability, stats_fixture):
        """Compares various SIMD kernels (like Bilinear Forms and Mahalanobis distances) for curved spaces
        with their NumPy or baseline counterparts, testing accuracy for the Brain-float format not
        natively supported by NumPy."""
    
        np.random.seed()
    
        # Let's generate some non-negative probability distributions
        a = np.abs(np.random.randn(ndim).astype(np.float32))
        b = np.abs(np.random.randn(ndim).astype(np.float32))
        a /= np.sum(a)
        b /= np.sum(b)
    
        # Let's compute the inverse of the covariance matrix, otherwise in the SciPy
        # implementation of the Mahalanobis we may face `sqrt` of a negative number.
        # We multiply the matrix by its transpose to get a positive-semi-definite matrix.
        c = np.abs(np.random.randn(ndim, ndim).astype(np.float32))
        c = np.dot(c, c.T)
    
        a_f32_rounded, a_bf16 = f32_downcast_to_bf16(a)
        b_f32_rounded, b_bf16 = f32_downcast_to_bf16(b)
        c_f32_rounded, c_bf16 = f32_downcast_to_bf16(c)
    
        keep_one_capability(capability)
        baseline_kernel, simd_kernel = name_to_kernels(metric)
 >       accurate_dt, accurate = profile(
            baseline_kernel,
            a_f32_rounded.astype(np.float64),
            b_f32_rounded.astype(np.float64),
            c_f32_rounded.astype(np.float64),
        )

 a          = array([0.12882724, 0.16416474, 0.04316883, 0.08702265, 0.17556334,
       0.02098931, 0.07969304, 0.07090873, 0.06238796, 0.04239348,
       0.12488069], dtype=float32)
 a_bf16     = array([15876, 15912, 15665, 15794, 15924, 15532, 15779, 15761, 15744,
       15662, 15872], dtype=uint16)
 a_f32_rounded = array([0.12890625, 0.1640625 , 0.04321289, 0.08691406, 0.17578125,
       0.02099609, 0.07958984, 0.07080078, 0.0625    , 0.04248047,
       0.125     ], dtype=float32)
 b          = array([0.00805916, 0.17966451, 0.06070143, 0.09474083, 0.04607549,
       0.20778061, 0.12679362, 0.07607228, 0.02230324, 0.11092911,
       0.06687976], dtype=float32)
 b_bf16     = array([15364, 15928, 15737, 15810, 15677, 15957, 15874, 15772, 15543,
       15843, 15753], dtype=uint16)
 b_f32_rounded = array([0.00805664, 0.1796875 , 0.06079102, 0.09472656, 0.04614258,
       0.20800781, 0.12695312, 0.07617188, 0.02233887, 0.11083984,
       0.06689453], dtype=float32)
 baseline_kernel = <function baseline_mahalanobis at 0x7ffff48ef4c0>
 c          = array([[ 8.513703 ,  7.1715155,  9.650979 ,  7.8535976,  9.596152 ,
         7.904559 ,  4.635359 ,  6.431425 ,  9.544...5, 10.517974 ,
         7.387657 ,  7.676396 ,  4.9488873,  6.5172243,  6.2892776,
         6.5411525]], dtype=float32)
 c_bf16     = array([[16648, 16613, 16666, 16635, 16666, 16637, 16532, 16590, 16665,
        16700, 16570],
       [16613, 16696, 16...1, 16585],
       [16570, 16667, 16491, 16520, 16680, 16620, 16630, 16542, 16593,
        16585, 16593]], dtype=uint16)
 c_f32_rounded = array([[ 8.5     ,  7.15625 ,  9.625   ,  7.84375 ,  9.625   ,  7.90625 ,
         4.625   ,  6.4375  ,  9.5625  , 11....  4.25    , 10.5     ,  7.375   ,
         7.6875  ,  4.9375  ,  6.53125 ,  6.28125 ,  6.53125 ]],
      dtype=float32)
 capability = 'neon_i8'
 metric     = 'mahalanobis'
 ndim       = 11
 simd_kernel = <built-in function mahalanobis>
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

 scripts/test.py:779: 
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
 scripts/test.py:179: in profile
    result = callable(*args, **kwargs)
        args       = (array([0.12890625, 0.1640625 , 0.04321289, 0.08691406, 0.17578125,
       0.02099609, 0.07958984, 0.07080078, 0.0625 ...9.6875  ,  3.671875,  4.25    , 10.5     ,  7.375   ,
         7.6875  ,  4.9375  ,  6.53125 ,  6.28125 ,  6.53125 ]]))
        before     = 9880754969098
        callable   = <function baseline_mahalanobis at 0x7ffff48ef4c0>
        kwargs     = {}
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

 x = array([0.12890625, 0.1640625 , 0.04321289, 0.08691406, 0.17578125,
       0.02099609, 0.07958984, 0.07080078, 0.0625    , 0.04248047,
       0.125     ])
 y = array([0.00805664, 0.1796875 , 0.06079102, 0.09472656, 0.04614258,
       0.20800781, 0.12695312, 0.07617188, 0.02233887, 0.11083984,
       0.06689453])
 z = array([[ 8.5     ,  7.15625 ,  9.625   ,  7.84375 ,  9.625   ,  7.90625 ,
         4.625   ,  6.4375  ,  9.5625  , 11.... 9.6875  ,  3.671875,  4.25    , 10.5     ,  7.375   ,
         7.6875  ,  4.9375  ,  6.53125 ,  6.28125 ,  6.53125 ]])

    def baseline_mahalanobis(x, y, z):
        diff = x - y
 >       return np.sqrt(diff @ z @ diff)
 E       RuntimeWarning: invalid value encountered in sqrt

 diff       = array([ 0.12084961, -0.015625  , -0.01757812, -0.0078125 ,  0.12963867,
       -0.18701172, -0.04736328, -0.00537109,  0.04016113, -0.06835938,
        0.05810547])
 x          = array([0.12890625, 0.1640625 , 0.04321289, 0.08691406, 0.17578125,
       0.02099609, 0.07958984, 0.07080078, 0.0625    , 0.04248047,
       0.125     ])
 y          = array([0.00805664, 0.1796875 , 0.06079102, 0.09472656, 0.04614258,
       0.20800781, 0.12695312, 0.07617188, 0.02233887, 0.11083984,
       0.06689453])
 z          = array([[ 8.5     ,  7.15625 ,  9.625   ,  7.84375 ,  9.625   ,  7.90625 ,
         4.625   ,  6.4375  ,  9.5625  , 11.... 9.6875  ,  3.671875,  4.25    , 10.5     ,  7.375   ,
         7.6875  ,  4.9375  ,  6.53125 ,  6.28125 ,  6.53125 ]])

 scripts/test.py:152: RuntimeWarning
 _________________ test_curved_bf16[neon_i8-mahalanobis-11-4-5] _________________

 ndim = 11, metric = 'mahalanobis', capability = 'neon_i8'
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("ndim", [11, 16, 33])
    @pytest.mark.parametrize("metric", ["bilinear", "mahalanobis"])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_curved_bf16(ndim, metric, capability, stats_fixture):
        """Compares various SIMD kernels (like Bilinear Forms and Mahalanobis distances) for curved spaces
        with their NumPy or baseline counterparts, testing accuracy for the Brain-float format not
        natively supported by NumPy."""
    
        np.random.seed()
    
        # Let's generate some non-negative probability distributions
        a = np.abs(np.random.randn(ndim).astype(np.float32))
        b = np.abs(np.random.randn(ndim).astype(np.float32))
        a /= np.sum(a)
        b /= np.sum(b)
    
        # Let's compute the inverse of the covariance matrix, otherwise in the SciPy
        # implementation of the Mahalanobis we may face `sqrt` of a negative number.
        # We multiply the matrix by its transpose to get a positive-semi-definite matrix.
        c = np.abs(np.random.randn(ndim, ndim).astype(np.float32))
        c = np.dot(c, c.T)
    
        a_f32_rounded, a_bf16 = f32_downcast_to_bf16(a)
        b_f32_rounded, b_bf16 = f32_downcast_to_bf16(b)
        c_f32_rounded, c_bf16 = f32_downcast_to_bf16(c)
    
        keep_one_capability(capability)
        baseline_kernel, simd_kernel = name_to_kernels(metric)
 >       accurate_dt, accurate = profile(
            baseline_kernel,
            a_f32_rounded.astype(np.float64),
            b_f32_rounded.astype(np.float64),
            c_f32_rounded.astype(np.float64),
        )

 a          = array([0.0504795 , 0.14225361, 0.12101477, 0.14718536, 0.05214953,
       0.03942819, 0.00715109, 0.08749574, 0.24247923, 0.04620064,
       0.06416234], dtype=float32)
 a_bf16     = array([15695, 15890, 15864, 15895, 15702, 15649, 15338, 15795, 15992,
       15677, 15747], dtype=uint16)
 a_f32_rounded = array([0.05053711, 0.14257812, 0.12109375, 0.14746094, 0.05224609,
       0.03930664, 0.00714111, 0.08740234, 0.2421875 , 0.04614258,
       0.06396484], dtype=float32)
 b          = array([0.04601496, 0.14355789, 0.08270852, 0.19959657, 0.2146518 ,
       0.02157305, 0.0987808 , 0.07782577, 0.03997174, 0.04146788,
       0.03385102], dtype=float32)
 b_bf16     = array([15676, 15891, 15785, 15948, 15964, 15537, 15818, 15775, 15652,
       15658, 15627], dtype=uint16)
 b_f32_rounded = array([0.04589844, 0.14355469, 0.08251953, 0.19921875, 0.21484375,
       0.02160645, 0.09863281, 0.07763672, 0.04003906, 0.04150391,
       0.03393555], dtype=float32)
 baseline_kernel = <function baseline_mahalanobis at 0x7ffff48ef4c0>
 c          = array([[ 2.9370718,  5.6764865,  9.349348 ,  6.2527003,  4.3578324,
         7.728997 ,  3.6358573,  5.436585 ,  5.252...6,  8.241926 ,
         5.7890863, 12.701155 ,  8.492056 ,  7.5307484,  9.544923 ,
         4.950564 ]], dtype=float32)
 c_bf16     = array([[16444, 16566, 16662, 16584, 16523, 16631, 16489, 16558, 16552,
        16489, 16572],
       [16566, 16648, 16...4, 16665],
       [16572, 16691, 16712, 16565, 16644, 16569, 16715, 16648, 16625,
        16665, 16542]], dtype=uint16)
 c_f32_rounded = array([[ 2.9375  ,  5.6875  ,  9.375   ,  6.25    ,  4.34375 ,  7.71875 ,
         3.640625,  5.4375  ,  5.25    ,  3....  5.65625 ,  8.25    ,  5.78125 ,
        12.6875  ,  8.5     ,  7.53125 ,  9.5625  ,  4.9375  ]],
      dtype=float32)
 capability = 'neon_i8'
 metric     = 'mahalanobis'
 ndim       = 11
 simd_kernel = <built-in function mahalanobis>
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

 scripts/test.py:779: 
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
 scripts/test.py:179: in profile
    result = callable(*args, **kwargs)
        args       = (array([0.05053711, 0.14257812, 0.12109375, 0.14746094, 0.05224609,
       0.03930664, 0.00714111, 0.08740234, 0.24218...1.1875  , 12.5     ,  5.65625 ,  8.25    ,  5.78125 ,
        12.6875  ,  8.5     ,  7.53125 ,  9.5625  ,  4.9375  ]]))
        before     = 9881407346800
        callable   = <function baseline_mahalanobis at 0x7ffff48ef4c0>
        kwargs     = {}
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

 x = array([0.05053711, 0.14257812, 0.12109375, 0.14746094, 0.05224609,
       0.03930664, 0.00714111, 0.08740234, 0.2421875 , 0.04614258,
       0.06396484])
 y = array([0.04589844, 0.14355469, 0.08251953, 0.19921875, 0.21484375,
       0.02160645, 0.09863281, 0.07763672, 0.04003906, 0.04150391,
       0.03393555])
 z = array([[ 2.9375  ,  5.6875  ,  9.375   ,  6.25    ,  4.34375 ,  7.71875 ,
         3.640625,  5.4375  ,  5.25    ,  3....11.1875  , 12.5     ,  5.65625 ,  8.25    ,  5.78125 ,
        12.6875  ,  8.5     ,  7.53125 ,  9.5625  ,  4.9375  ]])

    def baseline_mahalanobis(x, y, z):
        diff = x - y
 >       return np.sqrt(diff @ z @ diff)
 E       RuntimeWarning: invalid value encountered in sqrt

 diff       = array([ 0.00463867, -0.00097656,  0.03857422, -0.05175781, -0.16259766,
        0.0177002 , -0.0914917 ,  0.00976562,  0.20214844,  0.00463867,
        0.0300293 ])
 x          = array([0.05053711, 0.14257812, 0.12109375, 0.14746094, 0.05224609,
       0.03930664, 0.00714111, 0.08740234, 0.2421875 , 0.04614258,
       0.06396484])
 y          = array([0.04589844, 0.14355469, 0.08251953, 0.19921875, 0.21484375,
       0.02160645, 0.09863281, 0.07763672, 0.04003906, 0.04150391,
       0.03393555])
 z          = array([[ 2.9375  ,  5.6875  ,  9.375   ,  6.25    ,  4.34375 ,  7.71875 ,
         3.640625,  5.4375  ,  5.25    ,  3....11.1875  , 12.5     ,  5.65625 ,  8.25    ,  5.78125 ,
        12.6875  ,  8.5     ,  7.53125 ,  9.5625  ,  4.9375  ]])

 scripts/test.py:152: RuntimeWarning
 _________________ test_curved_bf16[neon_i8-mahalanobis-11-5-5] _________________

 ndim = 11, metric = 'mahalanobis', capability = 'neon_i8'
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("ndim", [11, 16, 33])
    @pytest.mark.parametrize("metric", ["bilinear", "mahalanobis"])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_curved_bf16(ndim, metric, capability, stats_fixture):
        """Compares various SIMD kernels (like Bilinear Forms and Mahalanobis distances) for curved spaces
        with their NumPy or baseline counterparts, testing accuracy for the Brain-float format not
        natively supported by NumPy."""
    
        np.random.seed()
    
        # Let's generate some non-negative probability distributions
        a = np.abs(np.random.randn(ndim).astype(np.float32))
        b = np.abs(np.random.randn(ndim).astype(np.float32))
        a /= np.sum(a)
        b /= np.sum(b)
    
        # Let's compute the inverse of the covariance matrix, otherwise in the SciPy
        # implementation of the Mahalanobis we may face `sqrt` of a negative number.
        # We multiply the matrix by its transpose to get a positive-semi-definite matrix.
        c = np.abs(np.random.randn(ndim, ndim).astype(np.float32))
        c = np.dot(c, c.T)
    
        a_f32_rounded, a_bf16 = f32_downcast_to_bf16(a)
        b_f32_rounded, b_bf16 = f32_downcast_to_bf16(b)
        c_f32_rounded, c_bf16 = f32_downcast_to_bf16(c)
    
        keep_one_capability(capability)
        baseline_kernel, simd_kernel = name_to_kernels(metric)
 >       accurate_dt, accurate = profile(
            baseline_kernel,
            a_f32_rounded.astype(np.float64),
            b_f32_rounded.astype(np.float64),
            c_f32_rounded.astype(np.float64),
        )

 a          = array([0.00689728, 0.07263724, 0.14388928, 0.09344265, 0.00038593,
       0.02740988, 0.22714695, 0.15327665, 0.11345776, 0.05397059,
       0.10748576], dtype=float32)
 a_bf16     = array([15330, 15765, 15891, 15807, 14794, 15585, 15977, 15901, 15848,
       15709, 15836], dtype=uint16)
 a_f32_rounded = array([0.00689697, 0.07275391, 0.14355469, 0.09326172, 0.00038528,
       0.02746582, 0.22753906, 0.15332031, 0.11328125, 0.05395508,
       0.10742188], dtype=float32)
 b          = array([0.12982911, 0.06903804, 0.14025009, 0.02661307, 0.02312643,
       0.04258275, 0.09991504, 0.2156809 , 0.16511647, 0.0019417 ,
       0.08590632], dtype=float32)
 b_bf16     = array([15877, 15757, 15888, 15578, 15549, 15662, 15821, 15965, 15913,
       15103, 15792], dtype=uint16)
 b_f32_rounded = array([0.12988281, 0.06884766, 0.140625  , 0.02661133, 0.02307129,
       0.04248047, 0.10009766, 0.21582031, 0.16503906, 0.0019455 ,
       0.0859375 ], dtype=float32)
 baseline_kernel = <function baseline_mahalanobis at 0x7ffff48ef4c0>
 c          = array([[ 7.558822 ,  7.5032635,  8.71324  ,  9.232068 , 11.219319 ,
         9.781657 , 15.152673 , 14.858665 ,  7.605... ,  6.331847 ,
         5.9239087,  3.966001 ,  8.0779295,  9.069678 ,  8.397492 ,
         7.744718 ]], dtype=float32)
 c_bf16     = array([[16626, 16624, 16651, 16660, 16692, 16669, 16754, 16750, 16627,
        16675, 16667],
       [16624, 16629, 16...6, 16646],
       [16667, 16681, 16548, 16641, 16587, 16574, 16510, 16641, 16657,
        16646, 16632]], dtype=uint16)
 c_f32_rounded = array([[ 7.5625 ,  7.5    ,  8.6875 ,  9.25   , 11.25   ,  9.8125 ,
        15.125  , 14.875  ,  7.59375, 10.1875 ,  9...25 ,  5.125  ,  8.0625 ,  6.34375,  5.9375 ,
         3.96875,  8.0625 ,  9.0625 ,  8.375  ,  7.75   ]], dtype=float32)
 capability = 'neon_i8'
 metric     = 'mahalanobis'
 ndim       = 11
 simd_kernel = <built-in function mahalanobis>
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

 scripts/test.py:779: 
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
 scripts/test.py:179: in profile
    result = callable(*args, **kwargs)
        args       = (array([0.00689697, 0.07275391, 0.14355469, 0.09326172, 0.00038528,
       0.02746582, 0.22753906, 0.15332031, 0.11328...9.6875 , 10.5625 ,  5.125  ,  8.0625 ,  6.34375,  5.9375 ,
         3.96875,  8.0625 ,  9.0625 ,  8.375  ,  7.75   ]]))
        before     = 9882036573563
        callable   = <function baseline_mahalanobis at 0x7ffff48ef4c0>
        kwargs     = {}
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

 x = array([0.00689697, 0.07275391, 0.14355469, 0.09326172, 0.00038528,
       0.02746582, 0.22753906, 0.15332031, 0.11328125, 0.05395508,
       0.10742188])
 y = array([0.12988281, 0.06884766, 0.140625  , 0.02661133, 0.02307129,
       0.04248047, 0.10009766, 0.21582031, 0.16503906, 0.0019455 ,
       0.0859375 ])
 z = array([[ 7.5625 ,  7.5    ,  8.6875 ,  9.25   , 11.25   ,  9.8125 ,
        15.125  , 14.875  ,  7.59375, 10.1875 ,  9... 9.6875 , 10.5625 ,  5.125  ,  8.0625 ,  6.34375,  5.9375 ,
         3.96875,  8.0625 ,  9.0625 ,  8.375  ,  7.75   ]])

    def baseline_mahalanobis(x, y, z):
        diff = x - y
 >       return np.sqrt(diff @ z @ diff)
 E       RuntimeWarning: invalid value encountered in sqrt

 diff       = array([-0.12298584,  0.00390625,  0.00292969,  0.06665039, -0.022686  ,
       -0.01501465,  0.12744141, -0.0625    , -0.05175781,  0.05200958,
        0.02148438])
 x          = array([0.00689697, 0.07275391, 0.14355469, 0.09326172, 0.00038528,
       0.02746582, 0.22753906, 0.15332031, 0.11328125, 0.05395508,
       0.10742188])
 y          = array([0.12988281, 0.06884766, 0.140625  , 0.02661133, 0.02307129,
       0.04248047, 0.10009766, 0.21582031, 0.16503906, 0.0019455 ,
       0.0859375 ])
 z          = array([[ 7.5625 ,  7.5    ,  8.6875 ,  9.25   , 11.25   ,  9.8125 ,
        15.125  , 14.875  ,  7.59375, 10.1875 ,  9... 9.6875 , 10.5625 ,  5.125  ,  8.0625 ,  6.34375,  5.9375 ,
         3.96875,  8.0625 ,  9.0625 ,  8.375  ,  7.75   ]])

 scripts/test.py:152: RuntimeWarning
 _________________ test_curved_bf16[neon_i8-mahalanobis-16-1-5] _________________

 ndim = 16, metric = 'mahalanobis', capability = 'neon_i8'
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("ndim", [11, 16, 33])
    @pytest.mark.parametrize("metric", ["bilinear", "mahalanobis"])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_curved_bf16(ndim, metric, capability, stats_fixture):
        """Compares various SIMD kernels (like Bilinear Forms and Mahalanobis distances) for curved spaces
        with their NumPy or baseline counterparts, testing accuracy for the Brain-float format not
        natively supported by NumPy."""
    
        np.random.seed()
    
        # Let's generate some non-negative probability distributions
        a = np.abs(np.random.randn(ndim).astype(np.float32))
        b = np.abs(np.random.randn(ndim).astype(np.float32))
        a /= np.sum(a)
        b /= np.sum(b)
    
        # Let's compute the inverse of the covariance matrix, otherwise in the SciPy
        # implementation of the Mahalanobis we may face `sqrt` of a negative number.
        # We multiply the matrix by its transpose to get a positive-semi-definite matrix.
        c = np.abs(np.random.randn(ndim, ndim).astype(np.float32))
        c = np.dot(c, c.T)
    
        a_f32_rounded, a_bf16 = f32_downcast_to_bf16(a)
        b_f32_rounded, b_bf16 = f32_downcast_to_bf16(b)
        c_f32_rounded, c_bf16 = f32_downcast_to_bf16(c)
    
        keep_one_capability(capability)
        baseline_kernel, simd_kernel = name_to_kernels(metric)
 >       accurate_dt, accurate = profile(
            baseline_kernel,
            a_f32_rounded.astype(np.float64),
            b_f32_rounded.astype(np.float64),
            c_f32_rounded.astype(np.float64),
        )

 a          = array([1.38118327e-01, 1.13770850e-01, 3.53268944e-02, 1.12983815e-01,
       1.64687894e-02, 4.80867513e-02, 1.489433...91130e-02, 1.74880754e-02,
       9.86912027e-02, 4.70121950e-02, 5.43905646e-02, 1.30542107e-02],
      dtype=float32)
 a_bf16     = array([15885, 15849, 15633, 15847, 15495, 15685, 14620, 15762, 15696,
       15907, 15549, 15503, 15818, 15681, 15711, 15446], dtype=uint16)
 a_f32_rounded = array([1.3769531e-01, 1.1376953e-01, 3.5400391e-02, 1.1279297e-01,
       1.6479492e-02, 4.8095703e-02, 1.4877319e-04,... 2.3071289e-02, 1.7456055e-02,
       9.8632812e-02, 4.7119141e-02, 5.4443359e-02, 1.3061523e-02],
      dtype=float32)
 b          = array([0.13426441, 0.11970926, 0.08356403, 0.06455997, 0.06224259,
       0.01561862, 0.01859531, 0.08916356, 0.02985151, 0.0317154 ,
       0.06238241, 0.06869277, 0.01997138, 0.0172393 , 0.12901191,
       0.05341751], dtype=float32)
 b_bf16     = array([15881, 15861, 15787, 15748, 15743, 15488, 15512, 15799, 15605,
       15618, 15744, 15757, 15524, 15501, 15876, 15707], dtype=uint16)
 b_f32_rounded = array([0.13378906, 0.11962891, 0.08349609, 0.06445312, 0.06225586,
       0.015625  , 0.01855469, 0.08935547, 0.02990723, 0.03173828,
       0.0625    , 0.06884766, 0.02001953, 0.01721191, 0.12890625,
       0.0534668 ], dtype=float32)
 baseline_kernel = <function baseline_mahalanobis at 0x7ffff48ef4c0>
 c          = array([[ 6.166272 ,  6.6893787,  8.846029 , 10.180864 ,  7.9459424,
         5.0950565,  4.030119 ,  7.4675174,  3.791... ,  4.6773214,
         5.933567 ,  6.7594185,  8.284556 ,  8.743418 ,  5.7720304,
         3.95881  ]], dtype=float32)
 c_bf16     = array([[16581, 16598, 16654, 16675, 16638, 16547, 16513, 16623, 16499,
        16694, 16638, 16563, 16669, 16656, 1667...614, 16533, 16561, 16519, 16549, 16593, 16516,
        16534, 16574, 16600, 16645, 16652, 16569, 16509]], dtype=uint16)
 c_f32_rounded = array([[ 6.15625 ,  6.6875  ,  8.875   , 10.1875  ,  7.9375  ,  5.09375 ,
         4.03125 ,  7.46875 ,  3.796875, 11....3125 ,  4.125   ,  4.6875  ,  5.9375  ,  6.75    ,
         8.3125  ,  8.75    ,  5.78125 ,  3.953125]], dtype=float32)
 capability = 'neon_i8'
 metric     = 'mahalanobis'
 ndim       = 16
 simd_kernel = <built-in function mahalanobis>
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

 scripts/test.py:779: 
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
 scripts/test.py:179: in profile
    result = callable(*args, **kwargs)
        args       = (array([1.37695312e-01, 1.13769531e-01, 3.54003906e-02, 1.12792969e-01,
       1.64794922e-02, 4.80957031e-02, 1.48773...5.15625 ,  6.53125 ,  4.125   ,  4.6875  ,  5.9375  ,  6.75    ,
         8.3125  ,  8.75    ,  5.78125 ,  3.953125]]))
        before     = 9882660597114
        callable   = <function baseline_mahalanobis at 0x7ffff48ef4c0>
        kwargs     = {}
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

 x = array([1.37695312e-01, 1.13769531e-01, 3.54003906e-02, 1.12792969e-01,
       1.64794922e-02, 4.80957031e-02, 1.487731...1.59179688e-01, 2.30712891e-02, 1.74560547e-02,
       9.86328125e-02, 4.71191406e-02, 5.44433594e-02, 1.30615234e-02])
 y = array([0.13378906, 0.11962891, 0.08349609, 0.06445312, 0.06225586,
       0.015625  , 0.01855469, 0.08935547, 0.02990723, 0.03173828,
       0.0625    , 0.06884766, 0.02001953, 0.01721191, 0.12890625,
       0.0534668 ])
 z = array([[ 6.15625 ,  6.6875  ,  8.875   , 10.1875  ,  7.9375  ,  5.09375 ,
         4.03125 ,  7.46875 ,  3.796875, 11.... 5.15625 ,  6.53125 ,  4.125   ,  4.6875  ,  5.9375  ,  6.75    ,
         8.3125  ,  8.75    ,  5.78125 ,  3.953125]])

    def baseline_mahalanobis(x, y, z):
        diff = x - y
 >       return np.sqrt(diff @ z @ diff)
 E       RuntimeWarning: invalid value encountered in sqrt

 diff       = array([ 0.00390625, -0.00585938, -0.0480957 ,  0.04833984, -0.04577637,
        0.0324707 , -0.01840591, -0.01806641,  0.02087402,  0.12744141,
       -0.03942871, -0.0513916 ,  0.07861328,  0.02990723, -0.07446289,
       -0.04040527])
 x          = array([1.37695312e-01, 1.13769531e-01, 3.54003906e-02, 1.12792969e-01,
       1.64794922e-02, 4.80957031e-02, 1.487731...1.59179688e-01, 2.30712891e-02, 1.74560547e-02,
       9.86328125e-02, 4.71191406e-02, 5.44433594e-02, 1.30615234e-02])
 y          = array([0.13378906, 0.11962891, 0.08349609, 0.06445312, 0.06225586,
       0.015625  , 0.01855469, 0.08935547, 0.02990723, 0.03173828,
       0.0625    , 0.06884766, 0.02001953, 0.01721191, 0.12890625,
       0.0534668 ])
 z          = array([[ 6.15625 ,  6.6875  ,  8.875   , 10.1875  ,  7.9375  ,  5.09375 ,
         4.03125 ,  7.46875 ,  3.796875, 11.... 5.15625 ,  6.53125 ,  4.125   ,  4.6875  ,  5.9375  ,  6.75    ,
         8.3125  ,  8.75    ,  5.78125 ,  3.953125]])

 scripts/test.py:152: RuntimeWarning
 _________________ test_curved_bf16[neon_i8-mahalanobis-33-2-5] _________________

 ndim = 33, metric = 'mahalanobis', capability = 'neon_i8'
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("ndim", [11, 16, 33])
    @pytest.mark.parametrize("metric", ["bilinear", "mahalanobis"])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_curved_bf16(ndim, metric, capability, stats_fixture):
        """Compares various SIMD kernels (like Bilinear Forms and Mahalanobis distances) for curved spaces
        with their NumPy or baseline counterparts, testing accuracy for the Brain-float format not
        natively supported by NumPy."""
    
        np.random.seed()
    
        # Let's generate some non-negative probability distributions
        a = np.abs(np.random.randn(ndim).astype(np.float32))
        b = np.abs(np.random.randn(ndim).astype(np.float32))
        a /= np.sum(a)
        b /= np.sum(b)
    
        # Let's compute the inverse of the covariance matrix, otherwise in the SciPy
        # implementation of the Mahalanobis we may face `sqrt` of a negative number.
        # We multiply the matrix by its transpose to get a positive-semi-definite matrix.
        c = np.abs(np.random.randn(ndim, ndim).astype(np.float32))
        c = np.dot(c, c.T)
    
        a_f32_rounded, a_bf16 = f32_downcast_to_bf16(a)
        b_f32_rounded, b_bf16 = f32_downcast_to_bf16(b)
        c_f32_rounded, c_bf16 = f32_downcast_to_bf16(c)
    
        keep_one_capability(capability)
        baseline_kernel, simd_kernel = name_to_kernels(metric)
 >       accurate_dt, accurate = profile(
            baseline_kernel,
            a_f32_rounded.astype(np.float64),
            b_f32_rounded.astype(np.float64),
            c_f32_rounded.astype(np.float64),
        )

 a          = array([0.02771401, 0.02441871, 0.01009332, 0.06034269, 0.05200594,
       0.0116273 , 0.07018041, 0.04166625, 0.053369... 0.04392768, 0.02068539, 0.03772377, 0.02286907, 0.02571474,
       0.03509624, 0.04845653, 0.03760914], dtype=float32)
 a_bf16     = array([15587, 15560, 15397, 15735, 15701, 15423, 15760, 15659, 15707,
       15452, 15561, 15673, 15648, 15415, 15653,...15586, 15569, 15695, 15508, 15245, 15492, 15668, 15529,
       15643, 15547, 15571, 15632, 15686, 15642], dtype=uint16)
 a_f32_rounded = array([0.02770996, 0.02441406, 0.0100708 , 0.06030273, 0.05200195,
       0.01165771, 0.0703125 , 0.04174805, 0.053466... 0.04394531, 0.02062988, 0.0378418 , 0.02282715, 0.02575684,
       0.03515625, 0.04833984, 0.03759766], dtype=float32)
 b          = array([0.01929059, 0.00443944, 0.00504724, 0.00363999, 0.00726622,
       0.03114038, 0.00969869, 0.03168059, 0.006743... 0.00022595, 0.08307818, 0.05936768, 0.00509815, 0.05018812,
       0.05876008, 0.02152122, 0.06042321], dtype=float32)
 b_bf16     = array([15518, 15249, 15269, 15215, 15342, 15615, 15391, 15618, 15325,
       15385, 15420, 15325, 15533, 15722, 15563,...15618, 15644, 15762, 15621, 15555, 15740, 14701, 15786,
       15731, 15271, 15694, 15729, 15536, 15735], dtype=uint16)
 b_f32_rounded = array([0.01928711, 0.00442505, 0.0050354 , 0.00364685, 0.00726318,
       0.03112793, 0.00970459, 0.03173828, 0.006744... 0.00022602, 0.08300781, 0.05932617, 0.00509644, 0.05029297,
       0.05883789, 0.02148438, 0.06030273], dtype=float32)
 baseline_kernel = <function baseline_mahalanobis at 0x7ffff48ef4c0>
 c          = array([[28.939184, 29.864614, 36.386215, ..., 29.950834, 33.099014,
        26.944416],
       [29.864614, 16.692266, ...    12.652157],
       [26.944416, 21.16245 , 17.631853, ..., 23.896809, 12.652157,
        25.271294]], dtype=float32)
 c_bf16     = array([[16872, 16879, 16914, ..., 16880, 16900, 16856],
       [16879, 16774, 16798, ..., 16826, 16822, 16809],
      ...[16900, 16822, 16809, ..., 16819, 16779, 16714],
       [16856, 16809, 16781, ..., 16831, 16714, 16842]], dtype=uint16)
 c_f32_rounded = array([[29.   , 29.875, 36.5  , ..., 30.   , 33.   , 27.   ],
       [29.875, 16.75 , 19.75 , ..., 23.25 , 22.75 , 21.....125, ..., 22.375, 17.375, 12.625],
       [27.   , 21.125, 17.625, ..., 23.875, 12.625, 25.25 ]],
      dtype=float32)
 capability = 'neon_i8'
 metric     = 'mahalanobis'
 ndim       = 33
 simd_kernel = <built-in function mahalanobis>
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

 scripts/test.py:779: 
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
 scripts/test.py:179: in profile
    result = callable(*args, **kwargs)
        args       = (array([0.02770996, 0.02441406, 0.0100708 , 0.06030273, 0.05200195,
       0.01165771, 0.0703125 , 0.04174805, 0.05346... [33.   , 22.75 , 21.125, ..., 22.375, 17.375, 12.625],
       [27.   , 21.125, 17.625, ..., 23.875, 12.625, 25.25 ]]))
        before     = 9883419611284
        callable   = <function baseline_mahalanobis at 0x7ffff48ef4c0>
        kwargs     = {}
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

 x = array([0.02770996, 0.02441406, 0.0100708 , 0.06030273, 0.05200195,
       0.01165771, 0.0703125 , 0.04174805, 0.053466...1611328,
       0.04394531, 0.02062988, 0.0378418 , 0.02282715, 0.02575684,
       0.03515625, 0.04833984, 0.03759766])
 y = array([0.01928711, 0.00442505, 0.0050354 , 0.00364685, 0.00726318,
       0.03112793, 0.00970459, 0.03173828, 0.006744...6152344,
       0.00022602, 0.08300781, 0.05932617, 0.00509644, 0.05029297,
       0.05883789, 0.02148438, 0.06030273])
 z = array([[29.   , 29.875, 36.5  , ..., 30.   , 33.   , 27.   ],
       [29.875, 16.75 , 19.75 , ..., 23.25 , 22.75 , 21....  [33.   , 22.75 , 21.125, ..., 22.375, 17.375, 12.625],
       [27.   , 21.125, 17.625, ..., 23.875, 12.625, 25.25 ]])

    def baseline_mahalanobis(x, y, z):
        diff = x - y
 >       return np.sqrt(diff @ z @ diff)
 E       RuntimeWarning: invalid value encountered in sqrt

 diff       = array([ 0.00842285,  0.01998901,  0.0050354 ,  0.05665588,  0.04473877,
       -0.01947021,  0.06060791,  0.01000977, ...
        0.04371929, -0.06237793, -0.02148438,  0.01773071, -0.02453613,
       -0.02368164,  0.02685547, -0.02270508])
 x          = array([0.02770996, 0.02441406, 0.0100708 , 0.06030273, 0.05200195,
       0.01165771, 0.0703125 , 0.04174805, 0.053466...1611328,
       0.04394531, 0.02062988, 0.0378418 , 0.02282715, 0.02575684,
       0.03515625, 0.04833984, 0.03759766])
 y          = array([0.01928711, 0.00442505, 0.0050354 , 0.00364685, 0.00726318,
       0.03112793, 0.00970459, 0.03173828, 0.006744...6152344,
       0.00022602, 0.08300781, 0.05932617, 0.00509644, 0.05029297,
       0.05883789, 0.02148438, 0.06030273])
 z          = array([[29.   , 29.875, 36.5  , ..., 30.   , 33.   , 27.   ],
       [29.875, 16.75 , 19.75 , ..., 23.25 , 22.75 , 21....  [33.   , 22.75 , 21.125, ..., 22.375, 17.375, 12.625],
       [27.   , 21.125, 17.625, ..., 23.875, 12.625, 25.25 ]])

 scripts/test.py:152: RuntimeWarning
 ___________________ test_curved_bf16[sve-mahalanobis-11-1-5] ___________________

 ndim = 11, metric = 'mahalanobis', capability = 'sve'
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("ndim", [11, 16, 33])
    @pytest.mark.parametrize("metric", ["bilinear", "mahalanobis"])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_curved_bf16(ndim, metric, capability, stats_fixture):
        """Compares various SIMD kernels (like Bilinear Forms and Mahalanobis distances) for curved spaces
        with their NumPy or baseline counterparts, testing accuracy for the Brain-float format not
        natively supported by NumPy."""
    
        np.random.seed()
    
        # Let's generate some non-negative probability distributions
        a = np.abs(np.random.randn(ndim).astype(np.float32))
        b = np.abs(np.random.randn(ndim).astype(np.float32))
        a /= np.sum(a)
        b /= np.sum(b)
    
        # Let's compute the inverse of the covariance matrix, otherwise in the SciPy
        # implementation of the Mahalanobis we may face `sqrt` of a negative number.
        # We multiply the matrix by its transpose to get a positive-semi-definite matrix.
        c = np.abs(np.random.randn(ndim, ndim).astype(np.float32))
        c = np.dot(c, c.T)
    
        a_f32_rounded, a_bf16 = f32_downcast_to_bf16(a)
        b_f32_rounded, b_bf16 = f32_downcast_to_bf16(b)
        c_f32_rounded, c_bf16 = f32_downcast_to_bf16(c)
    
        keep_one_capability(capability)
        baseline_kernel, simd_kernel = name_to_kernels(metric)
 >       accurate_dt, accurate = profile(
            baseline_kernel,
            a_f32_rounded.astype(np.float64),
            b_f32_rounded.astype(np.float64),
            c_f32_rounded.astype(np.float64),
        )

 a          = array([0.07749116, 0.108697  , 0.06127771, 0.12634999, 0.01117223,
       0.14559685, 0.14798415, 0.08119244, 0.1695299 , 0.06788721,
       0.00282143], dtype=float32)
 a_bf16     = array([15775, 15839, 15739, 15873, 15415, 15893, 15896, 15782, 15918,
       15755, 15161], dtype=uint16)
 a_f32_rounded = array([0.07763672, 0.10888672, 0.0612793 , 0.12597656, 0.01116943,
       0.14550781, 0.1484375 , 0.08105469, 0.16992188, 0.06787109,
       0.00282288], dtype=float32)
 b          = array([0.02918996, 0.1151444 , 0.16139738, 0.09343333, 0.0747437 ,
       0.10610325, 0.1390541 , 0.00055409, 0.12142816, 0.01044094,
       0.14851063], dtype=float32)
 b_bf16     = array([15599, 15852, 15909, 15807, 15769, 15833, 15886, 14865, 15865,
       15403, 15896], dtype=uint16)
 b_f32_rounded = array([0.0291748 , 0.11523438, 0.16113281, 0.09326172, 0.07470703,
       0.10595703, 0.13867188, 0.00055313, 0.12158203, 0.01043701,
       0.1484375 ], dtype=float32)
 baseline_kernel = <function baseline_mahalanobis at 0x7ffff48ef4c0>
 c          = array([[ 8.167284 ,  7.428071 ,  8.448956 , 12.435962 ,  5.611139 ,
         4.517789 ,  8.944842 ,  7.1289244,  9.618...5,  2.873657 ,
         8.42792  ,  5.4611506,  4.930824 ,  4.352989 ,  5.4175773,
         3.4242263]], dtype=float32)
 c_bf16     = array([[16643, 16622, 16647, 16711, 16564, 16529, 16655, 16612, 16666,
        16658, 16627],
       [16622, 16530, 16...5, 16557],
       [16627, 16472, 16545, 16523, 16440, 16647, 16559, 16542, 16523,
        16557, 16475]], dtype=uint16)
 c_f32_rounded = array([[ 8.1875  ,  7.4375  ,  8.4375  , 12.4375  ,  5.625   ,  4.53125 ,
         8.9375  ,  7.125   ,  9.625   ,  9....  4.34375 ,  2.875   ,  8.4375  ,
         5.46875 ,  4.9375  ,  4.34375 ,  5.40625 ,  3.421875]],
      dtype=float32)
 capability = 'sve'
 metric     = 'mahalanobis'
 ndim       = 11
 simd_kernel = <built-in function mahalanobis>
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

 scripts/test.py:779: 
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
 scripts/test.py:179: in profile
    result = callable(*args, **kwargs)
        args       = (array([0.07763672, 0.10888672, 0.0612793 , 0.12597656, 0.01116943,
       0.14550781, 0.1484375 , 0.08105469, 0.16992...3.375   ,  5.03125 ,  4.34375 ,  2.875   ,  8.4375  ,
         5.46875 ,  4.9375  ,  4.34375 ,  5.40625 ,  3.421875]]))
        before     = 9884402068022
        callable   = <function baseline_mahalanobis at 0x7ffff48ef4c0>
        kwargs     = {}
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

 x = array([0.07763672, 0.10888672, 0.0612793 , 0.12597656, 0.01116943,
       0.14550781, 0.1484375 , 0.08105469, 0.16992188, 0.06787109,
       0.00282288])
 y = array([0.0291748 , 0.11523438, 0.16113281, 0.09326172, 0.07470703,
       0.10595703, 0.13867188, 0.00055313, 0.12158203, 0.01043701,
       0.1484375 ])
 z = array([[ 8.1875  ,  7.4375  ,  8.4375  , 12.4375  ,  5.625   ,  4.53125 ,
         8.9375  ,  7.125   ,  9.625   ,  9.... 3.375   ,  5.03125 ,  4.34375 ,  2.875   ,  8.4375  ,
         5.46875 ,  4.9375  ,  4.34375 ,  5.40625 ,  3.421875]])

    def baseline_mahalanobis(x, y, z):
        diff = x - y
 >       return np.sqrt(diff @ z @ diff)
 E       RuntimeWarning: invalid value encountered in sqrt

 diff       = array([ 0.04846191, -0.00634766, -0.09985352,  0.03271484, -0.0635376 ,
        0.03955078,  0.00976562,  0.08050156,  0.04833984,  0.05743408,
       -0.14561462])
 x          = array([0.07763672, 0.10888672, 0.0612793 , 0.12597656, 0.01116943,
       0.14550781, 0.1484375 , 0.08105469, 0.16992188, 0.06787109,
       0.00282288])
 y          = array([0.0291748 , 0.11523438, 0.16113281, 0.09326172, 0.07470703,
       0.10595703, 0.13867188, 0.00055313, 0.12158203, 0.01043701,
       0.1484375 ])
 z          = array([[ 8.1875  ,  7.4375  ,  8.4375  , 12.4375  ,  5.625   ,  4.53125 ,
         8.9375  ,  7.125   ,  9.625   ,  9.... 3.375   ,  5.03125 ,  4.34375 ,  2.875   ,  8.4375  ,
         5.46875 ,  4.9375  ,  4.34375 ,  5.40625 ,  3.421875]])

 scripts/test.py:152: RuntimeWarning
 ___________________ test_curved_bf16[sve-mahalanobis-11-3-5] ___________________

 ndim = 11, metric = 'mahalanobis', capability = 'sve'
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("ndim", [11, 16, 33])
    @pytest.mark.parametrize("metric", ["bilinear", "mahalanobis"])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_curved_bf16(ndim, metric, capability, stats_fixture):
        """Compares various SIMD kernels (like Bilinear Forms and Mahalanobis distances) for curved spaces
        with their NumPy or baseline counterparts, testing accuracy for the Brain-float format not
        natively supported by NumPy."""
    
        np.random.seed()
    
        # Let's generate some non-negative probability distributions
        a = np.abs(np.random.randn(ndim).astype(np.float32))
        b = np.abs(np.random.randn(ndim).astype(np.float32))
        a /= np.sum(a)
        b /= np.sum(b)
    
        # Let's compute the inverse of the covariance matrix, otherwise in the SciPy
        # implementation of the Mahalanobis we may face `sqrt` of a negative number.
        # We multiply the matrix by its transpose to get a positive-semi-definite matrix.
        c = np.abs(np.random.randn(ndim, ndim).astype(np.float32))
        c = np.dot(c, c.T)
    
        a_f32_rounded, a_bf16 = f32_downcast_to_bf16(a)
        b_f32_rounded, b_bf16 = f32_downcast_to_bf16(b)
        c_f32_rounded, c_bf16 = f32_downcast_to_bf16(c)
    
        keep_one_capability(capability)
        baseline_kernel, simd_kernel = name_to_kernels(metric)
 >       accurate_dt, accurate = profile(
            baseline_kernel,
            a_f32_rounded.astype(np.float64),
            b_f32_rounded.astype(np.float64),
            c_f32_rounded.astype(np.float64),
        )

 a          = array([0.1061162 , 0.08914208, 0.23265861, 0.11570425, 0.04850923,
       0.09372661, 0.05505301, 0.10444774, 0.00626087, 0.14633417,
       0.00204719], dtype=float32)
 a_bf16     = array([15833, 15799, 15982, 15853, 15687, 15808, 15713, 15830, 15309,
       15894, 15110], dtype=uint16)
 a_f32_rounded = array([0.10595703, 0.08935547, 0.23242188, 0.11572266, 0.04858398,
       0.09375   , 0.05493164, 0.10449219, 0.0062561 , 0.14648438,
       0.00204468], dtype=float32)
 b          = array([0.07205774, 0.04271147, 0.12569878, 0.05903756, 0.05355068,
       0.1289715 , 0.12575123, 0.08772001, 0.16481343, 0.1179905 ,
       0.02169706], dtype=float32)
 b_bf16     = array([15764, 15663, 15873, 15730, 15707, 15876, 15873, 15796, 15913,
       15858, 15538], dtype=uint16)
 b_f32_rounded = array([0.07226562, 0.04272461, 0.12597656, 0.05908203, 0.0534668 ,
       0.12890625, 0.12597656, 0.08789062, 0.16503906, 0.11816406,
       0.02172852], dtype=float32)
 baseline_kernel = <function baseline_mahalanobis at 0x7ffff48ef4c0>
 c          = array([[ 4.8108883,  2.7147663,  3.095244 ,  2.9948854,  2.9209447,
         2.9544334,  2.8800848,  3.3324971,  2.748... ,  3.4981306,
         7.306075 ,  3.359054 , 10.099337 ,  4.807614 ,  4.627322 ,
         4.5794272]], dtype=float32)
 c_bf16     = array([[16538, 16430, 16454, 16448, 16443, 16445, 16440, 16469, 16432,
        16304, 16487],
       [16430, 16618, 16...1, 16532],
       [16487, 16647, 16560, 16665, 16480, 16618, 16471, 16674, 16538,
        16532, 16531]], dtype=uint16)
 c_f32_rounded = array([[ 4.8125   ,  2.71875  ,  3.09375  ,  3.       ,  2.921875 ,
         2.953125 ,  2.875    ,  3.328125 ,  2.75 ... ,  3.5      ,
         7.3125   ,  3.359375 , 10.125    ,  4.8125   ,  4.625    ,
         4.59375  ]], dtype=float32)
 capability = 'sve'
 metric     = 'mahalanobis'
 ndim       = 11
 simd_kernel = <built-in function mahalanobis>
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

 scripts/test.py:779: 
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
 scripts/test.py:179: in profile
    result = callable(*args, **kwargs)
        args       = (array([0.10595703, 0.08935547, 0.23242188, 0.11572266, 0.04858398,
       0.09375   , 0.05493164, 0.10449219, 0.00625...   ,  9.5625   ,  3.5      ,
         7.3125   ,  3.359375 , 10.125    ,  4.8125   ,  4.625    ,
         4.59375  ]]))
        before     = 9885048337786
        callable   = <function baseline_mahalanobis at 0x7ffff48ef4c0>
        kwargs     = {}
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

 x = array([0.10595703, 0.08935547, 0.23242188, 0.11572266, 0.04858398,
       0.09375   , 0.05493164, 0.10449219, 0.0062561 , 0.14648438,
       0.00204468])
 y = array([0.07226562, 0.04272461, 0.12597656, 0.05908203, 0.0534668 ,
       0.12890625, 0.12597656, 0.08789062, 0.16503906, 0.11816406,
       0.02172852])
 z = array([[ 4.8125   ,  2.71875  ,  3.09375  ,  3.       ,  2.921875 ,
         2.953125 ,  2.875    ,  3.328125 ,  2.75 ...    ,  9.5625   ,  3.5      ,
         7.3125   ,  3.359375 , 10.125    ,  4.8125   ,  4.625    ,
         4.59375  ]])

    def baseline_mahalanobis(x, y, z):
        diff = x - y
 >       return np.sqrt(diff @ z @ diff)
 E       RuntimeWarning: invalid value encountered in sqrt

 diff       = array([ 0.03369141,  0.04663086,  0.10644531,  0.05664062, -0.00488281,
       -0.03515625, -0.07104492,  0.01660156, -0.15878296,  0.02832031,
       -0.01968384])
 x          = array([0.10595703, 0.08935547, 0.23242188, 0.11572266, 0.04858398,
       0.09375   , 0.05493164, 0.10449219, 0.0062561 , 0.14648438,
       0.00204468])
 y          = array([0.07226562, 0.04272461, 0.12597656, 0.05908203, 0.0534668 ,
       0.12890625, 0.12597656, 0.08789062, 0.16503906, 0.11816406,
       0.02172852])
 z          = array([[ 4.8125   ,  2.71875  ,  3.09375  ,  3.       ,  2.921875 ,
         2.953125 ,  2.875    ,  3.328125 ,  2.75 ...    ,  9.5625   ,  3.5      ,
         7.3125   ,  3.359375 , 10.125    ,  4.8125   ,  4.625    ,
         4.59375  ]])

 scripts/test.py:152: RuntimeWarning
 ___________________ test_curved_bf16[sve-mahalanobis-11-4-5] ___________________

 ndim = 11, metric = 'mahalanobis', capability = 'sve'
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("ndim", [11, 16, 33])
    @pytest.mark.parametrize("metric", ["bilinear", "mahalanobis"])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_curved_bf16(ndim, metric, capability, stats_fixture):
        """Compares various SIMD kernels (like Bilinear Forms and Mahalanobis distances) for curved spaces
        with their NumPy or baseline counterparts, testing accuracy for the Brain-float format not
        natively supported by NumPy."""
    
        np.random.seed()
    
        # Let's generate some non-negative probability distributions
        a = np.abs(np.random.randn(ndim).astype(np.float32))
        b = np.abs(np.random.randn(ndim).astype(np.float32))
        a /= np.sum(a)
        b /= np.sum(b)
    
        # Let's compute the inverse of the covariance matrix, otherwise in the SciPy
        # implementation of the Mahalanobis we may face `sqrt` of a negative number.
        # We multiply the matrix by its transpose to get a positive-semi-definite matrix.
        c = np.abs(np.random.randn(ndim, ndim).astype(np.float32))
        c = np.dot(c, c.T)
    
        a_f32_rounded, a_bf16 = f32_downcast_to_bf16(a)
        b_f32_rounded, b_bf16 = f32_downcast_to_bf16(b)
        c_f32_rounded, c_bf16 = f32_downcast_to_bf16(c)
    
        keep_one_capability(capability)
        baseline_kernel, simd_kernel = name_to_kernels(metric)
 >       accurate_dt, accurate = profile(
            baseline_kernel,
            a_f32_rounded.astype(np.float64),
            b_f32_rounded.astype(np.float64),
            c_f32_rounded.astype(np.float64),
        )

 a          = array([0.11329042, 0.08624797, 0.04481695, 0.11130306, 0.06396271,
       0.00678671, 0.12659977, 0.00356113, 0.12791523, 0.11409496,
       0.2014211 ], dtype=float32)
 a_bf16     = array([15848, 15793, 15672, 15844, 15747, 15326, 15874, 15209, 15875,
       15850, 15950], dtype=uint16)
 a_f32_rounded = array([0.11328125, 0.08642578, 0.04492188, 0.11132812, 0.06396484,
       0.0067749 , 0.12695312, 0.0035553 , 0.12792969, 0.11425781,
       0.20117188], dtype=float32)
 b          = array([0.03012873, 0.02095925, 0.00689259, 0.11898398, 0.21793362,
       0.13338871, 0.05856706, 0.05226625, 0.15816483, 0.12485888,
       0.07785605], dtype=float32)
 b_bf16     = array([15607, 15532, 15330, 15860, 15967, 15881, 15728, 15702, 15906,
       15872, 15775], dtype=uint16)
 b_f32_rounded = array([0.03015137, 0.02099609, 0.00689697, 0.11914062, 0.21777344,
       0.13378906, 0.05859375, 0.05224609, 0.15820312, 0.125     ,
       0.07763672], dtype=float32)
 baseline_kernel = <function baseline_mahalanobis at 0x7ffff48ef4c0>
 c          = array([[ 9.352192 ,  3.9985487,  6.09161  ,  7.8486266,  5.6173744,
         6.768695 ,  6.8761983,  4.1257057,  4.060... ,  5.5323453,
        11.950905 ,  8.140364 ,  8.561466 ,  6.2532573,  8.560626 ,
         4.7043653]], dtype=float32)
 c_bf16     = array([[16662, 16512, 16579, 16635, 16564, 16601, 16604, 16516, 16514,
        16570, 16540],
       [16512, 16597, 16...2, 16649],
       [16540, 16642, 16591, 16641, 16561, 16703, 16642, 16649, 16584,
        16649, 16535]], dtype=uint16)
 c_f32_rounded = array([[ 9.375   ,  4.      ,  6.09375 ,  7.84375 ,  5.625   ,  6.78125 ,
         6.875   ,  4.125   ,  4.0625  ,  5....  8.0625  ,  5.53125 , 11.9375  ,
         8.125   ,  8.5625  ,  6.25    ,  8.5625  ,  4.71875 ]],
      dtype=float32)
 capability = 'sve'
 metric     = 'mahalanobis'
 ndim       = 11
 simd_kernel = <built-in function mahalanobis>
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

 scripts/test.py:779: 
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
 scripts/test.py:179: in profile
    result = callable(*args, **kwargs)
        args       = (array([0.11328125, 0.08642578, 0.04492188, 0.11132812, 0.06396484,
       0.0067749 , 0.12695312, 0.0035553 , 0.12792...8.125   ,  6.46875 ,  8.0625  ,  5.53125 , 11.9375  ,
         8.125   ,  8.5625  ,  6.25    ,  8.5625  ,  4.71875 ]]))
        before     = 9885678981715
        callable   = <function baseline_mahalanobis at 0x7ffff48ef4c0>
        kwargs     = {}
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

 x = array([0.11328125, 0.08642578, 0.04492188, 0.11132812, 0.06396484,
       0.0067749 , 0.12695312, 0.0035553 , 0.12792969, 0.11425781,
       0.20117188])
 y = array([0.03015137, 0.02099609, 0.00689697, 0.11914062, 0.21777344,
       0.13378906, 0.05859375, 0.05224609, 0.15820312, 0.125     ,
       0.07763672])
 z = array([[ 9.375   ,  4.      ,  6.09375 ,  7.84375 ,  5.625   ,  6.78125 ,
         6.875   ,  4.125   ,  4.0625  ,  5.... 8.125   ,  6.46875 ,  8.0625  ,  5.53125 , 11.9375  ,
         8.125   ,  8.5625  ,  6.25    ,  8.5625  ,  4.71875 ]])

    def baseline_mahalanobis(x, y, z):
        diff = x - y
 >       return np.sqrt(diff @ z @ diff)
 E       RuntimeWarning: invalid value encountered in sqrt

 diff       = array([ 0.08312988,  0.06542969,  0.0380249 , -0.0078125 , -0.15380859,
       -0.12701416,  0.06835938, -0.0486908 , -0.03027344, -0.01074219,
        0.12353516])
 x          = array([0.11328125, 0.08642578, 0.04492188, 0.11132812, 0.06396484,
       0.0067749 , 0.12695312, 0.0035553 , 0.12792969, 0.11425781,
       0.20117188])
 y          = array([0.03015137, 0.02099609, 0.00689697, 0.11914062, 0.21777344,
       0.13378906, 0.05859375, 0.05224609, 0.15820312, 0.125     ,
       0.07763672])
 z          = array([[ 9.375   ,  4.      ,  6.09375 ,  7.84375 ,  5.625   ,  6.78125 ,
         6.875   ,  4.125   ,  4.0625  ,  5.... 8.125   ,  6.46875 ,  8.0625  ,  5.53125 , 11.9375  ,
         8.125   ,  8.5625  ,  6.25    ,  8.5625  ,  4.71875 ]])

 scripts/test.py:152: RuntimeWarning
 ___________________ test_curved_bf16[sve-mahalanobis-16-2-5] ___________________

 ndim = 16, metric = 'mahalanobis', capability = 'sve'
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("ndim", [11, 16, 33])
    @pytest.mark.parametrize("metric", ["bilinear", "mahalanobis"])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_curved_bf16(ndim, metric, capability, stats_fixture):
        """Compares various SIMD kernels (like Bilinear Forms and Mahalanobis distances) for curved spaces
        with their NumPy or baseline counterparts, testing accuracy for the Brain-float format not
        natively supported by NumPy."""
    
        np.random.seed()
    
        # Let's generate some non-negative probability distributions
        a = np.abs(np.random.randn(ndim).astype(np.float32))
        b = np.abs(np.random.randn(ndim).astype(np.float32))
        a /= np.sum(a)
        b /= np.sum(b)
    
        # Let's compute the inverse of the covariance matrix, otherwise in the SciPy
        # implementation of the Mahalanobis we may face `sqrt` of a negative number.
        # We multiply the matrix by its transpose to get a positive-semi-definite matrix.
        c = np.abs(np.random.randn(ndim, ndim).astype(np.float32))
        c = np.dot(c, c.T)
    
        a_f32_rounded, a_bf16 = f32_downcast_to_bf16(a)
        b_f32_rounded, b_bf16 = f32_downcast_to_bf16(b)
        c_f32_rounded, c_bf16 = f32_downcast_to_bf16(c)
    
        keep_one_capability(capability)
        baseline_kernel, simd_kernel = name_to_kernels(metric)
 >       accurate_dt, accurate = profile(
            baseline_kernel,
            a_f32_rounded.astype(np.float64),
            b_f32_rounded.astype(np.float64),
            c_f32_rounded.astype(np.float64),
        )

 a          = array([0.04814473, 0.02076086, 0.02105057, 0.04389034, 0.12213339,
       0.06631447, 0.00496773, 0.21588615, 0.01833241, 0.06165598,
       0.00764343, 0.03219687, 0.0722173 , 0.1444225 , 0.0750166 ,
       0.04536671], dtype=float32)
 a_bf16     = array([15685, 15530, 15532, 15668, 15866, 15752, 15267, 15965, 15510,
       15741, 15354, 15620, 15764, 15892, 15770, 15674], dtype=uint16)
 a_f32_rounded = array([0.0480957 , 0.02075195, 0.02099609, 0.04394531, 0.12207031,
       0.06640625, 0.00497437, 0.21582031, 0.01831055, 0.06176758,
       0.00762939, 0.03222656, 0.07226562, 0.14453125, 0.07519531,
       0.04541016], dtype=float32)
 b          = array([0.05422818, 0.01833731, 0.04266819, 0.08349113, 0.12166468,
       0.02291767, 0.01081976, 0.0819141 , 0.07992747, 0.04749508,
       0.06184949, 0.22571923, 0.01782056, 0.05257646, 0.04835343,
       0.03021722], dtype=float32)
 b_bf16     = array([15710, 15510, 15663, 15787, 15865, 15548, 15409, 15784, 15780,
       15683, 15741, 15975, 15506, 15703, 15686, 15608], dtype=uint16)
 b_f32_rounded = array([0.05419922, 0.01831055, 0.04272461, 0.08349609, 0.12158203,
       0.02294922, 0.01080322, 0.08203125, 0.08007812, 0.04760742,
       0.06176758, 0.22558594, 0.01782227, 0.05249023, 0.04833984,
       0.03027344], dtype=float32)
 baseline_kernel = <function baseline_mahalanobis at 0x7ffff48ef4c0>
 c          = array([[ 8.451854 , 11.482021 ,  4.088371 , 13.54543  , 10.133546 ,
         8.988427 , 12.596884 , 14.004304 , 11.119... , 15.191505 ,
        14.657041 , 13.745955 , 13.092186 , 16.55033  , 16.493143 ,
        21.08838  ]], dtype=float32)
 c_bf16     = array([[16647, 16696, 16515, 16729, 16674, 16656, 16714, 16736, 16690,
        16675, 16691, 16680, 16642, 16649, 1662...724, 16762, 16771, 16655, 16719, 16793, 16764,
        16755, 16747, 16732, 16721, 16772, 16772, 16809]], dtype=uint16)
 c_f32_rounded = array([[ 8.4375 , 11.5    ,  4.09375, 13.5625 , 10.125  ,  9.     ,
        12.625  , 14.     , 11.125  , 10.1875 , 11...75 , 19.125  , 15.75   , 15.1875 , 14.6875 , 13.75   ,
        13.0625 , 16.5    , 16.5    , 21.125  ]], dtype=float32)
 capability = 'sve'
 metric     = 'mahalanobis'
 ndim       = 16
 simd_kernel = <built-in function mahalanobis>
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

 scripts/test.py:779: 
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
 scripts/test.py:179: in profile
    result = callable(*args, **kwargs)
        args       = (array([0.0480957 , 0.02075195, 0.02099609, 0.04394531, 0.12207031,
       0.06640625, 0.00497437, 0.21582031, 0.01831...
        12.9375 , 19.125  , 15.75   , 15.1875 , 14.6875 , 13.75   ,
        13.0625 , 16.5    , 16.5    , 21.125  ]]))
        before     = 9886335430266
        callable   = <function baseline_mahalanobis at 0x7ffff48ef4c0>
        kwargs     = {}
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

 x = array([0.0480957 , 0.02075195, 0.02099609, 0.04394531, 0.12207031,
       0.06640625, 0.00497437, 0.21582031, 0.01831055, 0.06176758,
       0.00762939, 0.03222656, 0.07226562, 0.14453125, 0.07519531,
       0.04541016])
 y = array([0.05419922, 0.01831055, 0.04272461, 0.08349609, 0.12158203,
       0.02294922, 0.01080322, 0.08203125, 0.08007812, 0.04760742,
       0.06176758, 0.22558594, 0.01782227, 0.05249023, 0.04833984,
       0.03027344])
 z = array([[ 8.4375 , 11.5    ,  4.09375, 13.5625 , 10.125  ,  9.     ,
        12.625  , 14.     , 11.125  , 10.1875 , 11...,
        12.9375 , 19.125  , 15.75   , 15.1875 , 14.6875 , 13.75   ,
        13.0625 , 16.5    , 16.5    , 21.125  ]])

    def baseline_mahalanobis(x, y, z):
        diff = x - y
 >       return np.sqrt(diff @ z @ diff)
 E       RuntimeWarning: invalid value encountered in sqrt

 diff       = array([-0.00610352,  0.00244141, -0.02172852, -0.03955078,  0.00048828,
        0.04345703, -0.00582886,  0.13378906, -0.06176758,  0.01416016,
       -0.05413818, -0.19335938,  0.05444336,  0.09204102,  0.02685547,
        0.01513672])
 x          = array([0.0480957 , 0.02075195, 0.02099609, 0.04394531, 0.12207031,
       0.06640625, 0.00497437, 0.21582031, 0.01831055, 0.06176758,
       0.00762939, 0.03222656, 0.07226562, 0.14453125, 0.07519531,
       0.04541016])
 y          = array([0.05419922, 0.01831055, 0.04272461, 0.08349609, 0.12158203,
       0.02294922, 0.01080322, 0.08203125, 0.08007812, 0.04760742,
       0.06176758, 0.22558594, 0.01782227, 0.05249023, 0.04833984,
       0.03027344])
 z          = array([[ 8.4375 , 11.5    ,  4.09375, 13.5625 , 10.125  ,  9.     ,
        12.625  , 14.     , 11.125  , 10.1875 , 11...,
        12.9375 , 19.125  , 15.75   , 15.1875 , 14.6875 , 13.75   ,
        13.0625 , 16.5    , 16.5    , 21.125  ]])

 scripts/test.py:152: RuntimeWarning
 ___________________ test_curved_bf16[sve-mahalanobis-16-4-5] ___________________

 ndim = 16, metric = 'mahalanobis', capability = 'sve'
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("ndim", [11, 16, 33])
    @pytest.mark.parametrize("metric", ["bilinear", "mahalanobis"])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_curved_bf16(ndim, metric, capability, stats_fixture):
        """Compares various SIMD kernels (like Bilinear Forms and Mahalanobis distances) for curved spaces
        with their NumPy or baseline counterparts, testing accuracy for the Brain-float format not
        natively supported by NumPy."""
    
        np.random.seed()
    
        # Let's generate some non-negative probability distributions
        a = np.abs(np.random.randn(ndim).astype(np.float32))
        b = np.abs(np.random.randn(ndim).astype(np.float32))
        a /= np.sum(a)
        b /= np.sum(b)
    
        # Let's compute the inverse of the covariance matrix, otherwise in the SciPy
        # implementation of the Mahalanobis we may face `sqrt` of a negative number.
        # We multiply the matrix by its transpose to get a positive-semi-definite matrix.
        c = np.abs(np.random.randn(ndim, ndim).astype(np.float32))
        c = np.dot(c, c.T)
    
        a_f32_rounded, a_bf16 = f32_downcast_to_bf16(a)
        b_f32_rounded, b_bf16 = f32_downcast_to_bf16(b)
        c_f32_rounded, c_bf16 = f32_downcast_to_bf16(c)
    
        keep_one_capability(capability)
        baseline_kernel, simd_kernel = name_to_kernels(metric)
 >       accurate_dt, accurate = profile(
            baseline_kernel,
            a_f32_rounded.astype(np.float64),
            b_f32_rounded.astype(np.float64),
            c_f32_rounded.astype(np.float64),
        )

 a          = array([0.01948337, 0.05178652, 0.05493792, 0.01409185, 0.06849021,
       0.02096938, 0.07028051, 0.07031328, 0.02161663, 0.06226549,
       0.04409088, 0.09082893, 0.10754535, 0.11933503, 0.11147716,
       0.0724875 ], dtype=float32)
 a_bf16     = array([15520, 15700, 15713, 15463, 15756, 15532, 15760, 15760, 15537,
       15743, 15669, 15802, 15836, 15860, 15844, 15764], dtype=uint16)
 a_f32_rounded = array([0.01953125, 0.05175781, 0.05493164, 0.01409912, 0.06835938,
       0.02099609, 0.0703125 , 0.0703125 , 0.02160645, 0.06225586,
       0.04418945, 0.09082031, 0.10742188, 0.11914062, 0.11132812,
       0.07226562], dtype=float32)
 b          = array([0.04691336, 0.06829056, 0.0073219 , 0.09915267, 0.05131653,
       0.04066078, 0.05432919, 0.10679875, 0.03285411, 0.01215882,
       0.09683015, 0.03478584, 0.1527629 , 0.04031371, 0.04157227,
       0.11393841], dtype=float32)
 b_bf16     = array([15680, 15756, 15344, 15819, 15698, 15655, 15711, 15835, 15623,
       15431, 15814, 15630, 15900, 15653, 15658, 15849], dtype=uint16)
 b_f32_rounded = array([0.046875  , 0.06835938, 0.00732422, 0.09912109, 0.05126953,
       0.04077148, 0.05444336, 0.10693359, 0.03295898, 0.012146  ,
       0.09667969, 0.03466797, 0.15234375, 0.0402832 , 0.04150391,
       0.11376953], dtype=float32)
 baseline_kernel = <function baseline_mahalanobis at 0x7ffff48ef4c0>
 c          = array([[11.978694 , 10.151522 ,  8.169828 , 12.560961 ,  9.133539 ,
        11.559786 , 11.977739 , 14.507921 , 10.368... , 13.432432 ,
         9.0387125, 10.258771 ,  9.456911 ,  9.003125 , 11.73809  ,
        14.262207 ]], dtype=float32)
 c_bf16     = array([[16704, 16674, 16643, 16713, 16658, 16697, 16704, 16744, 16678,
        16721, 16579, 16696, 16678, 16718, 1671...621, 16631, 16614, 16736, 16672, 16687, 16670,
        16727, 16657, 16676, 16663, 16656, 16700, 16740]], dtype=uint16)
 c_f32_rounded = array([[12.     , 10.125  ,  8.1875 , 12.5625 ,  9.125  , 11.5625 ,
        12.     , 14.5    , 10.375  , 13.0625 ,  6...   , 10.9375 ,  9.875  , 13.4375 ,  9.0625 , 10.25   ,
         9.4375 ,  9.     , 11.75   , 14.25   ]], dtype=float32)
 capability = 'sve'
 metric     = 'mahalanobis'
 ndim       = 16
 simd_kernel = <built-in function mahalanobis>
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

 scripts/test.py:779: 
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
 scripts/test.py:179: in profile
    result = callable(*args, **kwargs)
        args       = (array([0.01953125, 0.05175781, 0.05493164, 0.01409912, 0.06835938,
       0.02099609, 0.0703125 , 0.0703125 , 0.02160...
        10.     , 10.9375 ,  9.875  , 13.4375 ,  9.0625 , 10.25   ,
         9.4375 ,  9.     , 11.75   , 14.25   ]]))
        before     = 9887018753886
        callable   = <function baseline_mahalanobis at 0x7ffff48ef4c0>
        kwargs     = {}
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

 x = array([0.01953125, 0.05175781, 0.05493164, 0.01409912, 0.06835938,
       0.02099609, 0.0703125 , 0.0703125 , 0.02160645, 0.06225586,
       0.04418945, 0.09082031, 0.10742188, 0.11914062, 0.11132812,
       0.07226562])
 y = array([0.046875  , 0.06835938, 0.00732422, 0.09912109, 0.05126953,
       0.04077148, 0.05444336, 0.10693359, 0.03295898, 0.012146  ,
       0.09667969, 0.03466797, 0.15234375, 0.0402832 , 0.04150391,
       0.11376953])
 z = array([[12.     , 10.125  ,  8.1875 , 12.5625 ,  9.125  , 11.5625 ,
        12.     , 14.5    , 10.375  , 13.0625 ,  6...,
        10.     , 10.9375 ,  9.875  , 13.4375 ,  9.0625 , 10.25   ,
         9.4375 ,  9.     , 11.75   , 14.25   ]])

    def baseline_mahalanobis(x, y, z):
        diff = x - y
 >       return np.sqrt(diff @ z @ diff)
 E       RuntimeWarning: invalid value encountered in sqrt

 diff       = array([-0.02734375, -0.01660156,  0.04760742, -0.08502197,  0.01708984,
       -0.01977539,  0.01586914, -0.03662109, -0.01135254,  0.05010986,
       -0.05249023,  0.05615234, -0.04492188,  0.07885742,  0.06982422,
       -0.04150391])
 x          = array([0.01953125, 0.05175781, 0.05493164, 0.01409912, 0.06835938,
       0.02099609, 0.0703125 , 0.0703125 , 0.02160645, 0.06225586,
       0.04418945, 0.09082031, 0.10742188, 0.11914062, 0.11132812,
       0.07226562])
 y          = array([0.046875  , 0.06835938, 0.00732422, 0.09912109, 0.05126953,
       0.04077148, 0.05444336, 0.10693359, 0.03295898, 0.012146  ,
       0.09667969, 0.03466797, 0.15234375, 0.0402832 , 0.04150391,
       0.11376953])
 z          = array([[12.     , 10.125  ,  8.1875 , 12.5625 ,  9.125  , 11.5625 ,
        12.     , 14.5    , 10.375  , 13.0625 ,  6...,
        10.     , 10.9375 ,  9.875  , 13.4375 ,  9.0625 , 10.25   ,
         9.4375 ,  9.     , 11.75   , 14.25   ]])

 scripts/test.py:152: RuntimeWarning
 ___________________ test_curved_bf16[sve-mahalanobis-33-1-5] ___________________

 ndim = 33, metric = 'mahalanobis', capability = 'sve'
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("ndim", [11, 16, 33])
    @pytest.mark.parametrize("metric", ["bilinear", "mahalanobis"])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_curved_bf16(ndim, metric, capability, stats_fixture):
        """Compares various SIMD kernels (like Bilinear Forms and Mahalanobis distances) for curved spaces
        with their NumPy or baseline counterparts, testing accuracy for the Brain-float format not
        natively supported by NumPy."""
    
        np.random.seed()
    
        # Let's generate some non-negative probability distributions
        a = np.abs(np.random.randn(ndim).astype(np.float32))
        b = np.abs(np.random.randn(ndim).astype(np.float32))
        a /= np.sum(a)
        b /= np.sum(b)
    
        # Let's compute the inverse of the covariance matrix, otherwise in the SciPy
        # implementation of the Mahalanobis we may face `sqrt` of a negative number.
        # We multiply the matrix by its transpose to get a positive-semi-definite matrix.
        c = np.abs(np.random.randn(ndim, ndim).astype(np.float32))
        c = np.dot(c, c.T)
    
        a_f32_rounded, a_bf16 = f32_downcast_to_bf16(a)
        b_f32_rounded, b_bf16 = f32_downcast_to_bf16(b)
        c_f32_rounded, c_bf16 = f32_downcast_to_bf16(c)
    
        keep_one_capability(capability)
        baseline_kernel, simd_kernel = name_to_kernels(metric)
 >       accurate_dt, accurate = profile(
            baseline_kernel,
            a_f32_rounded.astype(np.float64),
            b_f32_rounded.astype(np.float64),
            c_f32_rounded.astype(np.float64),
        )

 a          = array([0.04997705, 0.0025466 , 0.00370993, 0.01720835, 0.00359949,
       0.01317152, 0.05188768, 0.04159178, 0.034261... 0.01344748, 0.03360195, 0.03006903, 0.00380832, 0.02557713,
       0.04743458, 0.0485619 , 0.0175745 ], dtype=float32)
 a_bf16     = array([15693, 15143, 15219, 15501, 15212, 15448, 15701, 15658, 15628,
       15662, 15747, 15642, 15388, 15353, 15736,...15514, 15853, 15017, 15675, 15657, 15631, 15452, 15626,
       15606, 15226, 15570, 15682, 15687, 15504], dtype=uint16)
 a_f32_rounded = array([0.05004883, 0.00254822, 0.00370789, 0.01721191, 0.00360107,
       0.01318359, 0.05200195, 0.04150391, 0.034179... 0.01342773, 0.03369141, 0.0300293 , 0.0038147 , 0.02563477,
       0.04736328, 0.04858398, 0.01757812], dtype=float32)
 b          = array([0.00950222, 0.01081546, 0.01833257, 0.00394527, 0.02661457,
       0.01071575, 0.03614973, 0.02446504, 0.032995... 0.05159824, 0.06454723, 0.07267114, 0.02589424, 0.03569077,
       0.00500792, 0.02340342, 0.0583711 ], dtype=float32)
 b_bf16     = array([15388, 15409, 15510, 15233, 15578, 15408, 15636, 15560, 15623,
       15693, 15625, 15393, 15654, 15726, 15641,...15555, 15454, 15390, 15547, 15524, 15444, 15699, 15748,
       15765, 15572, 15634, 15268, 15552, 15727], dtype=uint16)
 b_f32_rounded = array([0.00952148, 0.01080322, 0.01831055, 0.00393677, 0.02661133,
       0.01074219, 0.03613281, 0.02441406, 0.032958... 0.05151367, 0.06445312, 0.07275391, 0.02587891, 0.03564453,
       0.00500488, 0.0234375 , 0.05834961], dtype=float32)
 baseline_kernel = <function baseline_mahalanobis at 0x7ffff48ef4c0>
 c          = array([[28.629984, 20.268099, 21.680647, ..., 20.991978, 32.194077,
        21.975376],
       [20.268099, 19.75928 , ...    33.09624 ],
       [21.975376, 22.094889, 16.313854, ..., 25.769047, 33.09624 ,
        51.132343]], dtype=float32)
 c_bf16     = array([[16869, 16802, 16813, ..., 16808, 16897, 16816],
       [16802, 16798, 16793, ..., 16779, 16821, 16817],
      ...[16897, 16821, 16821, ..., 16826, 16890, 16900],
       [16816, 16817, 16771, ..., 16846, 16900, 16973]], dtype=uint16)
 c_f32_rounded = array([[28.625 , 20.25  , 21.625 , ..., 21.    , 32.25  , 22.    ],
       [20.25  , 19.75  , 19.125 , ..., 17.375 , 2..., 23.25  , 31.25  , 33.    ],
       [22.    , 22.125 , 16.375 , ..., 25.75  , 33.    , 51.25  ]],
      dtype=float32)
 capability = 'sve'
 metric     = 'mahalanobis'
 ndim       = 33
 simd_kernel = <built-in function mahalanobis>
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

 scripts/test.py:779: 
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
 scripts/test.py:179: in profile
    result = callable(*args, **kwargs)
        args       = (array([0.05004883, 0.00254822, 0.00370789, 0.01721191, 0.00360107,
       0.01318359, 0.05200195, 0.04150391, 0.03417...2.625 , 22.625 , ..., 23.25  , 31.25  , 33.    ],
       [22.    , 22.125 , 16.375 , ..., 25.75  , 33.    , 51.25  ]]))
        before     = 9887700425229
        callable   = <function baseline_mahalanobis at 0x7ffff48ef4c0>
        kwargs     = {}
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

 x = array([0.05004883, 0.00254822, 0.00370789, 0.01721191, 0.00360107,
       0.01318359, 0.05200195, 0.04150391, 0.034179...3491211,
       0.01342773, 0.03369141, 0.0300293 , 0.0038147 , 0.02563477,
       0.04736328, 0.04858398, 0.01757812])
 y = array([0.00952148, 0.01080322, 0.01831055, 0.00393677, 0.02661133,
       0.01074219, 0.03613281, 0.02441406, 0.032958...1293945,
       0.05151367, 0.06445312, 0.07275391, 0.02587891, 0.03564453,
       0.00500488, 0.0234375 , 0.05834961])
 z = array([[28.625 , 20.25  , 21.625 , ..., 21.    , 32.25  , 22.    ],
       [20.25  , 19.75  , 19.125 , ..., 17.375 , 2...22.625 , 22.625 , ..., 23.25  , 31.25  , 33.    ],
       [22.    , 22.125 , 16.375 , ..., 25.75  , 33.    , 51.25  ]])

    def baseline_mahalanobis(x, y, z):
        diff = x - y
 >       return np.sqrt(diff @ z @ diff)
 E       RuntimeWarning: invalid value encountered in sqrt

 diff       = array([ 0.04052734, -0.008255  , -0.01460266,  0.01327515, -0.02301025,
        0.00244141,  0.01586914,  0.01708984, ...
       -0.03808594, -0.03076172, -0.04272461, -0.02206421, -0.01000977,
        0.0423584 ,  0.02514648, -0.04077148])
 x          = array([0.05004883, 0.00254822, 0.00370789, 0.01721191, 0.00360107,
       0.01318359, 0.05200195, 0.04150391, 0.034179...3491211,
       0.01342773, 0.03369141, 0.0300293 , 0.0038147 , 0.02563477,
       0.04736328, 0.04858398, 0.01757812])
 y          = array([0.00952148, 0.01080322, 0.01831055, 0.00393677, 0.02661133,
       0.01074219, 0.03613281, 0.02441406, 0.032958...1293945,
       0.05151367, 0.06445312, 0.07275391, 0.02587891, 0.03564453,
       0.00500488, 0.0234375 , 0.05834961])
 z          = array([[28.625 , 20.25  , 21.625 , ..., 21.    , 32.25  , 22.    ],
       [20.25  , 19.75  , 19.125 , ..., 17.375 , 2...22.625 , 22.625 , ..., 23.25  , 31.25  , 33.    ],
       [22.    , 22.125 , 16.375 , ..., 25.75  , 33.    , 51.25  ]])

 scripts/test.py:152: RuntimeWarning
 ___________________ test_curved_bf16[sve-mahalanobis-33-2-5] ___________________

 ndim = 33, metric = 'mahalanobis', capability = 'sve'
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("ndim", [11, 16, 33])
    @pytest.mark.parametrize("metric", ["bilinear", "mahalanobis"])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_curved_bf16(ndim, metric, capability, stats_fixture):
        """Compares various SIMD kernels (like Bilinear Forms and Mahalanobis distances) for curved spaces
        with their NumPy or baseline counterparts, testing accuracy for the Brain-float format not
        natively supported by NumPy."""
    
        np.random.seed()
    
        # Let's generate some non-negative probability distributions
        a = np.abs(np.random.randn(ndim).astype(np.float32))
        b = np.abs(np.random.randn(ndim).astype(np.float32))
        a /= np.sum(a)
        b /= np.sum(b)
    
        # Let's compute the inverse of the covariance matrix, otherwise in the SciPy
        # implementation of the Mahalanobis we may face `sqrt` of a negative number.
        # We multiply the matrix by its transpose to get a positive-semi-definite matrix.
        c = np.abs(np.random.randn(ndim, ndim).astype(np.float32))
        c = np.dot(c, c.T)
    
        a_f32_rounded, a_bf16 = f32_downcast_to_bf16(a)
        b_f32_rounded, b_bf16 = f32_downcast_to_bf16(b)
        c_f32_rounded, c_bf16 = f32_downcast_to_bf16(c)
    
        keep_one_capability(capability)
        baseline_kernel, simd_kernel = name_to_kernels(metric)
 >       accurate_dt, accurate = profile(
            baseline_kernel,
            a_f32_rounded.astype(np.float64),
            b_f32_rounded.astype(np.float64),
            c_f32_rounded.astype(np.float64),
        )

 a          = array([0.01748706, 0.01365085, 0.01036716, 0.0176843 , 0.03561447,
       0.06670987, 0.02977232, 0.01627612, 0.048273... 0.01684966, 0.03027186, 0.06392846, 0.05521099, 0.00830103,
       0.11597884, 0.014849  , 0.01109209], dtype=float32)
 a_bf16     = array([15503, 15456, 15402, 15505, 15634, 15753, 15604, 15493, 15686,
       15645, 15639, 15508, 15561, 15139, 15715,...15591, 15400, 15525, 15438, 14979, 15647, 15498, 15608,
       15747, 15714, 15368, 15854, 15475, 15414], dtype=uint16)
 a_f32_rounded = array([0.01745605, 0.01367188, 0.01037598, 0.0177002 , 0.03564453,
       0.06689453, 0.02978516, 0.01623535, 0.048339... 0.0168457 , 0.03027344, 0.06396484, 0.05517578, 0.00830078,
       0.11621094, 0.01483154, 0.0111084 ], dtype=float32)
 b          = array([0.05227228, 0.01844944, 0.00833553, 0.03316258, 0.04366094,
       0.01413609, 0.00136596, 0.00967561, 0.027921... 0.00507778, 0.02169636, 0.00543777, 0.01462963, 0.02330264,
       0.06876747, 0.01474003, 0.01691818], dtype=float32)
 b_bf16     = array([15702, 15511, 15369, 15624, 15667, 15464, 15027, 15391, 15589,
       15371, 15393, 15810, 15614, 15534, 15580,...15813, 15789, 15200, 15774, 15504, 15664, 15270, 15538,
       15282, 15472, 15551, 15757, 15474, 15499], dtype=uint16)
 b_f32_rounded = array([0.05224609, 0.01843262, 0.00836182, 0.03320312, 0.04370117,
       0.01416016, 0.00136566, 0.00970459, 0.027954... 0.00506592, 0.02172852, 0.00543213, 0.01464844, 0.02331543,
       0.06884766, 0.01477051, 0.01696777], dtype=float32)
 baseline_kernel = <function baseline_mahalanobis at 0x7ffff48ef4c0>
 c          = array([[19.518635, 22.420347, 18.427256, ..., 19.390715, 19.085264,
        20.927778],
       [22.420347, 20.953415, ...    21.79307 ],
       [20.927778, 21.077303, 27.730473, ..., 21.829508, 21.79307 ,
        40.031765]], dtype=float32)
 c_bf16     = array([[16796, 16819, 16787, ..., 16795, 16793, 16807],
       [16819, 16808, 16800, ..., 16794, 16799, 16809],
      ...[16793, 16799, 16801, ..., 16807, 16790, 16814],
       [16807, 16809, 16862, ..., 16815, 16814, 16928]], dtype=uint16)
 c_f32_rounded = array([[19.5  , 22.375, 18.375, ..., 19.375, 19.125, 20.875],
       [22.375, 21.   , 20.   , ..., 19.25 , 19.875, 21.....125, ..., 20.875, 18.75 , 21.75 ],
       [20.875, 21.125, 27.75 , ..., 21.875, 21.75 , 40.   ]],
      dtype=float32)
 capability = 'sve'
 metric     = 'mahalanobis'
 ndim       = 33
 simd_kernel = <built-in function mahalanobis>
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

 scripts/test.py:779: 
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
 scripts/test.py:179: in profile
    result = callable(*args, **kwargs)
        args       = (array([0.01745605, 0.01367188, 0.01037598, 0.0177002 , 0.03564453,
       0.06689453, 0.02978516, 0.01623535, 0.04833... [19.125, 19.875, 20.125, ..., 20.875, 18.75 , 21.75 ],
       [20.875, 21.125, 27.75 , ..., 21.875, 21.75 , 40.   ]]))
        before     = 9888318947768
        callable   = <function baseline_mahalanobis at 0x7ffff48ef4c0>
        kwargs     = {}
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

 x = array([0.01745605, 0.01367188, 0.01037598, 0.0177002 , 0.03564453,
       0.06689453, 0.02978516, 0.01623535, 0.048339...3881836,
       0.0168457 , 0.03027344, 0.06396484, 0.05517578, 0.00830078,
       0.11621094, 0.01483154, 0.0111084 ])
 y = array([0.05224609, 0.01843262, 0.00836182, 0.03320312, 0.04370117,
       0.01416016, 0.00136566, 0.00970459, 0.027954...4296875,
       0.00506592, 0.02172852, 0.00543213, 0.01464844, 0.02331543,
       0.06884766, 0.01477051, 0.01696777])
 z = array([[19.5  , 22.375, 18.375, ..., 19.375, 19.125, 20.875],
       [22.375, 21.   , 20.   , ..., 19.25 , 19.875, 21....  [19.125, 19.875, 20.125, ..., 20.875, 18.75 , 21.75 ],
       [20.875, 21.125, 27.75 , ..., 21.875, 21.75 , 40.   ]])

    def baseline_mahalanobis(x, y, z):
        diff = x - y
 >       return np.sqrt(diff @ z @ diff)
 E       RuntimeWarning: invalid value encountered in sqrt

 diff       = array([-3.47900391e-02, -4.76074219e-03,  2.01416016e-03, -1.55029297e-02,
       -8.05664062e-03,  5.27343750e-02,  2...3,  5.85327148e-02,
        4.05273438e-02, -1.50146484e-02,  4.73632812e-02,  6.10351562e-05,
       -5.85937500e-03])
 x          = array([0.01745605, 0.01367188, 0.01037598, 0.0177002 , 0.03564453,
       0.06689453, 0.02978516, 0.01623535, 0.048339...3881836,
       0.0168457 , 0.03027344, 0.06396484, 0.05517578, 0.00830078,
       0.11621094, 0.01483154, 0.0111084 ])
 y          = array([0.05224609, 0.01843262, 0.00836182, 0.03320312, 0.04370117,
       0.01416016, 0.00136566, 0.00970459, 0.027954...4296875,
       0.00506592, 0.02172852, 0.00543213, 0.01464844, 0.02331543,
       0.06884766, 0.01477051, 0.01696777])
 z          = array([[19.5  , 22.375, 18.375, ..., 19.375, 19.125, 20.875],
       [22.375, 21.   , 20.   , ..., 19.25 , 19.875, 21....  [19.125, 19.875, 20.125, ..., 20.875, 18.75 , 21.75 ],
       [20.875, 21.125, 27.75 , ..., 21.875, 21.75 , 40.   ]])

 scripts/test.py:152: RuntimeWarning
 ___________________ test_curved_bf16[sve-mahalanobis-33-3-5] ___________________

 ndim = 33, metric = 'mahalanobis', capability = 'sve'
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("ndim", [11, 16, 33])
    @pytest.mark.parametrize("metric", ["bilinear", "mahalanobis"])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_curved_bf16(ndim, metric, capability, stats_fixture):
        """Compares various SIMD kernels (like Bilinear Forms and Mahalanobis distances) for curved spaces
        with their NumPy or baseline counterparts, testing accuracy for the Brain-float format not
        natively supported by NumPy."""
    
        np.random.seed()
    
        # Let's generate some non-negative probability distributions
        a = np.abs(np.random.randn(ndim).astype(np.float32))
        b = np.abs(np.random.randn(ndim).astype(np.float32))
        a /= np.sum(a)
        b /= np.sum(b)
    
        # Let's compute the inverse of the covariance matrix, otherwise in the SciPy
        # implementation of the Mahalanobis we may face `sqrt` of a negative number.
        # We multiply the matrix by its transpose to get a positive-semi-definite matrix.
        c = np.abs(np.random.randn(ndim, ndim).astype(np.float32))
        c = np.dot(c, c.T)
    
        a_f32_rounded, a_bf16 = f32_downcast_to_bf16(a)
        b_f32_rounded, b_bf16 = f32_downcast_to_bf16(b)
        c_f32_rounded, c_bf16 = f32_downcast_to_bf16(c)
    
        keep_one_capability(capability)
        baseline_kernel, simd_kernel = name_to_kernels(metric)
 >       accurate_dt, accurate = profile(
            baseline_kernel,
            a_f32_rounded.astype(np.float64),
            b_f32_rounded.astype(np.float64),
            c_f32_rounded.astype(np.float64),
        )

 a          = array([0.04518621, 0.00024922, 0.02799306, 0.01902876, 0.01338974,
       0.03645232, 0.04939019, 0.054845  , 0.016500... 0.00622799, 0.04410923, 0.01668775, 0.01605952, 0.03658054,
       0.05053007, 0.04733505, 0.00055232], dtype=float32)
 a_bf16     = array([15673, 14723, 15589, 15516, 15451, 15637, 15690, 15713, 15495,
       15449, 15418, 15238, 15768, 15673, 15631,...14836, 15773, 15640, 15637, 15333, 15712, 15308, 15669,
       15497, 15492, 15638, 15695, 15682, 14865], dtype=uint16)
 a_f32_rounded = array([0.04516602, 0.00024986, 0.0279541 , 0.01904297, 0.0133667 ,
       0.03637695, 0.04931641, 0.05493164, 0.016479... 0.00622559, 0.04418945, 0.01672363, 0.01611328, 0.03662109,
       0.05053711, 0.04736328, 0.00055313], dtype=float32)
 b          = array([0.04582541, 0.03989226, 0.01393795, 0.00205465, 0.00209404,
       0.00383668, 0.01649316, 0.02566185, 0.045121... 0.07050895, 0.06842383, 0.0127728 , 0.05115888, 0.03804231,
       0.12758867, 0.03218453, 0.01158374], dtype=float32)
 b_bf16     = array([15676, 15651, 15460, 15111, 15113, 15227, 15495, 15570, 15673,
       15135, 15291, 15764, 15563, 15551, 15663,...15586, 15490, 15533, 15400, 15315, 15266, 15760, 15756,
       15441, 15698, 15644, 15875, 15620, 15422], dtype=uint16)
 b_f32_rounded = array([0.04589844, 0.03979492, 0.01391602, 0.00205994, 0.00209045,
       0.00382996, 0.01647949, 0.02563477, 0.045166... 0.0703125 , 0.06835938, 0.01275635, 0.05126953, 0.03808594,
       0.12792969, 0.03222656, 0.01159668], dtype=float32)
 baseline_kernel = <function baseline_mahalanobis at 0x7ffff48ef4c0>
 c          = array([[23.224419 , 25.034065 , 17.452147 , ..., 27.969957 , 20.268974 ,
        19.829994 ],
       [25.034065 , 24.7...730925 ],
       [19.829994 , 20.152054 , 13.218841 , ..., 19.774866 , 27.730925 ,
        39.820786 ]], dtype=float32)
 c_bf16     = array([[16826, 16840, 16780, ..., 16864, 16802, 16799],
       [16840, 16838, 16761, ..., 16843, 16807, 16801],
      ...[16802, 16807, 16722, ..., 16788, 16872, 16862],
       [16799, 16801, 16724, ..., 16798, 16862, 16927]], dtype=uint16)
 c_f32_rounded = array([[23.25  , 25.    , 17.5   , ..., 28.    , 20.25  , 19.875 ],
       [25.    , 24.75  , 15.5625, ..., 25.375 , 2..., 18.5   , 29.    , 27.75  ],
       [19.875 , 20.125 , 13.25  , ..., 19.75  , 27.75  , 39.75  ]],
      dtype=float32)
 capability = 'sve'
 metric     = 'mahalanobis'
 ndim       = 33
 simd_kernel = <built-in function mahalanobis>
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

 scripts/test.py:779: 
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
 scripts/test.py:179: in profile
    result = callable(*args, **kwargs)
        args       = (array([0.04516602, 0.00024986, 0.0279541 , 0.01904297, 0.0133667 ,
       0.03637695, 0.04931641, 0.05493164, 0.01647...0.875 , 13.125 , ..., 18.5   , 29.    , 27.75  ],
       [19.875 , 20.125 , 13.25  , ..., 19.75  , 27.75  , 39.75  ]]))
        before     = 9888947724356
        callable   = <function baseline_mahalanobis at 0x7ffff48ef4c0>
        kwargs     = {}
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

 x = array([0.04516602, 0.00024986, 0.0279541 , 0.01904297, 0.0133667 ,
       0.03637695, 0.04931641, 0.05493164, 0.016479...546875 ,
       0.00622559, 0.04418945, 0.01672363, 0.01611328, 0.03662109,
       0.05053711, 0.04736328, 0.00055313])
 y = array([0.04589844, 0.03979492, 0.01391602, 0.00205994, 0.00209045,
       0.00382996, 0.01647949, 0.02563477, 0.045166...0494385,
       0.0703125 , 0.06835938, 0.01275635, 0.05126953, 0.03808594,
       0.12792969, 0.03222656, 0.01159668])
 z = array([[23.25  , 25.    , 17.5   , ..., 28.    , 20.25  , 19.875 ],
       [25.    , 24.75  , 15.5625, ..., 25.375 , 2...20.875 , 13.125 , ..., 18.5   , 29.    , 27.75  ],
       [19.875 , 20.125 , 13.25  , ..., 19.75  , 27.75  , 39.75  ]])

    def baseline_mahalanobis(x, y, z):
        diff = x - y
 >       return np.sqrt(diff @ z @ diff)
 E       RuntimeWarning: invalid value encountered in sqrt

 diff       = array([-0.00073242, -0.03954506,  0.01403809,  0.01698303,  0.01127625,
        0.032547  ,  0.03283691,  0.02929688, ...
       -0.06408691, -0.02416992,  0.00396729, -0.03515625, -0.00146484,
       -0.07739258,  0.01513672, -0.01104355])
 x          = array([0.04516602, 0.00024986, 0.0279541 , 0.01904297, 0.0133667 ,
       0.03637695, 0.04931641, 0.05493164, 0.016479...546875 ,
       0.00622559, 0.04418945, 0.01672363, 0.01611328, 0.03662109,
       0.05053711, 0.04736328, 0.00055313])
 y          = array([0.04589844, 0.03979492, 0.01391602, 0.00205994, 0.00209045,
       0.00382996, 0.01647949, 0.02563477, 0.045166...0494385,
       0.0703125 , 0.06835938, 0.01275635, 0.05126953, 0.03808594,
       0.12792969, 0.03222656, 0.01159668])
 z          = array([[23.25  , 25.    , 17.5   , ..., 28.    , 20.25  , 19.875 ],
       [25.    , 24.75  , 15.5625, ..., 25.375 , 2...20.875 , 13.125 , ..., 18.5   , 29.    , 27.75  ],
       [19.875 , 20.125 , 13.25  , ..., 19.75  , 27.75  , 39.75  ]])

 scripts/test.py:152: RuntimeWarning
 _________________ test_curved_bf16[sve_f16-mahalanobis-11-1-5] _________________

 ndim = 11, metric = 'mahalanobis', capability = 'sve_f16'
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("ndim", [11, 16, 33])
    @pytest.mark.parametrize("metric", ["bilinear", "mahalanobis"])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_curved_bf16(ndim, metric, capability, stats_fixture):
        """Compares various SIMD kernels (like Bilinear Forms and Mahalanobis distances) for curved spaces
        with their NumPy or baseline counterparts, testing accuracy for the Brain-float format not
        natively supported by NumPy."""
    
        np.random.seed()
    
        # Let's generate some non-negative probability distributions
        a = np.abs(np.random.randn(ndim).astype(np.float32))
        b = np.abs(np.random.randn(ndim).astype(np.float32))
        a /= np.sum(a)
        b /= np.sum(b)
    
        # Let's compute the inverse of the covariance matrix, otherwise in the SciPy
        # implementation of the Mahalanobis we may face `sqrt` of a negative number.
        # We multiply the matrix by its transpose to get a positive-semi-definite matrix.
        c = np.abs(np.random.randn(ndim, ndim).astype(np.float32))
        c = np.dot(c, c.T)
    
        a_f32_rounded, a_bf16 = f32_downcast_to_bf16(a)
        b_f32_rounded, b_bf16 = f32_downcast_to_bf16(b)
        c_f32_rounded, c_bf16 = f32_downcast_to_bf16(c)
    
        keep_one_capability(capability)
        baseline_kernel, simd_kernel = name_to_kernels(metric)
 >       accurate_dt, accurate = profile(
            baseline_kernel,
            a_f32_rounded.astype(np.float64),
            b_f32_rounded.astype(np.float64),
            c_f32_rounded.astype(np.float64),
        )

 a          = array([0.04926773, 0.03461497, 0.10662573, 0.07200979, 0.03471943,
       0.23874196, 0.08042156, 0.11855081, 0.15998055, 0.05062612,
       0.05444134], dtype=float32)
 a_bf16     = array([15690, 15630, 15834, 15763, 15630, 15988, 15781, 15859, 15908,
       15695, 15711], dtype=uint16)
 a_f32_rounded = array([0.04931641, 0.03466797, 0.10644531, 0.07177734, 0.03466797,
       0.23828125, 0.08056641, 0.11865234, 0.16015625, 0.05053711,
       0.05444336], dtype=float32)
 b          = array([0.10897174, 0.0069226 , 0.1354219 , 0.00241825, 0.09824331,
       0.05557749, 0.09283689, 0.08922946, 0.14765663, 0.1153453 ,
       0.14737633], dtype=float32)
 b_bf16     = array([15839, 15331, 15883, 15134, 15817, 15716, 15806, 15799, 15895,
       15852, 15895], dtype=uint16)
 b_f32_rounded = array([0.10888672, 0.00692749, 0.13574219, 0.00241089, 0.09814453,
       0.05566406, 0.09277344, 0.08935547, 0.14746094, 0.11523438,
       0.14746094], dtype=float32)
 baseline_kernel = <function baseline_mahalanobis at 0x7ffff48ef4c0>
 c          = array([[10.111012 ,  4.8466763,  7.1296444,  5.5370097,  3.3569803,
         4.959389 , 11.605834 ,  7.1982374,  3.938...3,  5.353984 ,
         7.5075536,  6.695008 , 10.67863  , 12.91738  ,  5.175909 ,
         3.7826562]], dtype=float32)
 c_bf16     = array([[16674, 16539, 16612, 16561, 16471, 16543, 16698, 16614, 16508,
        16629, 16621],
       [16539, 16506, 16...4, 16550],
       [16621, 16665, 16644, 16542, 16555, 16624, 16598, 16683, 16719,
        16550, 16498]], dtype=uint16)
 c_f32_rounded = array([[10.125   ,  4.84375 ,  7.125   ,  5.53125 ,  3.359375,  4.96875 ,
        11.625   ,  7.1875  ,  3.9375  ,  7....  4.9375  ,  5.34375 ,  7.5     ,
         6.6875  , 10.6875  , 12.9375  ,  5.1875  ,  3.78125 ]],
      dtype=float32)
 capability = 'sve_f16'
 metric     = 'mahalanobis'
 ndim       = 11
 simd_kernel = <built-in function mahalanobis>
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

 scripts/test.py:779: 
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
 scripts/test.py:179: in profile
    result = callable(*args, **kwargs)
        args       = (array([0.04931641, 0.03466797, 0.10644531, 0.07177734, 0.03466797,
       0.23828125, 0.08056641, 0.11865234, 0.16015...9.5625  ,  8.25    ,  4.9375  ,  5.34375 ,  7.5     ,
         6.6875  , 10.6875  , 12.9375  ,  5.1875  ,  3.78125 ]]))
        before     = 9889920972385
        callable   = <function baseline_mahalanobis at 0x7ffff48ef4c0>
        kwargs     = {}
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

 x = array([0.04931641, 0.03466797, 0.10644531, 0.07177734, 0.03466797,
       0.23828125, 0.08056641, 0.11865234, 0.16015625, 0.05053711,
       0.05444336])
 y = array([0.10888672, 0.00692749, 0.13574219, 0.00241089, 0.09814453,
       0.05566406, 0.09277344, 0.08935547, 0.14746094, 0.11523438,
       0.14746094])
 z = array([[10.125   ,  4.84375 ,  7.125   ,  5.53125 ,  3.359375,  4.96875 ,
        11.625   ,  7.1875  ,  3.9375  ,  7.... 9.5625  ,  8.25    ,  4.9375  ,  5.34375 ,  7.5     ,
         6.6875  , 10.6875  , 12.9375  ,  5.1875  ,  3.78125 ]])

    def baseline_mahalanobis(x, y, z):
        diff = x - y
 >       return np.sqrt(diff @ z @ diff)
 E       RuntimeWarning: invalid value encountered in sqrt

 diff       = array([-0.05957031,  0.02774048, -0.02929688,  0.06936646, -0.06347656,
        0.18261719, -0.01220703,  0.02929688,  0.01269531, -0.06469727,
       -0.09301758])
 x          = array([0.04931641, 0.03466797, 0.10644531, 0.07177734, 0.03466797,
       0.23828125, 0.08056641, 0.11865234, 0.16015625, 0.05053711,
       0.05444336])
 y          = array([0.10888672, 0.00692749, 0.13574219, 0.00241089, 0.09814453,
       0.05566406, 0.09277344, 0.08935547, 0.14746094, 0.11523438,
       0.14746094])
 z          = array([[10.125   ,  4.84375 ,  7.125   ,  5.53125 ,  3.359375,  4.96875 ,
        11.625   ,  7.1875  ,  3.9375  ,  7.... 9.5625  ,  8.25    ,  4.9375  ,  5.34375 ,  7.5     ,
         6.6875  , 10.6875  , 12.9375  ,  5.1875  ,  3.78125 ]])

 scripts/test.py:152: RuntimeWarning
 _________________ test_curved_bf16[sve_f16-mahalanobis-11-2-5] _________________

 ndim = 11, metric = 'mahalanobis', capability = 'sve_f16'
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("ndim", [11, 16, 33])
    @pytest.mark.parametrize("metric", ["bilinear", "mahalanobis"])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_curved_bf16(ndim, metric, capability, stats_fixture):
        """Compares various SIMD kernels (like Bilinear Forms and Mahalanobis distances) for curved spaces
        with their NumPy or baseline counterparts, testing accuracy for the Brain-float format not
        natively supported by NumPy."""
    
        np.random.seed()
    
        # Let's generate some non-negative probability distributions
        a = np.abs(np.random.randn(ndim).astype(np.float32))
        b = np.abs(np.random.randn(ndim).astype(np.float32))
        a /= np.sum(a)
        b /= np.sum(b)
    
        # Let's compute the inverse of the covariance matrix, otherwise in the SciPy
        # implementation of the Mahalanobis we may face `sqrt` of a negative number.
        # We multiply the matrix by its transpose to get a positive-semi-definite matrix.
        c = np.abs(np.random.randn(ndim, ndim).astype(np.float32))
        c = np.dot(c, c.T)
    
        a_f32_rounded, a_bf16 = f32_downcast_to_bf16(a)
        b_f32_rounded, b_bf16 = f32_downcast_to_bf16(b)
        c_f32_rounded, c_bf16 = f32_downcast_to_bf16(c)
    
        keep_one_capability(capability)
        baseline_kernel, simd_kernel = name_to_kernels(metric)
 >       accurate_dt, accurate = profile(
            baseline_kernel,
            a_f32_rounded.astype(np.float64),
            b_f32_rounded.astype(np.float64),
            c_f32_rounded.astype(np.float64),
        )

 a          = array([0.0153596 , 0.06026674, 0.25887257, 0.07886934, 0.15797895,
       0.06965013, 0.00925951, 0.07050892, 0.1714321 , 0.0802885 ,
       0.02751367], dtype=float32)
 a_bf16     = array([15484, 15735, 16005, 15778, 15906, 15759, 15384, 15760, 15920,
       15780, 15585], dtype=uint16)
 a_f32_rounded = array([0.01538086, 0.06030273, 0.25976562, 0.07910156, 0.15820312,
       0.06982422, 0.00927734, 0.0703125 , 0.171875  , 0.08007812,
       0.02746582], dtype=float32)
 b          = array([0.12998584, 0.0200222 , 0.22860451, 0.0586245 , 0.16134793,
       0.06921786, 0.108895  , 0.04049285, 0.1261501 , 0.05564026,
       0.00101897], dtype=float32)
 b_bf16     = array([15877, 15524, 15978, 15728, 15909, 15758, 15839, 15654, 15873,
       15716, 14982], dtype=uint16)
 b_f32_rounded = array([0.12988281, 0.02001953, 0.22851562, 0.05859375, 0.16113281,
       0.06933594, 0.10888672, 0.04052734, 0.12597656, 0.05566406,
       0.00102234], dtype=float32)
 baseline_kernel = <function baseline_mahalanobis at 0x7ffff48ef4c0>
 c          = array([[ 9.59287  ,  8.41809  ,  7.47835  ,  9.7457   ,  8.747352 ,
         8.710135 ,  9.076415 , 12.756506 ,  8.122... ,  5.819438 ,
         7.3932285,  6.32382  ,  4.7112317,  9.751526 ,  8.871151 ,
         4.7550893]], dtype=float32)
 c_bf16     = array([[16665, 16647, 16623, 16668, 16652, 16651, 16657, 16716, 16642,
        16593, 16612],
       [16647, 16589, 16...2, 16654],
       [16612, 16454, 16514, 16549, 16570, 16621, 16586, 16535, 16668,
        16654, 16536]], dtype=uint16)
 c_f32_rounded = array([[ 9.5625  ,  8.4375  ,  7.46875 ,  9.75    ,  8.75    ,  8.6875  ,
         9.0625  , 12.75    ,  8.125   ,  6....  5.15625 ,  5.8125  ,  7.40625 ,
         6.3125  ,  4.71875 ,  9.75    ,  8.875   ,  4.75    ]],
      dtype=float32)
 capability = 'sve_f16'
 metric     = 'mahalanobis'
 ndim       = 11
 simd_kernel = <built-in function mahalanobis>
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

 scripts/test.py:779: 
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
 scripts/test.py:179: in profile
    result = callable(*args, **kwargs)
        args       = (array([0.01538086, 0.06030273, 0.25976562, 0.07910156, 0.15820312,
       0.06982422, 0.00927734, 0.0703125 , 0.17187...3.09375 ,  4.0625  ,  5.15625 ,  5.8125  ,  7.40625 ,
         6.3125  ,  4.71875 ,  9.75    ,  8.875   ,  4.75    ]]))
        before     = 9890560701531
        callable   = <function baseline_mahalanobis at 0x7ffff48ef4c0>
        kwargs     = {}
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

 x = array([0.01538086, 0.06030273, 0.25976562, 0.07910156, 0.15820312,
       0.06982422, 0.00927734, 0.0703125 , 0.171875  , 0.08007812,
       0.02746582])
 y = array([0.12988281, 0.02001953, 0.22851562, 0.05859375, 0.16113281,
       0.06933594, 0.10888672, 0.04052734, 0.12597656, 0.05566406,
       0.00102234])
 z = array([[ 9.5625  ,  8.4375  ,  7.46875 ,  9.75    ,  8.75    ,  8.6875  ,
         9.0625  , 12.75    ,  8.125   ,  6.... 3.09375 ,  4.0625  ,  5.15625 ,  5.8125  ,  7.40625 ,
         6.3125  ,  4.71875 ,  9.75    ,  8.875   ,  4.75    ]])

    def baseline_mahalanobis(x, y, z):
        diff = x - y
 >       return np.sqrt(diff @ z @ diff)
 E       RuntimeWarning: invalid value encountered in sqrt

 diff       = array([-0.11450195,  0.0402832 ,  0.03125   ,  0.02050781, -0.00292969,
        0.00048828, -0.09960938,  0.02978516,  0.04589844,  0.02441406,
        0.02644348])
 x          = array([0.01538086, 0.06030273, 0.25976562, 0.07910156, 0.15820312,
       0.06982422, 0.00927734, 0.0703125 , 0.171875  , 0.08007812,
       0.02746582])
 y          = array([0.12988281, 0.02001953, 0.22851562, 0.05859375, 0.16113281,
       0.06933594, 0.10888672, 0.04052734, 0.12597656, 0.05566406,
       0.00102234])
 z          = array([[ 9.5625  ,  8.4375  ,  7.46875 ,  9.75    ,  8.75    ,  8.6875  ,
         9.0625  , 12.75    ,  8.125   ,  6.... 3.09375 ,  4.0625  ,  5.15625 ,  5.8125  ,  7.40625 ,
         6.3125  ,  4.71875 ,  9.75    ,  8.875   ,  4.75    ]])

 scripts/test.py:152: RuntimeWarning
 _________________ test_curved_bf16[sve_f16-mahalanobis-11-4-5] _________________

 ndim = 11, metric = 'mahalanobis', capability = 'sve_f16'
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("ndim", [11, 16, 33])
    @pytest.mark.parametrize("metric", ["bilinear", "mahalanobis"])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_curved_bf16(ndim, metric, capability, stats_fixture):
        """Compares various SIMD kernels (like Bilinear Forms and Mahalanobis distances) for curved spaces
        with their NumPy or baseline counterparts, testing accuracy for the Brain-float format not
        natively supported by NumPy."""
    
        np.random.seed()
    
        # Let's generate some non-negative probability distributions
        a = np.abs(np.random.randn(ndim).astype(np.float32))
        b = np.abs(np.random.randn(ndim).astype(np.float32))
        a /= np.sum(a)
        b /= np.sum(b)
    
        # Let's compute the inverse of the covariance matrix, otherwise in the SciPy
        # implementation of the Mahalanobis we may face `sqrt` of a negative number.
        # We multiply the matrix by its transpose to get a positive-semi-definite matrix.
        c = np.abs(np.random.randn(ndim, ndim).astype(np.float32))
        c = np.dot(c, c.T)
    
        a_f32_rounded, a_bf16 = f32_downcast_to_bf16(a)
        b_f32_rounded, b_bf16 = f32_downcast_to_bf16(b)
        c_f32_rounded, c_bf16 = f32_downcast_to_bf16(c)
    
        keep_one_capability(capability)
        baseline_kernel, simd_kernel = name_to_kernels(metric)
 >       accurate_dt, accurate = profile(
            baseline_kernel,
            a_f32_rounded.astype(np.float64),
            b_f32_rounded.astype(np.float64),
            c_f32_rounded.astype(np.float64),
        )

 a          = array([0.14565195, 0.05819208, 0.04748193, 0.2874409 , 0.1943495 ,
       0.0921823 , 0.01446106, 0.09838054, 0.03170534, 0.01530994,
       0.01484446], dtype=float32)
 a_bf16     = array([15893, 15726, 15682, 16019, 15943, 15805, 15469, 15817, 15618,
       15483, 15475], dtype=uint16)
 a_f32_rounded = array([0.14550781, 0.05810547, 0.04736328, 0.28710938, 0.19433594,
       0.09228516, 0.01446533, 0.09814453, 0.03173828, 0.01531982,
       0.01483154], dtype=float32)
 b          = array([0.03266111, 0.17632678, 0.1081563 , 0.07595955, 0.16479497,
       0.17448756, 0.02427856, 0.04888924, 0.01897027, 0.1495265 ,
       0.02594909], dtype=float32)
 b_bf16     = array([15622, 15925, 15838, 15772, 15913, 15923, 15559, 15688, 15515,
       15897, 15573], dtype=uint16)
 b_f32_rounded = array([0.03271484, 0.17675781, 0.10839844, 0.07617188, 0.16503906,
       0.17480469, 0.02429199, 0.04882812, 0.0189209 , 0.14941406,
       0.02600098], dtype=float32)
 baseline_kernel = <function baseline_mahalanobis at 0x7ffff48ef4c0>
 c          = array([[ 5.136315 ,  6.2325754, 12.3810425,  4.914899 ,  8.578259 ,
         7.5442295,  4.1795692,  3.9128458,  5.261... ,  9.930153 ,
         7.2096906,  7.486208 ,  9.266866 ,  6.2883406,  5.92242  ,
        10.146776 ]], dtype=float32)
 c_bf16     = array([[16548, 16583, 16710, 16541, 16649, 16625, 16518, 16506, 16552,
        16660, 16638],
       [16583, 16552, 16...9, 16574],
       [16638, 16662, 16640, 16672, 16671, 16615, 16624, 16660, 16585,
        16574, 16674]], dtype=uint16)
 c_f32_rounded = array([[ 5.125   ,  6.21875 , 12.375   ,  4.90625 ,  8.5625  ,  7.53125 ,
         4.1875  ,  3.90625 ,  5.25    ,  9.... 10.      ,  9.9375  ,  7.21875 ,
         7.5     ,  9.25    ,  6.28125 ,  5.9375  , 10.125   ]],
      dtype=float32)
 capability = 'sve_f16'
 metric     = 'mahalanobis'
 ndim       = 11
 simd_kernel = <built-in function mahalanobis>
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

 scripts/test.py:779: 
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
 scripts/test.py:179: in profile
    result = callable(*args, **kwargs)
        args       = (array([0.14550781, 0.05810547, 0.04736328, 0.28710938, 0.19433594,
       0.09228516, 0.01446533, 0.09814453, 0.03173...9.375   ,  8.      , 10.      ,  9.9375  ,  7.21875 ,
         7.5     ,  9.25    ,  6.28125 ,  5.9375  , 10.125   ]]))
        before     = 9891215659082
        callable   = <function baseline_mahalanobis at 0x7ffff48ef4c0>
        kwargs     = {}
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

 x = array([0.14550781, 0.05810547, 0.04736328, 0.28710938, 0.19433594,
       0.09228516, 0.01446533, 0.09814453, 0.03173828, 0.01531982,
       0.01483154])
 y = array([0.03271484, 0.17675781, 0.10839844, 0.07617188, 0.16503906,
       0.17480469, 0.02429199, 0.04882812, 0.0189209 , 0.14941406,
       0.02600098])
 z = array([[ 5.125   ,  6.21875 , 12.375   ,  4.90625 ,  8.5625  ,  7.53125 ,
         4.1875  ,  3.90625 ,  5.25    ,  9.... 9.375   ,  8.      , 10.      ,  9.9375  ,  7.21875 ,
         7.5     ,  9.25    ,  6.28125 ,  5.9375  , 10.125   ]])

    def baseline_mahalanobis(x, y, z):
        diff = x - y
 >       return np.sqrt(diff @ z @ diff)
 E       RuntimeWarning: invalid value encountered in sqrt

 diff       = array([ 0.11279297, -0.11865234, -0.06103516,  0.2109375 ,  0.02929688,
       -0.08251953, -0.00982666,  0.04931641,  0.01281738, -0.13409424,
       -0.01116943])
 x          = array([0.14550781, 0.05810547, 0.04736328, 0.28710938, 0.19433594,
       0.09228516, 0.01446533, 0.09814453, 0.03173828, 0.01531982,
       0.01483154])
 y          = array([0.03271484, 0.17675781, 0.10839844, 0.07617188, 0.16503906,
       0.17480469, 0.02429199, 0.04882812, 0.0189209 , 0.14941406,
       0.02600098])
 z          = array([[ 5.125   ,  6.21875 , 12.375   ,  4.90625 ,  8.5625  ,  7.53125 ,
         4.1875  ,  3.90625 ,  5.25    ,  9.... 9.375   ,  8.      , 10.      ,  9.9375  ,  7.21875 ,
         7.5     ,  9.25    ,  6.28125 ,  5.9375  , 10.125   ]])

 scripts/test.py:152: RuntimeWarning
 _________________ test_curved_bf16[sve_f16-mahalanobis-16-1-5] _________________

 ndim = 16, metric = 'mahalanobis', capability = 'sve_f16'
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("ndim", [11, 16, 33])
    @pytest.mark.parametrize("metric", ["bilinear", "mahalanobis"])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_curved_bf16(ndim, metric, capability, stats_fixture):
        """Compares various SIMD kernels (like Bilinear Forms and Mahalanobis distances) for curved spaces
        with their NumPy or baseline counterparts, testing accuracy for the Brain-float format not
        natively supported by NumPy."""
    
        np.random.seed()
    
        # Let's generate some non-negative probability distributions
        a = np.abs(np.random.randn(ndim).astype(np.float32))
        b = np.abs(np.random.randn(ndim).astype(np.float32))
        a /= np.sum(a)
        b /= np.sum(b)
    
        # Let's compute the inverse of the covariance matrix, otherwise in the SciPy
        # implementation of the Mahalanobis we may face `sqrt` of a negative number.
        # We multiply the matrix by its transpose to get a positive-semi-definite matrix.
        c = np.abs(np.random.randn(ndim, ndim).astype(np.float32))
        c = np.dot(c, c.T)
    
        a_f32_rounded, a_bf16 = f32_downcast_to_bf16(a)
        b_f32_rounded, b_bf16 = f32_downcast_to_bf16(b)
        c_f32_rounded, c_bf16 = f32_downcast_to_bf16(c)
    
        keep_one_capability(capability)
        baseline_kernel, simd_kernel = name_to_kernels(metric)
 >       accurate_dt, accurate = profile(
            baseline_kernel,
            a_f32_rounded.astype(np.float64),
            b_f32_rounded.astype(np.float64),
            c_f32_rounded.astype(np.float64),
        )

 a          = array([0.00850384, 0.05219959, 0.05629701, 0.03345541, 0.15006867,
       0.03538147, 0.04938668, 0.04307074, 0.00596418, 0.07061088,
       0.07647333, 0.04685768, 0.03975163, 0.1345951 , 0.13748527,
       0.05989851], dtype=float32)
 a_bf16     = array([15371, 15702, 15719, 15625, 15898, 15633, 15690, 15664, 15299,
       15761, 15773, 15680, 15651, 15882, 15885, 15733], dtype=uint16)
 a_f32_rounded = array([0.00848389, 0.05224609, 0.05639648, 0.03344727, 0.15039062,
       0.03540039, 0.04931641, 0.04296875, 0.00595093, 0.07080078,
       0.07666016, 0.046875  , 0.03979492, 0.13476562, 0.13769531,
       0.05981445], dtype=float32)
 b          = array([0.06192426, 0.05232732, 0.05382269, 0.01136145, 0.05122824,
       0.01078354, 0.05788223, 0.07494326, 0.0487954 , 0.1184127 ,
       0.07877402, 0.11728529, 0.0041322 , 0.03210209, 0.07368907,
       0.1525362 ], dtype=float32)
 b_bf16     = array([15742, 15702, 15708, 15418, 15698, 15409, 15725, 15769, 15688,
       15859, 15777, 15856, 15239, 15619, 15767, 15900], dtype=uint16)
 b_f32_rounded = array([0.06201172, 0.05224609, 0.05371094, 0.01135254, 0.05126953,
       0.01080322, 0.05786133, 0.07470703, 0.04882812, 0.11865234,
       0.07861328, 0.1171875 , 0.00411987, 0.03198242, 0.07373047,
       0.15234375], dtype=float32)
 baseline_kernel = <function baseline_mahalanobis at 0x7ffff48ef4c0>
 c          = array([[ 7.2783256, 10.459054 ,  7.530288 ,  6.1978793,  8.983226 ,
        12.215012 ,  7.085428 , 10.069019 ,  7.874... , 13.356913 ,
         9.656236 ,  5.375981 , 15.07083  , 16.010855 , 12.678022 ,
        13.506111 ]], dtype=float32)
 c_bf16     = array([[16617, 16679, 16625, 16582, 16656, 16707, 16611, 16673, 16636,
        16691, 16650, 16547, 16661, 16683, 1673...772, 16665, 16708, 16783, 16751, 16713, 16651,
        16726, 16666, 16556, 16753, 16768, 16715, 16728]], dtype=uint16)
 c_f32_rounded = array([[ 7.28125 , 10.4375  ,  7.53125 ,  6.1875  ,  9.      , 12.1875  ,
         7.09375 , 10.0625  ,  7.875   , 11....625  ,  8.6875  , 13.375   ,  9.625   ,  5.375   ,
        15.0625  , 16.      , 12.6875  , 13.5     ]], dtype=float32)
 capability = 'sve_f16'
 metric     = 'mahalanobis'
 ndim       = 16
 simd_kernel = <built-in function mahalanobis>
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

 scripts/test.py:779: 
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
 scripts/test.py:179: in profile
    result = callable(*args, **kwargs)
        args       = (array([0.00848389, 0.05224609, 0.05639648, 0.03344727, 0.15039062,
       0.03540039, 0.04931641, 0.04296875, 0.00595...4.9375  , 12.5625  ,  8.6875  , 13.375   ,  9.625   ,  5.375   ,
        15.0625  , 16.      , 12.6875  , 13.5     ]]))
        before     = 9891880059474
        callable   = <function baseline_mahalanobis at 0x7ffff48ef4c0>
        kwargs     = {}
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

 x = array([0.00848389, 0.05224609, 0.05639648, 0.03344727, 0.15039062,
       0.03540039, 0.04931641, 0.04296875, 0.00595093, 0.07080078,
       0.07666016, 0.046875  , 0.03979492, 0.13476562, 0.13769531,
       0.05981445])
 y = array([0.06201172, 0.05224609, 0.05371094, 0.01135254, 0.05126953,
       0.01080322, 0.05786133, 0.07470703, 0.04882812, 0.11865234,
       0.07861328, 0.1171875 , 0.00411987, 0.03198242, 0.07373047,
       0.15234375])
 z = array([[ 7.28125 , 10.4375  ,  7.53125 ,  6.1875  ,  9.      , 12.1875  ,
         7.09375 , 10.0625  ,  7.875   , 11....14.9375  , 12.5625  ,  8.6875  , 13.375   ,  9.625   ,  5.375   ,
        15.0625  , 16.      , 12.6875  , 13.5     ]])

    def baseline_mahalanobis(x, y, z):
        diff = x - y
 >       return np.sqrt(diff @ z @ diff)
 E       RuntimeWarning: invalid value encountered in sqrt

 diff       = array([-0.05352783,  0.        ,  0.00268555,  0.02209473,  0.09912109,
        0.02459717, -0.00854492, -0.03173828, -0.0428772 , -0.04785156,
       -0.00195312, -0.0703125 ,  0.03567505,  0.1027832 ,  0.06396484,
       -0.0925293 ])
 x          = array([0.00848389, 0.05224609, 0.05639648, 0.03344727, 0.15039062,
       0.03540039, 0.04931641, 0.04296875, 0.00595093, 0.07080078,
       0.07666016, 0.046875  , 0.03979492, 0.13476562, 0.13769531,
       0.05981445])
 y          = array([0.06201172, 0.05224609, 0.05371094, 0.01135254, 0.05126953,
       0.01080322, 0.05786133, 0.07470703, 0.04882812, 0.11865234,
       0.07861328, 0.1171875 , 0.00411987, 0.03198242, 0.07373047,
       0.15234375])
 z          = array([[ 7.28125 , 10.4375  ,  7.53125 ,  6.1875  ,  9.      , 12.1875  ,
         7.09375 , 10.0625  ,  7.875   , 11....14.9375  , 12.5625  ,  8.6875  , 13.375   ,  9.625   ,  5.375   ,
        15.0625  , 16.      , 12.6875  , 13.5     ]])

 scripts/test.py:152: RuntimeWarning
 _________________ test_curved_bf16[sve_f16-mahalanobis-16-4-5] _________________

 ndim = 16, metric = 'mahalanobis', capability = 'sve_f16'
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("ndim", [11, 16, 33])
    @pytest.mark.parametrize("metric", ["bilinear", "mahalanobis"])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_curved_bf16(ndim, metric, capability, stats_fixture):
        """Compares various SIMD kernels (like Bilinear Forms and Mahalanobis distances) for curved spaces
        with their NumPy or baseline counterparts, testing accuracy for the Brain-float format not
        natively supported by NumPy."""
    
        np.random.seed()
    
        # Let's generate some non-negative probability distributions
        a = np.abs(np.random.randn(ndim).astype(np.float32))
        b = np.abs(np.random.randn(ndim).astype(np.float32))
        a /= np.sum(a)
        b /= np.sum(b)
    
        # Let's compute the inverse of the covariance matrix, otherwise in the SciPy
        # implementation of the Mahalanobis we may face `sqrt` of a negative number.
        # We multiply the matrix by its transpose to get a positive-semi-definite matrix.
        c = np.abs(np.random.randn(ndim, ndim).astype(np.float32))
        c = np.dot(c, c.T)
    
        a_f32_rounded, a_bf16 = f32_downcast_to_bf16(a)
        b_f32_rounded, b_bf16 = f32_downcast_to_bf16(b)
        c_f32_rounded, c_bf16 = f32_downcast_to_bf16(c)
    
        keep_one_capability(capability)
        baseline_kernel, simd_kernel = name_to_kernels(metric)
 >       accurate_dt, accurate = profile(
            baseline_kernel,
            a_f32_rounded.astype(np.float64),
            b_f32_rounded.astype(np.float64),
            c_f32_rounded.astype(np.float64),
        )

 a          = array([0.0165878 , 0.15503395, 0.06849434, 0.06837122, 0.00107771,
       0.05947891, 0.11671968, 0.07291063, 0.01672855, 0.04190735,
       0.00293661, 0.15823454, 0.02906713, 0.05429479, 0.04191168,
       0.09624514], dtype=float32)
 a_bf16     = array([15496, 15903, 15756, 15756, 14989, 15732, 15855, 15765, 15497,
       15660, 15168, 15906, 15598, 15710, 15660, 15813], dtype=uint16)
 a_f32_rounded = array([0.01660156, 0.15527344, 0.06835938, 0.06835938, 0.00107574,
       0.05957031, 0.11669922, 0.07275391, 0.01672363, 0.04199219,
       0.00292969, 0.15820312, 0.02905273, 0.05419922, 0.04199219,
       0.09619141], dtype=float32)
 b          = array([0.05375053, 0.0032062 , 0.04969982, 0.11563677, 0.02762191,
       0.12338343, 0.09446726, 0.01338008, 0.0554614 , 0.06330982,
       0.0213061 , 0.19838421, 0.03552358, 0.01354804, 0.02468205,
       0.1066388 ], dtype=float32)
 b_bf16     = array([15708, 15186, 15692, 15853, 15586, 15869, 15809, 15451, 15715,
       15746, 15535, 15947, 15634, 15454, 15562, 15834], dtype=uint16)
 b_f32_rounded = array([0.05371094, 0.00320435, 0.04980469, 0.11572266, 0.02758789,
       0.12353516, 0.09423828, 0.0133667 , 0.05541992, 0.06347656,
       0.0213623 , 0.19824219, 0.03564453, 0.0135498 , 0.0246582 ,
       0.10644531], dtype=float32)
 baseline_kernel = <function baseline_mahalanobis at 0x7ffff48ef4c0>
 c          = array([[15.432303 , 11.6915245, 18.243553 , 14.523629 ,  9.465196 ,
        13.370229 , 10.6403885,  9.929929 , 13.331... , 13.391833 ,
        11.886302 , 10.651331 ,  4.809539 , 12.363076 ,  8.548623 ,
        12.915051 ]], dtype=float32)
 c_bf16     = array([[16759, 16699, 16786, 16744, 16663, 16726, 16682, 16671, 16725,
        16709, 16734, 16671, 16651, 16751, 1672...670, 16678, 16607, 16598, 16564, 16537, 16671,
        16726, 16702, 16682, 16538, 16710, 16649, 16719]], dtype=uint16)
 c_f32_rounded = array([[15.4375 , 11.6875 , 18.25   , 14.5    ,  9.4375 , 13.375  ,
        10.625  ,  9.9375 , 13.3125 , 12.3125 , 13...5  ,  4.78125,  9.9375 , 13.375  , 11.875  , 10.625  ,
         4.8125 , 12.375  ,  8.5625 , 12.9375 ]], dtype=float32)
 capability = 'sve_f16'
 metric     = 'mahalanobis'
 ndim       = 16
 simd_kernel = <built-in function mahalanobis>
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

 scripts/test.py:779: 
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
 scripts/test.py:179: in profile
    result = callable(*args, **kwargs)
        args       = (array([0.01660156, 0.15527344, 0.06835938, 0.06835938, 0.00107574,
       0.05957031, 0.11669922, 0.07275391, 0.01672...
         5.625  ,  4.78125,  9.9375 , 13.375  , 11.875  , 10.625  ,
         4.8125 , 12.375  ,  8.5625 , 12.9375 ]]))
        before     = 9892596284545
        callable   = <function baseline_mahalanobis at 0x7ffff48ef4c0>
        kwargs     = {}
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

 x = array([0.01660156, 0.15527344, 0.06835938, 0.06835938, 0.00107574,
       0.05957031, 0.11669922, 0.07275391, 0.01672363, 0.04199219,
       0.00292969, 0.15820312, 0.02905273, 0.05419922, 0.04199219,
       0.09619141])
 y = array([0.05371094, 0.00320435, 0.04980469, 0.11572266, 0.02758789,
       0.12353516, 0.09423828, 0.0133667 , 0.05541992, 0.06347656,
       0.0213623 , 0.19824219, 0.03564453, 0.0135498 , 0.0246582 ,
       0.10644531])
 z = array([[15.4375 , 11.6875 , 18.25   , 14.5    ,  9.4375 , 13.375  ,
        10.625  ,  9.9375 , 13.3125 , 12.3125 , 13...,
         5.625  ,  4.78125,  9.9375 , 13.375  , 11.875  , 10.625  ,
         4.8125 , 12.375  ,  8.5625 , 12.9375 ]])

    def baseline_mahalanobis(x, y, z):
        diff = x - y
 >       return np.sqrt(diff @ z @ diff)
 E       RuntimeWarning: invalid value encountered in sqrt

 diff       = array([-0.03710938,  0.15206909,  0.01855469, -0.04736328, -0.02651215,
       -0.06396484,  0.02246094,  0.05938721, -0.03869629, -0.02148438,
       -0.01843262, -0.04003906, -0.0065918 ,  0.04064941,  0.01733398,
       -0.01025391])
 x          = array([0.01660156, 0.15527344, 0.06835938, 0.06835938, 0.00107574,
       0.05957031, 0.11669922, 0.07275391, 0.01672363, 0.04199219,
       0.00292969, 0.15820312, 0.02905273, 0.05419922, 0.04199219,
       0.09619141])
 y          = array([0.05371094, 0.00320435, 0.04980469, 0.11572266, 0.02758789,
       0.12353516, 0.09423828, 0.0133667 , 0.05541992, 0.06347656,
       0.0213623 , 0.19824219, 0.03564453, 0.0135498 , 0.0246582 ,
       0.10644531])
 z          = array([[15.4375 , 11.6875 , 18.25   , 14.5    ,  9.4375 , 13.375  ,
        10.625  ,  9.9375 , 13.3125 , 12.3125 , 13...,
         5.625  ,  4.78125,  9.9375 , 13.375  , 11.875  , 10.625  ,
         4.8125 , 12.375  ,  8.5625 , 12.9375 ]])

 scripts/test.py:152: RuntimeWarning
 _________________ test_curved_bf16[sve_f16-mahalanobis-16-5-5] _________________

 ndim = 16, metric = 'mahalanobis', capability = 'sve_f16'
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("ndim", [11, 16, 33])
    @pytest.mark.parametrize("metric", ["bilinear", "mahalanobis"])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_curved_bf16(ndim, metric, capability, stats_fixture):
        """Compares various SIMD kernels (like Bilinear Forms and Mahalanobis distances) for curved spaces
        with their NumPy or baseline counterparts, testing accuracy for the Brain-float format not
        natively supported by NumPy."""
    
        np.random.seed()
    
        # Let's generate some non-negative probability distributions
        a = np.abs(np.random.randn(ndim).astype(np.float32))
        b = np.abs(np.random.randn(ndim).astype(np.float32))
        a /= np.sum(a)
        b /= np.sum(b)
    
        # Let's compute the inverse of the covariance matrix, otherwise in the SciPy
        # implementation of the Mahalanobis we may face `sqrt` of a negative number.
        # We multiply the matrix by its transpose to get a positive-semi-definite matrix.
        c = np.abs(np.random.randn(ndim, ndim).astype(np.float32))
        c = np.dot(c, c.T)
    
        a_f32_rounded, a_bf16 = f32_downcast_to_bf16(a)
        b_f32_rounded, b_bf16 = f32_downcast_to_bf16(b)
        c_f32_rounded, c_bf16 = f32_downcast_to_bf16(c)
    
        keep_one_capability(capability)
        baseline_kernel, simd_kernel = name_to_kernels(metric)
 >       accurate_dt, accurate = profile(
            baseline_kernel,
            a_f32_rounded.astype(np.float64),
            b_f32_rounded.astype(np.float64),
            c_f32_rounded.astype(np.float64),
        )

 a          = array([0.10112198, 0.05585487, 0.06311943, 0.05098408, 0.13076104,
       0.1165601 , 0.12375242, 0.01460606, 0.01931072, 0.07572476,
       0.01710824, 0.00178291, 0.1370929 , 0.02890239, 0.02594111,
       0.03737697], dtype=float32)
 a_bf16     = array([15823, 15717, 15745, 15697, 15878, 15855, 15869, 15471, 15518,
       15771, 15500, 15082, 15884, 15597, 15573, 15641], dtype=uint16)
 a_f32_rounded = array([0.10107422, 0.0559082 , 0.06298828, 0.05102539, 0.13085938,
       0.11669922, 0.12353516, 0.0145874 , 0.01928711, 0.07568359,
       0.01708984, 0.00178528, 0.13671875, 0.02893066, 0.02600098,
       0.03735352], dtype=float32)
 b          = array([1.9374566e-01, 2.1314515e-01, 1.2323720e-01, 3.4245055e-02,
       2.0794224e-02, 4.8021208e-02, 2.9141268e-02,... 3.5974875e-02, 5.3254675e-02,
       4.2736135e-02, 5.8644872e-02, 1.9729239e-04, 2.5923470e-02],
      dtype=float32)
 b_bf16     = array([15942, 15962, 15868, 15628, 15530, 15685, 15599, 15669, 15589,
       15688, 15635, 15706, 15663, 15728, 14671, 15572], dtype=uint16)
 b_f32_rounded = array([1.93359375e-01, 2.12890625e-01, 1.23046875e-01, 3.41796875e-02,
       2.07519531e-02, 4.80957031e-02, 2.917480...86719e-02, 5.32226562e-02,
       4.27246094e-02, 5.85937500e-02, 1.97410583e-04, 2.58789062e-02],
      dtype=float32)
 baseline_kernel = <function baseline_mahalanobis at 0x7ffff48ef4c0>
 c          = array([[16.287    , 12.015511 ,  8.357934 , 10.112292 , 17.001427 ,
        11.945602 , 13.270984 ,  9.9595995, 13.813... ,  9.491449 ,
         8.277591 ,  8.347737 ,  9.2857895,  8.448024 ,  3.2276368,
         6.024284 ]], dtype=float32)
 c_bf16     = array([[16770, 16704, 16646, 16674, 16776, 16703, 16724, 16671, 16733,
        16776, 16688, 16675, 16720, 16786, 1666...583, 16561, 16654, 16652, 16611, 16528, 16691,
        16664, 16644, 16646, 16661, 16647, 16463, 16577]], dtype=uint16)
 c_f32_rounded = array([[16.25    , 12.      ,  8.375   , 10.125   , 17.      , 11.9375  ,
        13.25    ,  9.9375  , 13.8125  , 17....     , 11.1875  ,  9.5     ,  8.25    ,  8.375   ,
         9.3125  ,  8.4375  ,  3.234375,  6.03125 ]], dtype=float32)
 capability = 'sve_f16'
 metric     = 'mahalanobis'
 ndim       = 16
 simd_kernel = <built-in function mahalanobis>
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

 scripts/test.py:779: 
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
 scripts/test.py:179: in profile
    result = callable(*args, **kwargs)
        args       = (array([0.10107422, 0.0559082 , 0.06298828, 0.05102539, 0.13085938,
       0.11669922, 0.12353516, 0.0145874 , 0.01928...7.09375 ,  4.5     , 11.1875  ,  9.5     ,  8.25    ,  8.375   ,
         9.3125  ,  8.4375  ,  3.234375,  6.03125 ]]))
        before     = 9893284760354
        callable   = <function baseline_mahalanobis at 0x7ffff48ef4c0>
        kwargs     = {}
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

 x = array([0.10107422, 0.0559082 , 0.06298828, 0.05102539, 0.13085938,
       0.11669922, 0.12353516, 0.0145874 , 0.01928711, 0.07568359,
       0.01708984, 0.00178528, 0.13671875, 0.02893066, 0.02600098,
       0.03735352])
 y = array([1.93359375e-01, 2.12890625e-01, 1.23046875e-01, 3.41796875e-02,
       2.07519531e-02, 4.80957031e-02, 2.917480...4.88281250e-02, 3.58886719e-02, 5.32226562e-02,
       4.27246094e-02, 5.85937500e-02, 1.97410583e-04, 2.58789062e-02])
 z = array([[16.25    , 12.      ,  8.375   , 10.125   , 17.      , 11.9375  ,
        13.25    ,  9.9375  , 13.8125  , 17.... 7.09375 ,  4.5     , 11.1875  ,  9.5     ,  8.25    ,  8.375   ,
         9.3125  ,  8.4375  ,  3.234375,  6.03125 ]])

    def baseline_mahalanobis(x, y, z):
        diff = x - y
 >       return np.sqrt(diff @ z @ diff)
 E       RuntimeWarning: invalid value encountered in sqrt

 diff       = array([-0.09228516, -0.15698242, -0.06005859,  0.0168457 ,  0.11010742,
        0.06860352,  0.09436035, -0.02960205, -0.00866699,  0.02685547,
       -0.01879883, -0.05143738,  0.09399414, -0.02966309,  0.02580357,
        0.01147461])
 x          = array([0.10107422, 0.0559082 , 0.06298828, 0.05102539, 0.13085938,
       0.11669922, 0.12353516, 0.0145874 , 0.01928711, 0.07568359,
       0.01708984, 0.00178528, 0.13671875, 0.02893066, 0.02600098,
       0.03735352])
 y          = array([1.93359375e-01, 2.12890625e-01, 1.23046875e-01, 3.41796875e-02,
       2.07519531e-02, 4.80957031e-02, 2.917480...4.88281250e-02, 3.58886719e-02, 5.32226562e-02,
       4.27246094e-02, 5.85937500e-02, 1.97410583e-04, 2.58789062e-02])
 z          = array([[16.25    , 12.      ,  8.375   , 10.125   , 17.      , 11.9375  ,
        13.25    ,  9.9375  , 13.8125  , 17.... 7.09375 ,  4.5     , 11.1875  ,  9.5     ,  8.25    ,  8.375   ,
         9.3125  ,  8.4375  ,  3.234375,  6.03125 ]])

 scripts/test.py:152: RuntimeWarning
 _________________ test_curved_bf16[sve_f16-mahalanobis-33-1-5] _________________

 ndim = 33, metric = 'mahalanobis', capability = 'sve_f16'
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("ndim", [11, 16, 33])
    @pytest.mark.parametrize("metric", ["bilinear", "mahalanobis"])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_curved_bf16(ndim, metric, capability, stats_fixture):
        """Compares various SIMD kernels (like Bilinear Forms and Mahalanobis distances) for curved spaces
        with their NumPy or baseline counterparts, testing accuracy for the Brain-float format not
        natively supported by NumPy."""
    
        np.random.seed()
    
        # Let's generate some non-negative probability distributions
        a = np.abs(np.random.randn(ndim).astype(np.float32))
        b = np.abs(np.random.randn(ndim).astype(np.float32))
        a /= np.sum(a)
        b /= np.sum(b)
    
        # Let's compute the inverse of the covariance matrix, otherwise in the SciPy
        # implementation of the Mahalanobis we may face `sqrt` of a negative number.
        # We multiply the matrix by its transpose to get a positive-semi-definite matrix.
        c = np.abs(np.random.randn(ndim, ndim).astype(np.float32))
        c = np.dot(c, c.T)
    
        a_f32_rounded, a_bf16 = f32_downcast_to_bf16(a)
        b_f32_rounded, b_bf16 = f32_downcast_to_bf16(b)
        c_f32_rounded, c_bf16 = f32_downcast_to_bf16(c)
    
        keep_one_capability(capability)
        baseline_kernel, simd_kernel = name_to_kernels(metric)
 >       accurate_dt, accurate = profile(
            baseline_kernel,
            a_f32_rounded.astype(np.float64),
            b_f32_rounded.astype(np.float64),
            c_f32_rounded.astype(np.float64),
        )

 a          = array([0.03612476, 0.04430304, 0.0133489 , 0.03208922, 0.02309448,
       0.07040093, 0.02723766, 0.05787247, 0.037760... 0.00607148, 0.02903697, 0.05733823, 0.00383156, 0.02334364,
       0.03398453, 0.02781374, 0.00377272], dtype=float32)
 a_bf16     = array([15636, 15669, 15451, 15619, 15549, 15760, 15583, 15725, 15643,
       15742, 15158, 15666, 15583, 15236, 15673,...15584, 15586, 15722, 15464, 15364, 15321, 15303, 15598,
       15723, 15227, 15551, 15627, 15588, 15223], dtype=uint16)
 a_f32_rounded = array([0.03613281, 0.04418945, 0.0133667 , 0.03198242, 0.02307129,
       0.0703125 , 0.02722168, 0.05786133, 0.037841... 0.006073  , 0.02905273, 0.05737305, 0.00382996, 0.02331543,
       0.03393555, 0.02783203, 0.00376892], dtype=float32)
 b          = array([0.01912296, 0.01241067, 0.02282163, 0.02287125, 0.04627129,
       0.01629195, 0.04442869, 0.09179337, 0.033920... 0.08153468, 0.00644839, 0.03639856, 0.04244276, 0.02152202,
       0.0180564 , 0.02149707, 0.02612033], dtype=float32)
 b_bf16     = array([15517, 15435, 15547, 15547, 15678, 15493, 15670, 15804, 15627,
       15519, 15487, 15404, 15606, 15498, 15647,...15566, 15713, 15554, 15240, 15643, 15625, 15783, 15315,
       15637, 15662, 15536, 15508, 15536, 15574], dtype=uint16)
 b_f32_rounded = array([0.01916504, 0.01239014, 0.02282715, 0.02282715, 0.04638672,
       0.01623535, 0.04443359, 0.09179688, 0.033935... 0.08154297, 0.00643921, 0.03637695, 0.04248047, 0.02148438,
       0.01806641, 0.02148438, 0.02612305], dtype=float32)
 baseline_kernel = <function baseline_mahalanobis at 0x7ffff48ef4c0>
 c          = array([[22.379408, 22.865492, 24.69397 , ..., 18.617544, 18.168726,
        17.984293],
       [22.865492, 25.772814, ...    22.218037],
       [17.984293, 14.151773, 15.925027, ..., 17.11387 , 22.218037,
        27.801315]], dtype=float32)
 c_bf16     = array([[16819, 16823, 16838, ..., 16789, 16785, 16784],
       [16823, 16846, 16826, ..., 16808, 16822, 16738],
      ...[16785, 16822, 16797, ..., 16800, 16857, 16818],
       [16784, 16738, 16767, ..., 16777, 16818, 16862]], dtype=uint16)
 c_f32_rounded = array([[22.375 , 22.875 , 24.75  , ..., 18.625 , 18.125 , 18.    ],
       [22.875 , 25.75  , 23.25  , ..., 21.    , 2..., 20.    , 27.125 , 22.25  ],
       [18.    , 14.125 , 15.9375, ..., 17.125 , 22.25  , 27.75  ]],
      dtype=float32)
 capability = 'sve_f16'
 metric     = 'mahalanobis'
 ndim       = 33
 simd_kernel = <built-in function mahalanobis>
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

 scripts/test.py:779: 
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
 scripts/test.py:179: in profile
    result = callable(*args, **kwargs)
        args       = (array([0.03613281, 0.04418945, 0.0133667 , 0.03198242, 0.02307129,
       0.0703125 , 0.02722168, 0.05786133, 0.03784...2.75  , 19.625 , ..., 20.    , 27.125 , 22.25  ],
       [18.    , 14.125 , 15.9375, ..., 17.125 , 22.25  , 27.75  ]]))
        before     = 9893965527160
        callable   = <function baseline_mahalanobis at 0x7ffff48ef4c0>
        kwargs     = {}
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

 x = array([0.03613281, 0.04418945, 0.0133667 , 0.03198242, 0.02307129,
       0.0703125 , 0.02722168, 0.05786133, 0.037841...0662231,
       0.006073  , 0.02905273, 0.05737305, 0.00382996, 0.02331543,
       0.03393555, 0.02783203, 0.00376892])
 y = array([0.01916504, 0.01239014, 0.02282715, 0.02282715, 0.04638672,
       0.01623535, 0.04443359, 0.09179688, 0.033935...3344727,
       0.08154297, 0.00643921, 0.03637695, 0.04248047, 0.02148438,
       0.01806641, 0.02148438, 0.02612305])
 z = array([[22.375 , 22.875 , 24.75  , ..., 18.625 , 18.125 , 18.    ],
       [22.875 , 25.75  , 23.25  , ..., 21.    , 2...22.75  , 19.625 , ..., 20.    , 27.125 , 22.25  ],
       [18.    , 14.125 , 15.9375, ..., 17.125 , 22.25  , 27.75  ]])

    def baseline_mahalanobis(x, y, z):
        diff = x - y
 >       return np.sqrt(diff @ z @ diff)
 E       RuntimeWarning: invalid value encountered in sqrt

 diff       = array([ 0.01696777,  0.03179932, -0.00946045,  0.00915527, -0.02331543,
        0.05407715, -0.01721191, -0.03393555, ...
       -0.07546997,  0.02261353,  0.02099609, -0.03865051,  0.00183105,
        0.01586914,  0.00634766, -0.02235413])
 x          = array([0.03613281, 0.04418945, 0.0133667 , 0.03198242, 0.02307129,
       0.0703125 , 0.02722168, 0.05786133, 0.037841...0662231,
       0.006073  , 0.02905273, 0.05737305, 0.00382996, 0.02331543,
       0.03393555, 0.02783203, 0.00376892])
 y          = array([0.01916504, 0.01239014, 0.02282715, 0.02282715, 0.04638672,
       0.01623535, 0.04443359, 0.09179688, 0.033935...3344727,
       0.08154297, 0.00643921, 0.03637695, 0.04248047, 0.02148438,
       0.01806641, 0.02148438, 0.02612305])
 z          = array([[22.375 , 22.875 , 24.75  , ..., 18.625 , 18.125 , 18.    ],
       [22.875 , 25.75  , 23.25  , ..., 21.    , 2...22.75  , 19.625 , ..., 20.    , 27.125 , 22.25  ],
       [18.    , 14.125 , 15.9375, ..., 17.125 , 22.25  , 27.75  ]])

 scripts/test.py:152: RuntimeWarning
 _________________ test_curved_bf16[sve_f16-mahalanobis-33-4-5] _________________

 ndim = 33, metric = 'mahalanobis', capability = 'sve_f16'
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("ndim", [11, 16, 33])
    @pytest.mark.parametrize("metric", ["bilinear", "mahalanobis"])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_curved_bf16(ndim, metric, capability, stats_fixture):
        """Compares various SIMD kernels (like Bilinear Forms and Mahalanobis distances) for curved spaces
        with their NumPy or baseline counterparts, testing accuracy for the Brain-float format not
        natively supported by NumPy."""
    
        np.random.seed()
    
        # Let's generate some non-negative probability distributions
        a = np.abs(np.random.randn(ndim).astype(np.float32))
        b = np.abs(np.random.randn(ndim).astype(np.float32))
        a /= np.sum(a)
        b /= np.sum(b)
    
        # Let's compute the inverse of the covariance matrix, otherwise in the SciPy
        # implementation of the Mahalanobis we may face `sqrt` of a negative number.
        # We multiply the matrix by its transpose to get a positive-semi-definite matrix.
        c = np.abs(np.random.randn(ndim, ndim).astype(np.float32))
        c = np.dot(c, c.T)
    
        a_f32_rounded, a_bf16 = f32_downcast_to_bf16(a)
        b_f32_rounded, b_bf16 = f32_downcast_to_bf16(b)
        c_f32_rounded, c_bf16 = f32_downcast_to_bf16(c)
    
        keep_one_capability(capability)
        baseline_kernel, simd_kernel = name_to_kernels(metric)
 >       accurate_dt, accurate = profile(
            baseline_kernel,
            a_f32_rounded.astype(np.float64),
            b_f32_rounded.astype(np.float64),
            c_f32_rounded.astype(np.float64),
        )

 a          = array([0.0257416 , 0.05223571, 0.00262496, 0.02288022, 0.03366261,
       0.04553056, 0.07185807, 0.04194384, 0.013604... 0.02432251, 0.01233046, 0.00543536, 0.03660092, 0.02460533,
       0.03693985, 0.02048707, 0.00699925], dtype=float32)
 a_bf16     = array([15571, 15702, 15148, 15547, 15626, 15674, 15763, 15660, 15455,
       15528, 15666, 15681, 15511, 15604, 15544,...15722, 15499, 15574, 15634, 15532, 15462, 15559, 15434,
       15282, 15638, 15562, 15639, 15528, 15333], dtype=uint16)
 a_f32_rounded = array([0.02575684, 0.05224609, 0.00262451, 0.02282715, 0.03369141,
       0.04541016, 0.07177734, 0.04199219, 0.013610... 0.02429199, 0.0123291 , 0.00543213, 0.03662109, 0.0246582 ,
       0.03686523, 0.02050781, 0.00698853], dtype=float32)
 b          = array([0.00894692, 0.02402186, 0.02219578, 0.03237449, 0.02175906,
       0.00878696, 0.01708378, 0.01477388, 0.018675... 0.06633361, 0.05967328, 0.03239   , 0.03281488, 0.0183346 ,
       0.0185155 , 0.00402311, 0.09196954], dtype=float32)
 b_bf16     = array([15379, 15557, 15542, 15621, 15538, 15376, 15500, 15474, 15513,
       15689, 15379, 15467, 15556, 15513, 15404,...15537, 15574, 15685, 15484, 15696, 15678, 15752, 15732,
       15621, 15622, 15510, 15512, 15236, 15804], dtype=uint16)
 b_f32_rounded = array([0.00897217, 0.02404785, 0.0222168 , 0.0324707 , 0.02172852,
       0.00878906, 0.01708984, 0.01477051, 0.018676... 0.06640625, 0.05957031, 0.0324707 , 0.03271484, 0.01831055,
       0.01855469, 0.00402832, 0.09179688], dtype=float32)
 baseline_kernel = <function baseline_mahalanobis at 0x7ffff48ef4c0>
 c          = array([[19.777733, 14.849125, 23.920568, ..., 17.461327, 16.718306,
        21.363808],
       [14.849125, 20.337456, ...    11.480794],
       [21.363808, 25.752584, 18.45999 , ..., 20.149975, 11.480794,
        23.735332]], dtype=float32)
 c_bf16     = array([[16798, 16750, 16831, ..., 16780, 16774, 16811],
       [16750, 16803, 16820, ..., 16828, 16785, 16846],
      ...[16774, 16785, 16723, ..., 16781, 16688, 16696],
       [16811, 16846, 16788, ..., 16801, 16696, 16830]], dtype=uint16)
 c_f32_rounded = array([[19.75  , 14.875 , 23.875 , ..., 17.5   , 16.75  , 21.375 ],
       [14.875 , 20.375 , 22.5   , ..., 23.5   , 1..., 17.625 , 11.    , 11.5   ],
       [21.375 , 25.75  , 18.5   , ..., 20.125 , 11.5   , 23.75  ]],
      dtype=float32)
 capability = 'sve_f16'
 metric     = 'mahalanobis'
 ndim       = 33
 simd_kernel = <built-in function mahalanobis>
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

 scripts/test.py:779: 
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
 scripts/test.py:179: in profile
    result = callable(*args, **kwargs)
        args       = (array([0.02575684, 0.05224609, 0.00262451, 0.02282715, 0.03369141,
       0.04541016, 0.07177734, 0.04199219, 0.01361...8.125 , 13.1875, ..., 17.625 , 11.    , 11.5   ],
       [21.375 , 25.75  , 18.5   , ..., 20.125 , 11.5   , 23.75  ]]))
        before     = 9894661203952
        callable   = <function baseline_mahalanobis at 0x7ffff48ef4c0>
        kwargs     = {}
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

 x = array([0.02575684, 0.05224609, 0.00262451, 0.02282715, 0.03369141,
       0.04541016, 0.07177734, 0.04199219, 0.013610...1403809,
       0.02429199, 0.0123291 , 0.00543213, 0.03662109, 0.0246582 ,
       0.03686523, 0.02050781, 0.00698853])
 y = array([0.00897217, 0.02404785, 0.0222168 , 0.0324707 , 0.02172852,
       0.00878906, 0.01708984, 0.01477051, 0.018676...4638672,
       0.06640625, 0.05957031, 0.0324707 , 0.03271484, 0.01831055,
       0.01855469, 0.00402832, 0.09179688])
 z = array([[19.75  , 14.875 , 23.875 , ..., 17.5   , 16.75  , 21.375 ],
       [14.875 , 20.375 , 22.5   , ..., 23.5   , 1...18.125 , 13.1875, ..., 17.625 , 11.    , 11.5   ],
       [21.375 , 25.75  , 18.5   , ..., 20.125 , 11.5   , 23.75  ]])

    def baseline_mahalanobis(x, y, z):
        diff = x - y
 >       return np.sqrt(diff @ z @ diff)
 E       RuntimeWarning: invalid value encountered in sqrt

 diff       = array([ 0.01678467,  0.02819824, -0.01959229, -0.00964355,  0.01196289,
        0.03662109,  0.0546875 ,  0.02722168, ...
       -0.04211426, -0.04724121, -0.02703857,  0.00390625,  0.00634766,
        0.01831055,  0.01647949, -0.08480835])
 x          = array([0.02575684, 0.05224609, 0.00262451, 0.02282715, 0.03369141,
       0.04541016, 0.07177734, 0.04199219, 0.013610...1403809,
       0.02429199, 0.0123291 , 0.00543213, 0.03662109, 0.0246582 ,
       0.03686523, 0.02050781, 0.00698853])
 y          = array([0.00897217, 0.02404785, 0.0222168 , 0.0324707 , 0.02172852,
       0.00878906, 0.01708984, 0.01477051, 0.018676...4638672,
       0.06640625, 0.05957031, 0.0324707 , 0.03271484, 0.01831055,
       0.01855469, 0.00402832, 0.09179688])
 z          = array([[19.75  , 14.875 , 23.875 , ..., 17.5   , 16.75  , 21.375 ],
       [14.875 , 20.375 , 22.5   , ..., 23.5   , 1...18.125 , 13.1875, ..., 17.625 , 11.    , 11.5   ],
       [21.375 , 25.75  , 18.5   , ..., 20.125 , 11.5   , 23.75  ]])

 scripts/test.py:152: RuntimeWarning
 ________________ test_curved_bf16[sve_bf16-mahalanobis-11-1-5] _________________

 ndim = 11, metric = 'mahalanobis', capability = 'sve_bf16'
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("ndim", [11, 16, 33])
    @pytest.mark.parametrize("metric", ["bilinear", "mahalanobis"])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_curved_bf16(ndim, metric, capability, stats_fixture):
        """Compares various SIMD kernels (like Bilinear Forms and Mahalanobis distances) for curved spaces
        with their NumPy or baseline counterparts, testing accuracy for the Brain-float format not
        natively supported by NumPy."""
    
        np.random.seed()
    
        # Let's generate some non-negative probability distributions
        a = np.abs(np.random.randn(ndim).astype(np.float32))
        b = np.abs(np.random.randn(ndim).astype(np.float32))
        a /= np.sum(a)
        b /= np.sum(b)
    
        # Let's compute the inverse of the covariance matrix, otherwise in the SciPy
        # implementation of the Mahalanobis we may face `sqrt` of a negative number.
        # We multiply the matrix by its transpose to get a positive-semi-definite matrix.
        c = np.abs(np.random.randn(ndim, ndim).astype(np.float32))
        c = np.dot(c, c.T)
    
        a_f32_rounded, a_bf16 = f32_downcast_to_bf16(a)
        b_f32_rounded, b_bf16 = f32_downcast_to_bf16(b)
        c_f32_rounded, c_bf16 = f32_downcast_to_bf16(c)
    
        keep_one_capability(capability)
        baseline_kernel, simd_kernel = name_to_kernels(metric)
 >       accurate_dt, accurate = profile(
            baseline_kernel,
            a_f32_rounded.astype(np.float64),
            b_f32_rounded.astype(np.float64),
            c_f32_rounded.astype(np.float64),
        )

 a          = array([0.00289193, 0.21629336, 0.00986995, 0.05156491, 0.00560211,
       0.11974879, 0.05642359, 0.07347157, 0.21943717, 0.04193787,
       0.20275868], dtype=float32)
 a_bf16     = array([15166, 15965, 15394, 15699, 15288, 15861, 15719, 15766, 15969,
       15660, 15952], dtype=uint16)
 a_f32_rounded = array([0.00289917, 0.21582031, 0.0098877 , 0.05151367, 0.00561523,
       0.11962891, 0.05639648, 0.07324219, 0.21972656, 0.04199219,
       0.203125  ], dtype=float32)
 b          = array([0.10252046, 0.10414574, 0.2491743 , 0.05040006, 0.12903821,
       0.10391913, 0.03044988, 0.08727252, 0.01542306, 0.08973265,
       0.03792394], dtype=float32)
 b_bf16     = array([15826, 15829, 15999, 15694, 15876, 15829, 15609, 15795, 15485,
       15800, 15643], dtype=uint16)
 b_f32_rounded = array([0.10253906, 0.10400391, 0.24902344, 0.05029297, 0.12890625,
       0.10400391, 0.03039551, 0.08740234, 0.01544189, 0.08984375,
       0.0378418 ], dtype=float32)
 baseline_kernel = <function baseline_mahalanobis at 0x7ffff48ef4c0>
 c          = array([[13.652869 ,  6.847098 ,  7.144033 , 10.244878 ,  4.412858 ,
         7.22319  ,  9.4575815,  5.446012 ,  8.353... , 10.821735 ,
         8.434992 ,  9.508287 ,  5.734198 ,  9.282652 , 10.37155  ,
         6.1829686]], dtype=float32)
 c_bf16     = array([[16730, 16603, 16613, 16676, 16525, 16615, 16663, 16558, 16646,
        16603, 16691],
       [16603, 16657, 16...7, 16678],
       [16691, 16707, 16643, 16685, 16685, 16647, 16664, 16567, 16661,
        16678, 16582]], dtype=uint16)
 c_f32_rounded = array([[13.625   ,  6.84375 ,  7.15625 , 10.25    ,  4.40625 ,  7.21875 ,
         9.4375  ,  5.4375  ,  8.375   ,  6.... 10.8125  , 10.8125  ,  8.4375  ,
         9.5     ,  5.71875 ,  9.3125  , 10.375   ,  6.1875  ]],
      dtype=float32)
 capability = 'sve_bf16'
 metric     = 'mahalanobis'
 ndim       = 11
 simd_kernel = <built-in function mahalanobis>
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

 scripts/test.py:779: 
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
 scripts/test.py:179: in profile
    result = callable(*args, **kwargs)
        args       = (array([0.00289917, 0.21582031, 0.0098877 , 0.05151367, 0.00561523,
       0.11962891, 0.05639648, 0.07324219, 0.21972...2.1875  ,  8.1875  , 10.8125  , 10.8125  ,  8.4375  ,
         9.5     ,  5.71875 ,  9.3125  , 10.375   ,  6.1875  ]]))
        before     = 9895603352665
        callable   = <function baseline_mahalanobis at 0x7ffff48ef4c0>
        kwargs     = {}
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

 x = array([0.00289917, 0.21582031, 0.0098877 , 0.05151367, 0.00561523,
       0.11962891, 0.05639648, 0.07324219, 0.21972656, 0.04199219,
       0.203125  ])
 y = array([0.10253906, 0.10400391, 0.24902344, 0.05029297, 0.12890625,
       0.10400391, 0.03039551, 0.08740234, 0.01544189, 0.08984375,
       0.0378418 ])
 z = array([[13.625   ,  6.84375 ,  7.15625 , 10.25    ,  4.40625 ,  7.21875 ,
         9.4375  ,  5.4375  ,  8.375   ,  6....12.1875  ,  8.1875  , 10.8125  , 10.8125  ,  8.4375  ,
         9.5     ,  5.71875 ,  9.3125  , 10.375   ,  6.1875  ]])

    def baseline_mahalanobis(x, y, z):
        diff = x - y
 >       return np.sqrt(diff @ z @ diff)
 E       RuntimeWarning: invalid value encountered in sqrt

 diff       = array([-0.09963989,  0.11181641, -0.23913574,  0.0012207 , -0.12329102,
        0.015625  ,  0.02600098, -0.01416016,  0.20428467, -0.04785156,
        0.1652832 ])
 x          = array([0.00289917, 0.21582031, 0.0098877 , 0.05151367, 0.00561523,
       0.11962891, 0.05639648, 0.07324219, 0.21972656, 0.04199219,
       0.203125  ])
 y          = array([0.10253906, 0.10400391, 0.24902344, 0.05029297, 0.12890625,
       0.10400391, 0.03039551, 0.08740234, 0.01544189, 0.08984375,
       0.0378418 ])
 z          = array([[13.625   ,  6.84375 ,  7.15625 , 10.25    ,  4.40625 ,  7.21875 ,
         9.4375  ,  5.4375  ,  8.375   ,  6....12.1875  ,  8.1875  , 10.8125  , 10.8125  ,  8.4375  ,
         9.5     ,  5.71875 ,  9.3125  , 10.375   ,  6.1875  ]])

 scripts/test.py:152: RuntimeWarning
 ________________ test_curved_bf16[sve_bf16-mahalanobis-11-2-5] _________________

 ndim = 11, metric = 'mahalanobis', capability = 'sve_bf16'
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("ndim", [11, 16, 33])
    @pytest.mark.parametrize("metric", ["bilinear", "mahalanobis"])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_curved_bf16(ndim, metric, capability, stats_fixture):
        """Compares various SIMD kernels (like Bilinear Forms and Mahalanobis distances) for curved spaces
        with their NumPy or baseline counterparts, testing accuracy for the Brain-float format not
        natively supported by NumPy."""
    
        np.random.seed()
    
        # Let's generate some non-negative probability distributions
        a = np.abs(np.random.randn(ndim).astype(np.float32))
        b = np.abs(np.random.randn(ndim).astype(np.float32))
        a /= np.sum(a)
        b /= np.sum(b)
    
        # Let's compute the inverse of the covariance matrix, otherwise in the SciPy
        # implementation of the Mahalanobis we may face `sqrt` of a negative number.
        # We multiply the matrix by its transpose to get a positive-semi-definite matrix.
        c = np.abs(np.random.randn(ndim, ndim).astype(np.float32))
        c = np.dot(c, c.T)
    
        a_f32_rounded, a_bf16 = f32_downcast_to_bf16(a)
        b_f32_rounded, b_bf16 = f32_downcast_to_bf16(b)
        c_f32_rounded, c_bf16 = f32_downcast_to_bf16(c)
    
        keep_one_capability(capability)
        baseline_kernel, simd_kernel = name_to_kernels(metric)
 >       accurate_dt, accurate = profile(
            baseline_kernel,
            a_f32_rounded.astype(np.float64),
            b_f32_rounded.astype(np.float64),
            c_f32_rounded.astype(np.float64),
        )

 a          = array([0.08344672, 0.05847009, 0.09734607, 0.02515612, 0.24624072,
       0.05182034, 0.08486824, 0.00949677, 0.2527456 , 0.0361492 ,
       0.05426025], dtype=float32)
 a_bf16     = array([15787, 15727, 15815, 15566, 15996, 15700, 15790, 15388, 16001,
       15636, 15710], dtype=uint16)
 a_f32_rounded = array([0.08349609, 0.05834961, 0.09716797, 0.02514648, 0.24609375,
       0.05175781, 0.08496094, 0.00952148, 0.25195312, 0.03613281,
       0.05419922], dtype=float32)
 b          = array([0.05891331, 0.04000203, 0.13185027, 0.10043657, 0.07754947,
       0.04359311, 0.1584271 , 0.11146448, 0.05451452, 0.17810412,
       0.04514501], dtype=float32)
 b_bf16     = array([15729, 15652, 15879, 15822, 15775, 15667, 15906, 15844, 15711,
       15926, 15673], dtype=uint16)
 b_f32_rounded = array([0.05883789, 0.04003906, 0.13183594, 0.10058594, 0.07763672,
       0.04370117, 0.15820312, 0.11132812, 0.05444336, 0.17773438,
       0.04516602], dtype=float32)
 baseline_kernel = <function baseline_mahalanobis at 0x7ffff48ef4c0>
 c          = array([[ 7.516326 ,  5.2580943,  8.789218 ,  8.778402 ,  5.3952928,
         7.4089212,  6.8629003,  7.4512057,  3.819... ,  5.453742 ,
         4.112225 ,  5.2392583,  4.5382304,  2.6968353,  5.197224 ,
         4.3599777]], dtype=float32)
 c_bf16     = array([[16625, 16552, 16653, 16652, 16557, 16621, 16604, 16622, 16500,
        16606, 16519],
       [16552, 16641, 16...4, 16550],
       [16519, 16441, 16589, 16543, 16559, 16516, 16552, 16529, 16429,
        16550, 16524]], dtype=uint16)
 c_f32_rounded = array([[ 7.53125 ,  5.25    ,  8.8125  ,  8.75    ,  5.40625 ,  7.40625 ,
         6.875   ,  7.4375  ,  3.8125  ,  6....  4.96875 ,  5.46875 ,  4.125   ,
         5.25    ,  4.53125 ,  2.703125,  5.1875  ,  4.375   ]],
      dtype=float32)
 capability = 'sve_bf16'
 metric     = 'mahalanobis'
 ndim       = 11
 simd_kernel = <built-in function mahalanobis>
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

 scripts/test.py:779: 
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
 scripts/test.py:179: in profile
    result = callable(*args, **kwargs)
        args       = (array([0.08349609, 0.05834961, 0.09716797, 0.02514648, 0.24609375,
       0.05175781, 0.08496094, 0.00952148, 0.25195...2.890625,  6.40625 ,  4.96875 ,  5.46875 ,  4.125   ,
         5.25    ,  4.53125 ,  2.703125,  5.1875  ,  4.375   ]]))
        before     = 9896246039503
        callable   = <function baseline_mahalanobis at 0x7ffff48ef4c0>
        kwargs     = {}
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

 x = array([0.08349609, 0.05834961, 0.09716797, 0.02514648, 0.24609375,
       0.05175781, 0.08496094, 0.00952148, 0.25195312, 0.03613281,
       0.05419922])
 y = array([0.05883789, 0.04003906, 0.13183594, 0.10058594, 0.07763672,
       0.04370117, 0.15820312, 0.11132812, 0.05444336, 0.17773438,
       0.04516602])
 z = array([[ 7.53125 ,  5.25    ,  8.8125  ,  8.75    ,  5.40625 ,  7.40625 ,
         6.875   ,  7.4375  ,  3.8125  ,  6.... 2.890625,  6.40625 ,  4.96875 ,  5.46875 ,  4.125   ,
         5.25    ,  4.53125 ,  2.703125,  5.1875  ,  4.375   ]])

    def baseline_mahalanobis(x, y, z):
        diff = x - y
 >       return np.sqrt(diff @ z @ diff)
 E       RuntimeWarning: invalid value encountered in sqrt

 diff       = array([ 0.0246582 ,  0.01831055, -0.03466797, -0.07543945,  0.16845703,
        0.00805664, -0.07324219, -0.10180664,  0.19750977, -0.14160156,
        0.0090332 ])
 x          = array([0.08349609, 0.05834961, 0.09716797, 0.02514648, 0.24609375,
       0.05175781, 0.08496094, 0.00952148, 0.25195312, 0.03613281,
       0.05419922])
 y          = array([0.05883789, 0.04003906, 0.13183594, 0.10058594, 0.07763672,
       0.04370117, 0.15820312, 0.11132812, 0.05444336, 0.17773438,
       0.04516602])
 z          = array([[ 7.53125 ,  5.25    ,  8.8125  ,  8.75    ,  5.40625 ,  7.40625 ,
         6.875   ,  7.4375  ,  3.8125  ,  6.... 2.890625,  6.40625 ,  4.96875 ,  5.46875 ,  4.125   ,
         5.25    ,  4.53125 ,  2.703125,  5.1875  ,  4.375   ]])

 scripts/test.py:152: RuntimeWarning
 ________________ test_curved_bf16[sve_bf16-mahalanobis-33-1-5] _________________

 ndim = 33, metric = 'mahalanobis', capability = 'sve_bf16'
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("ndim", [11, 16, 33])
    @pytest.mark.parametrize("metric", ["bilinear", "mahalanobis"])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_curved_bf16(ndim, metric, capability, stats_fixture):
        """Compares various SIMD kernels (like Bilinear Forms and Mahalanobis distances) for curved spaces
        with their NumPy or baseline counterparts, testing accuracy for the Brain-float format not
        natively supported by NumPy."""
    
        np.random.seed()
    
        # Let's generate some non-negative probability distributions
        a = np.abs(np.random.randn(ndim).astype(np.float32))
        b = np.abs(np.random.randn(ndim).astype(np.float32))
        a /= np.sum(a)
        b /= np.sum(b)
    
        # Let's compute the inverse of the covariance matrix, otherwise in the SciPy
        # implementation of the Mahalanobis we may face `sqrt` of a negative number.
        # We multiply the matrix by its transpose to get a positive-semi-definite matrix.
        c = np.abs(np.random.randn(ndim, ndim).astype(np.float32))
        c = np.dot(c, c.T)
    
        a_f32_rounded, a_bf16 = f32_downcast_to_bf16(a)
        b_f32_rounded, b_bf16 = f32_downcast_to_bf16(b)
        c_f32_rounded, c_bf16 = f32_downcast_to_bf16(c)
    
        keep_one_capability(capability)
        baseline_kernel, simd_kernel = name_to_kernels(metric)
 >       accurate_dt, accurate = profile(
            baseline_kernel,
            a_f32_rounded.astype(np.float64),
            b_f32_rounded.astype(np.float64),
            c_f32_rounded.astype(np.float64),
        )

 a          = array([0.02394023, 0.07048246, 0.04226622, 0.03912506, 0.02850803,
       0.05888268, 0.03072386, 0.0090103 , 0.006514... 0.02478924, 0.03727146, 0.06475735, 0.0110235 , 0.03625676,
       0.03545155, 0.01169913, 0.00753283], dtype=float32)
 a_bf16     = array([15556, 15760, 15661, 15648, 15594, 15729, 15612, 15380, 15317,
       15603, 15761, 15355, 15650, 15556, 15548,...15593, 15495, 15778, 15636, 15421, 15548, 15563, 15641,
       15749, 15413, 15637, 15633, 15424, 15351], dtype=uint16)
 a_f32_rounded = array([0.02392578, 0.0703125 , 0.04223633, 0.0390625 , 0.02856445,
       0.05883789, 0.03076172, 0.0090332 , 0.006500... 0.02478027, 0.03735352, 0.06494141, 0.01104736, 0.03637695,
       0.03540039, 0.01171875, 0.00753784], dtype=float32)
 b          = array([0.00499834, 0.01051107, 0.00820612, 0.0229125 , 0.05169868,
       0.05430333, 0.08736934, 0.02625121, 0.002290... 0.03202008, 0.01508188, 0.00502281, 0.02147752, 0.00095289,
       0.03612235, 0.02318516, 0.01339025], dtype=float32)
 b_bf16     = array([15268, 15404, 15366, 15548, 15700, 15710, 15795, 15575, 15126,
       15475, 15609, 15641, 15601, 15689, 15670,...15666, 15759, 15722, 15373, 15479, 15630, 15619, 15479,
       15269, 15536, 14970, 15636, 15550, 15451], dtype=uint16)
 b_f32_rounded = array([0.00500488, 0.01049805, 0.00817871, 0.02294922, 0.05175781,
       0.05419922, 0.08740234, 0.02624512, 0.002288... 0.03198242, 0.01507568, 0.0050354 , 0.02148438, 0.00095367,
       0.03613281, 0.02319336, 0.0133667 ], dtype=float32)
 baseline_kernel = <function baseline_mahalanobis at 0x7ffff48ef4c0>
 c          = array([[18.52824  , 15.0219965, 17.09649  , ..., 21.31457  , 22.539583 ,
        15.409855 ],
       [15.0219965, 18.2...228725 ],
       [15.409855 , 18.069408 , 12.680773 , ..., 24.789454 , 24.228725 ,
        33.520374 ]], dtype=float32)
 c_bf16     = array([[16788, 16752, 16777, ..., 16811, 16820, 16759],
       [16752, 16786, 16819, ..., 16790, 16832, 16785],
      ...[16820, 16832, 16761, ..., 16796, 16795, 16834],
       [16759, 16785, 16715, ..., 16838, 16834, 16902]], dtype=uint16)
 c_f32_rounded = array([[18.5   , 15.    , 17.125 , ..., 21.375 , 22.5   , 15.4375],
       [15.    , 18.25  , 22.375 , ..., 18.75  , 2..., 19.5   , 19.375 , 24.25  ],
       [15.4375, 18.125 , 12.6875, ..., 24.75  , 24.25  , 33.5   ]],
      dtype=float32)
 capability = 'sve_bf16'
 metric     = 'mahalanobis'
 ndim       = 33
 simd_kernel = <built-in function mahalanobis>
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

 scripts/test.py:779: 
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
 scripts/test.py:179: in profile
    result = callable(*args, **kwargs)
        args       = (array([0.02392578, 0.0703125 , 0.04223633, 0.0390625 , 0.02856445,
       0.05883789, 0.03076172, 0.0090332 , 0.00650...4.    , 15.5625, ..., 19.5   , 19.375 , 24.25  ],
       [15.4375, 18.125 , 12.6875, ..., 24.75  , 24.25  , 33.5   ]]))
        before     = 9897036315234
        callable   = <function baseline_mahalanobis at 0x7ffff48ef4c0>
        kwargs     = {}
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

 x = array([0.02392578, 0.0703125 , 0.04223633, 0.0390625 , 0.02856445,
       0.05883789, 0.03076172, 0.0090332 , 0.006500...2294922,
       0.02478027, 0.03735352, 0.06494141, 0.01104736, 0.03637695,
       0.03540039, 0.01171875, 0.00753784])
 y = array([0.00500488, 0.01049805, 0.00817871, 0.02294922, 0.05175781,
       0.05419922, 0.08740234, 0.02624512, 0.002288...3466797,
       0.03198242, 0.01507568, 0.0050354 , 0.02148438, 0.00095367,
       0.03613281, 0.02319336, 0.0133667 ])
 z = array([[18.5   , 15.    , 17.125 , ..., 21.375 , 22.5   , 15.4375],
       [15.    , 18.25  , 22.375 , ..., 18.75  , 2...24.    , 15.5625, ..., 19.5   , 19.375 , 24.25  ],
       [15.4375, 18.125 , 12.6875, ..., 24.75  , 24.25  , 33.5   ]])

    def baseline_mahalanobis(x, y, z):
        diff = x - y
 >       return np.sqrt(diff @ z @ diff)
 E       RuntimeWarning: invalid value encountered in sqrt

 diff       = array([ 0.0189209 ,  0.05981445,  0.03405762,  0.01611328, -0.02319336,
        0.00463867, -0.05664062, -0.01721191, ...
       -0.00720215,  0.02227783,  0.05990601, -0.01043701,  0.03542328,
       -0.00073242, -0.01147461, -0.00582886])
 x          = array([0.02392578, 0.0703125 , 0.04223633, 0.0390625 , 0.02856445,
       0.05883789, 0.03076172, 0.0090332 , 0.006500...2294922,
       0.02478027, 0.03735352, 0.06494141, 0.01104736, 0.03637695,
       0.03540039, 0.01171875, 0.00753784])
 y          = array([0.00500488, 0.01049805, 0.00817871, 0.02294922, 0.05175781,
       0.05419922, 0.08740234, 0.02624512, 0.002288...3466797,
       0.03198242, 0.01507568, 0.0050354 , 0.02148438, 0.00095367,
       0.03613281, 0.02319336, 0.0133667 ])
 z          = array([[18.5   , 15.    , 17.125 , ..., 21.375 , 22.5   , 15.4375],
       [15.    , 18.25  , 22.375 , ..., 18.75  , 2...24.    , 15.5625, ..., 19.5   , 19.375 , 24.25  ],
       [15.4375, 18.125 , 12.6875, ..., 24.75  , 24.25  , 33.5   ]])

 scripts/test.py:152: RuntimeWarning
 ________________ test_curved_bf16[sve_bf16-mahalanobis-33-5-5] _________________

 ndim = 33, metric = 'mahalanobis', capability = 'sve_bf16'
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("ndim", [11, 16, 33])
    @pytest.mark.parametrize("metric", ["bilinear", "mahalanobis"])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_curved_bf16(ndim, metric, capability, stats_fixture):
        """Compares various SIMD kernels (like Bilinear Forms and Mahalanobis distances) for curved spaces
        with their NumPy or baseline counterparts, testing accuracy for the Brain-float format not
        natively supported by NumPy."""
    
        np.random.seed()
    
        # Let's generate some non-negative probability distributions
        a = np.abs(np.random.randn(ndim).astype(np.float32))
        b = np.abs(np.random.randn(ndim).astype(np.float32))
        a /= np.sum(a)
        b /= np.sum(b)
    
        # Let's compute the inverse of the covariance matrix, otherwise in the SciPy
        # implementation of the Mahalanobis we may face `sqrt` of a negative number.
        # We multiply the matrix by its transpose to get a positive-semi-definite matrix.
        c = np.abs(np.random.randn(ndim, ndim).astype(np.float32))
        c = np.dot(c, c.T)
    
        a_f32_rounded, a_bf16 = f32_downcast_to_bf16(a)
        b_f32_rounded, b_bf16 = f32_downcast_to_bf16(b)
        c_f32_rounded, c_bf16 = f32_downcast_to_bf16(c)
    
        keep_one_capability(capability)
        baseline_kernel, simd_kernel = name_to_kernels(metric)
 >       accurate_dt, accurate = profile(
            baseline_kernel,
            a_f32_rounded.astype(np.float64),
            b_f32_rounded.astype(np.float64),
            c_f32_rounded.astype(np.float64),
        )

 a          = array([0.01604493, 0.05928836, 0.03121943, 0.01535778, 0.0416676 ,
       0.05587472, 0.01491626, 0.00729312, 0.002271... 0.0175678 , 0.03946678, 0.03870788, 0.01834612, 0.06190523,
       0.0742253 , 0.00683808, 0.00406409], dtype=float32)
 a_bf16     = array([15491, 15731, 15616, 15484, 15659, 15717, 15476, 15343, 15125,
       15260, 15539, 15601, 15651, 15749, 15340,...15563, 15750, 15244, 15496, 15718, 15733, 15504, 15650,
       15647, 15510, 15742, 15768, 15328, 15237], dtype=uint16)
 a_f32_rounded = array([0.01599121, 0.05932617, 0.03125   , 0.01538086, 0.04174805,
       0.0559082 , 0.01489258, 0.0072937 , 0.002273... 0.01757812, 0.03955078, 0.03881836, 0.01831055, 0.06201172,
       0.07421875, 0.00683594, 0.00405884], dtype=float32)
 b          = array([0.005725  , 0.0467917 , 0.05407727, 0.00912346, 0.02236211,
       0.06374967, 0.02140246, 0.02570161, 0.027863... 0.00448562, 0.00947761, 0.04633753, 0.03215646, 0.05336451,
       0.01381808, 0.0246359 , 0.00270327], dtype=float32)
 b_bf16     = array([15292, 15680, 15710, 15381, 15543, 15747, 15535, 15571, 15588,
       15620, 15644, 15668, 15817, 15366, 15521,...15759, 15185, 15588, 15478, 15577, 15708, 15251, 15387,
       15678, 15620, 15707, 15458, 15562, 15153], dtype=uint16)
 b_f32_rounded = array([0.0057373 , 0.046875  , 0.05419922, 0.00909424, 0.02233887,
       0.06396484, 0.0213623 , 0.02575684, 0.027832... 0.00448608, 0.00946045, 0.04638672, 0.03222656, 0.0534668 ,
       0.01379395, 0.0246582 , 0.00270081], dtype=float32)
 baseline_kernel = <function baseline_mahalanobis at 0x7ffff48ef4c0>
 c          = array([[18.521978, 16.80983 , 21.232533, ..., 30.151127, 16.457928,
        22.922989],
       [16.80983 , 16.950024, ...    16.48926 ],
       [22.922989, 23.434563, 25.308376, ..., 16.959446, 16.48926 ,
        18.411568]], dtype=float32)
 c_bf16     = array([[16788, 16774, 16810, ..., 16881, 16772, 16823],
       [16774, 16776, 16842, ..., 16871, 16770, 16827],
      ...[16772, 16770, 16831, ..., 16830, 16802, 16772],
       [16823, 16827, 16842, ..., 16776, 16772, 16787]], dtype=uint16)
 c_f32_rounded = array([[18.5  , 16.75 , 21.25 , ..., 30.125, 16.5  , 22.875],
       [16.75 , 17.   , 25.25 , ..., 28.875, 16.25 , 23.....875, ..., 23.75 , 20.25 , 16.5  ],
       [22.875, 23.375, 25.25 , ..., 17.   , 16.5  , 18.375]],
      dtype=float32)
 capability = 'sve_bf16'
 metric     = 'mahalanobis'
 ndim       = 33
 simd_kernel = <built-in function mahalanobis>
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

 scripts/test.py:779: 
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
 scripts/test.py:179: in profile
    result = callable(*args, **kwargs)
        args       = (array([0.01599121, 0.05932617, 0.03125   , 0.01538086, 0.04174805,
       0.0559082 , 0.01489258, 0.0072937 , 0.00227... [16.5  , 16.25 , 23.875, ..., 23.75 , 20.25 , 16.5  ],
       [22.875, 23.375, 25.25 , ..., 17.   , 16.5  , 18.375]]))
        before     = 9897753617084
        callable   = <function baseline_mahalanobis at 0x7ffff48ef4c0>
        kwargs     = {}
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

 x = array([0.01599121, 0.05932617, 0.03125   , 0.01538086, 0.04174805,
       0.0559082 , 0.01489258, 0.0072937 , 0.002273...5981445,
       0.01757812, 0.03955078, 0.03881836, 0.01831055, 0.06201172,
       0.07421875, 0.00683594, 0.00405884])
 y = array([0.0057373 , 0.046875  , 0.05419922, 0.00909424, 0.02233887,
       0.06396484, 0.0213623 , 0.02575684, 0.027832...5371094,
       0.00448608, 0.00946045, 0.04638672, 0.03222656, 0.0534668 ,
       0.01379395, 0.0246582 , 0.00270081])
 z = array([[18.5  , 16.75 , 21.25 , ..., 30.125, 16.5  , 22.875],
       [16.75 , 17.   , 25.25 , ..., 28.875, 16.25 , 23....  [16.5  , 16.25 , 23.875, ..., 23.75 , 20.25 , 16.5  ],
       [22.875, 23.375, 25.25 , ..., 17.   , 16.5  , 18.375]])

    def baseline_mahalanobis(x, y, z):
        diff = x - y
 >       return np.sqrt(diff @ z @ diff)
 E       RuntimeWarning: invalid value encountered in sqrt

 diff       = array([ 0.01025391,  0.01245117, -0.02294922,  0.00628662,  0.01940918,
       -0.00805664, -0.00646973, -0.01846313, ...
        0.01309204,  0.03009033, -0.00756836, -0.01391602,  0.00854492,
        0.0604248 , -0.01782227,  0.00135803])
 x          = array([0.01599121, 0.05932617, 0.03125   , 0.01538086, 0.04174805,
       0.0559082 , 0.01489258, 0.0072937 , 0.002273...5981445,
       0.01757812, 0.03955078, 0.03881836, 0.01831055, 0.06201172,
       0.07421875, 0.00683594, 0.00405884])
 y          = array([0.0057373 , 0.046875  , 0.05419922, 0.00909424, 0.02233887,
       0.06396484, 0.0213623 , 0.02575684, 0.027832...5371094,
       0.00448608, 0.00946045, 0.04638672, 0.03222656, 0.0534668 ,
       0.01379395, 0.0246582 , 0.00270081])
 z          = array([[18.5  , 16.75 , 21.25 , ..., 30.125, 16.5  , 22.875],
       [16.75 , 17.   , 25.25 , ..., 28.875, 16.25 , 23....  [16.5  , 16.25 , 23.875, ..., 23.75 , 20.25 , 16.5  ],
       [22.875, 23.375, 25.25 , ..., 17.   , 16.5  , 18.375]])

 scripts/test.py:152: RuntimeWarning
 _________________ test_curved_bf16[sve_i8-mahalanobis-11-1-5] __________________

 ndim = 11, metric = 'mahalanobis', capability = 'sve_i8'
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("ndim", [11, 16, 33])
    @pytest.mark.parametrize("metric", ["bilinear", "mahalanobis"])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_curved_bf16(ndim, metric, capability, stats_fixture):
        """Compares various SIMD kernels (like Bilinear Forms and Mahalanobis distances) for curved spaces
        with their NumPy or baseline counterparts, testing accuracy for the Brain-float format not
        natively supported by NumPy."""
    
        np.random.seed()
    
        # Let's generate some non-negative probability distributions
        a = np.abs(np.random.randn(ndim).astype(np.float32))
        b = np.abs(np.random.randn(ndim).astype(np.float32))
        a /= np.sum(a)
        b /= np.sum(b)
    
        # Let's compute the inverse of the covariance matrix, otherwise in the SciPy
        # implementation of the Mahalanobis we may face `sqrt` of a negative number.
        # We multiply the matrix by its transpose to get a positive-semi-definite matrix.
        c = np.abs(np.random.randn(ndim, ndim).astype(np.float32))
        c = np.dot(c, c.T)
    
        a_f32_rounded, a_bf16 = f32_downcast_to_bf16(a)
        b_f32_rounded, b_bf16 = f32_downcast_to_bf16(b)
        c_f32_rounded, c_bf16 = f32_downcast_to_bf16(c)
    
        keep_one_capability(capability)
        baseline_kernel, simd_kernel = name_to_kernels(metric)
 >       accurate_dt, accurate = profile(
            baseline_kernel,
            a_f32_rounded.astype(np.float64),
            b_f32_rounded.astype(np.float64),
            c_f32_rounded.astype(np.float64),
        )

 a          = array([0.04873803, 0.02215843, 0.08404689, 0.14022177, 0.09876008,
       0.0758478 , 0.01946311, 0.29596695, 0.1285676 , 0.0397163 ,
       0.04651314], dtype=float32)
 a_bf16     = array([15688, 15542, 15788, 15888, 15818, 15771, 15519, 16024, 15876,
       15651, 15679], dtype=uint16)
 a_f32_rounded = array([0.04882812, 0.0222168 , 0.08398438, 0.140625  , 0.09863281,
       0.07568359, 0.01940918, 0.296875  , 0.12890625, 0.03979492,
       0.04663086], dtype=float32)
 b          = array([0.00775113, 0.09075853, 0.02688486, 0.06105594, 0.06896565,
       0.07600538, 0.15450206, 0.14971958, 0.07302797, 0.15964274,
       0.13168612], dtype=float32)
 b_bf16     = array([15358, 15802, 15580, 15738, 15757, 15772, 15902, 15897, 15766,
       15907, 15879], dtype=uint16)
 b_f32_rounded = array([0.00775146, 0.09082031, 0.02685547, 0.06103516, 0.06884766,
       0.07617188, 0.15429688, 0.14941406, 0.07324219, 0.15917969,
       0.13183594], dtype=float32)
 baseline_kernel = <function baseline_mahalanobis at 0x7ffff48ef4c0>
 c          = array([[ 4.7186184,  8.257498 ,  7.4146886,  8.521214 ,  5.107907 ,
         6.041201 , 12.809752 ,  5.143927 ,  6.541...7,  8.76684  ,
         8.489734 ,  8.451725 , 10.172067 ,  9.699949 ,  8.307078 ,
         8.163037 ]], dtype=float32)
 c_bf16     = array([[16535, 16644, 16621, 16648, 16547, 16577, 16717, 16549, 16593,
        16528, 16656],
       [16644, 16613, 16...9, 16645],
       [16656, 16648, 16577, 16614, 16652, 16648, 16647, 16675, 16667,
        16645, 16643]], dtype=uint16)
 c_f32_rounded = array([[ 4.71875 ,  8.25    ,  7.40625 ,  8.5     ,  5.09375 ,  6.03125 ,
        12.8125  ,  5.15625 ,  6.53125 ,  4....  7.1875  ,  8.75    ,  8.5     ,
         8.4375  , 10.1875  ,  9.6875  ,  8.3125  ,  8.1875  ]],
      dtype=float32)
 capability = 'sve_i8'
 metric     = 'mahalanobis'
 ndim       = 11
 simd_kernel = <built-in function mahalanobis>
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

 scripts/test.py:779: 
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
 scripts/test.py:179: in profile
    result = callable(*args, **kwargs)
        args       = (array([0.04882812, 0.0222168 , 0.08398438, 0.140625  , 0.09863281,
       0.07568359, 0.01940918, 0.296875  , 0.12890...8.5     ,  6.03125 ,  7.1875  ,  8.75    ,  8.5     ,
         8.4375  , 10.1875  ,  9.6875  ,  8.3125  ,  8.1875  ]]))
        before     = 9898643882810
        callable   = <function baseline_mahalanobis at 0x7ffff48ef4c0>
        kwargs     = {}
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

 x = array([0.04882812, 0.0222168 , 0.08398438, 0.140625  , 0.09863281,
       0.07568359, 0.01940918, 0.296875  , 0.12890625, 0.03979492,
       0.04663086])
 y = array([0.00775146, 0.09082031, 0.02685547, 0.06103516, 0.06884766,
       0.07617188, 0.15429688, 0.14941406, 0.07324219, 0.15917969,
       0.13183594])
 z = array([[ 4.71875 ,  8.25    ,  7.40625 ,  8.5     ,  5.09375 ,  6.03125 ,
        12.8125  ,  5.15625 ,  6.53125 ,  4.... 8.5     ,  6.03125 ,  7.1875  ,  8.75    ,  8.5     ,
         8.4375  , 10.1875  ,  9.6875  ,  8.3125  ,  8.1875  ]])

    def baseline_mahalanobis(x, y, z):
        diff = x - y
 >       return np.sqrt(diff @ z @ diff)
 E       RuntimeWarning: invalid value encountered in sqrt

 diff       = array([ 0.04107666, -0.06860352,  0.05712891,  0.07958984,  0.02978516,
       -0.00048828, -0.1348877 ,  0.14746094,  0.05566406, -0.11938477,
       -0.08520508])
 x          = array([0.04882812, 0.0222168 , 0.08398438, 0.140625  , 0.09863281,
       0.07568359, 0.01940918, 0.296875  , 0.12890625, 0.03979492,
       0.04663086])
 y          = array([0.00775146, 0.09082031, 0.02685547, 0.06103516, 0.06884766,
       0.07617188, 0.15429688, 0.14941406, 0.07324219, 0.15917969,
       0.13183594])
 z          = array([[ 4.71875 ,  8.25    ,  7.40625 ,  8.5     ,  5.09375 ,  6.03125 ,
        12.8125  ,  5.15625 ,  6.53125 ,  4.... 8.5     ,  6.03125 ,  7.1875  ,  8.75    ,  8.5     ,
         8.4375  , 10.1875  ,  9.6875  ,  8.3125  ,  8.1875  ]])

 scripts/test.py:152: RuntimeWarning
 _________________ test_curved_bf16[sve_i8-mahalanobis-11-4-5] __________________

 ndim = 11, metric = 'mahalanobis', capability = 'sve_i8'
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("ndim", [11, 16, 33])
    @pytest.mark.parametrize("metric", ["bilinear", "mahalanobis"])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_curved_bf16(ndim, metric, capability, stats_fixture):
        """Compares various SIMD kernels (like Bilinear Forms and Mahalanobis distances) for curved spaces
        with their NumPy or baseline counterparts, testing accuracy for the Brain-float format not
        natively supported by NumPy."""
    
        np.random.seed()
    
        # Let's generate some non-negative probability distributions
        a = np.abs(np.random.randn(ndim).astype(np.float32))
        b = np.abs(np.random.randn(ndim).astype(np.float32))
        a /= np.sum(a)
        b /= np.sum(b)
    
        # Let's compute the inverse of the covariance matrix, otherwise in the SciPy
        # implementation of the Mahalanobis we may face `sqrt` of a negative number.
        # We multiply the matrix by its transpose to get a positive-semi-definite matrix.
        c = np.abs(np.random.randn(ndim, ndim).astype(np.float32))
        c = np.dot(c, c.T)
    
        a_f32_rounded, a_bf16 = f32_downcast_to_bf16(a)
        b_f32_rounded, b_bf16 = f32_downcast_to_bf16(b)
        c_f32_rounded, c_bf16 = f32_downcast_to_bf16(c)
    
        keep_one_capability(capability)
        baseline_kernel, simd_kernel = name_to_kernels(metric)
 >       accurate_dt, accurate = profile(
            baseline_kernel,
            a_f32_rounded.astype(np.float64),
            b_f32_rounded.astype(np.float64),
            c_f32_rounded.astype(np.float64),
        )

 a          = array([0.09193204, 0.14101173, 0.09445892, 0.13600038, 0.04156505,
       0.0945152 , 0.22333974, 0.06995235, 0.03840705, 0.00204569,
       0.06677172], dtype=float32)
 a_bf16     = array([15804, 15888, 15809, 15883, 15658, 15810, 15973, 15759, 15645,
       15110, 15753], dtype=uint16)
 a_f32_rounded = array([0.09179688, 0.140625  , 0.09423828, 0.13574219, 0.04150391,
       0.09472656, 0.22363281, 0.06982422, 0.03833008, 0.00204468,
       0.06689453], dtype=float32)
 b          = array([0.01880166, 0.01028811, 0.04114969, 0.22190681, 0.01573187,
       0.1835553 , 0.23876238, 0.05938241, 0.06587573, 0.12705001,
       0.01749595], dtype=float32)
 b_bf16     = array([15514, 15401, 15657, 15971, 15489, 15932, 15988, 15731, 15751,
       15874, 15503], dtype=uint16)
 b_f32_rounded = array([0.01879883, 0.01031494, 0.04125977, 0.22167969, 0.01574707,
       0.18359375, 0.23828125, 0.05932617, 0.06591797, 0.12695312,
       0.01745605], dtype=float32)
 baseline_kernel = <function baseline_mahalanobis at 0x7ffff48ef4c0>
 c          = array([[ 6.415643 ,  7.7115183,  7.960109 , 15.2143755, 16.742046 ,
        10.755242 , 11.794929 , 10.482337 ,  8.980... ,  7.0347505,
         7.115666 ,  7.1283545,  6.422183 ,  5.5347395,  8.220237 ,
         8.066778 ]], dtype=float32)
 c_bf16     = array([[16589, 16631, 16639, 16755, 16774, 16684, 16701, 16680, 16656,
        16719, 16654],
       [16631, 16564, 16...7, 16644],
       [16654, 16551, 16654, 16680, 16609, 16612, 16612, 16590, 16561,
        16644, 16641]], dtype=uint16)
 c_f32_rounded = array([[ 6.40625 ,  7.71875 ,  7.96875 , 15.1875  , 16.75    , 10.75    ,
        11.8125  , 10.5     ,  9.      , 12.... 10.5     ,  7.03125 ,  7.125   ,
         7.125   ,  6.4375  ,  5.53125 ,  8.25    ,  8.0625  ]],
      dtype=float32)
 capability = 'sve_i8'
 metric     = 'mahalanobis'
 ndim       = 11
 simd_kernel = <built-in function mahalanobis>
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

 scripts/test.py:779: 
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
 scripts/test.py:179: in profile
    result = callable(*args, **kwargs)
        args       = (array([0.09179688, 0.140625  , 0.09423828, 0.13574219, 0.04150391,
       0.09472656, 0.22363281, 0.06982422, 0.03833...5.21875 ,  8.875   , 10.5     ,  7.03125 ,  7.125   ,
         7.125   ,  6.4375  ,  5.53125 ,  8.25    ,  8.0625  ]]))
        before     = 9899310704525
        callable   = <function baseline_mahalanobis at 0x7ffff48ef4c0>
        kwargs     = {}
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

 x = array([0.09179688, 0.140625  , 0.09423828, 0.13574219, 0.04150391,
       0.09472656, 0.22363281, 0.06982422, 0.03833008, 0.00204468,
       0.06689453])
 y = array([0.01879883, 0.01031494, 0.04125977, 0.22167969, 0.01574707,
       0.18359375, 0.23828125, 0.05932617, 0.06591797, 0.12695312,
       0.01745605])
 z = array([[ 6.40625 ,  7.71875 ,  7.96875 , 15.1875  , 16.75    , 10.75    ,
        11.8125  , 10.5     ,  9.      , 12.... 5.21875 ,  8.875   , 10.5     ,  7.03125 ,  7.125   ,
         7.125   ,  6.4375  ,  5.53125 ,  8.25    ,  8.0625  ]])

    def baseline_mahalanobis(x, y, z):
        diff = x - y
 >       return np.sqrt(diff @ z @ diff)
 E       RuntimeWarning: invalid value encountered in sqrt

 diff       = array([ 0.07299805,  0.13031006,  0.05297852, -0.0859375 ,  0.02575684,
       -0.08886719, -0.01464844,  0.01049805, -0.02758789, -0.12490845,
        0.04943848])
 x          = array([0.09179688, 0.140625  , 0.09423828, 0.13574219, 0.04150391,
       0.09472656, 0.22363281, 0.06982422, 0.03833008, 0.00204468,
       0.06689453])
 y          = array([0.01879883, 0.01031494, 0.04125977, 0.22167969, 0.01574707,
       0.18359375, 0.23828125, 0.05932617, 0.06591797, 0.12695312,
       0.01745605])
 z          = array([[ 6.40625 ,  7.71875 ,  7.96875 , 15.1875  , 16.75    , 10.75    ,
        11.8125  , 10.5     ,  9.      , 12.... 5.21875 ,  8.875   , 10.5     ,  7.03125 ,  7.125   ,
         7.125   ,  6.4375  ,  5.53125 ,  8.25    ,  8.0625  ]])

 scripts/test.py:152: RuntimeWarning
 _________________ test_curved_bf16[sve_i8-mahalanobis-16-2-5] __________________

 ndim = 16, metric = 'mahalanobis', capability = 'sve_i8'
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("ndim", [11, 16, 33])
    @pytest.mark.parametrize("metric", ["bilinear", "mahalanobis"])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_curved_bf16(ndim, metric, capability, stats_fixture):
        """Compares various SIMD kernels (like Bilinear Forms and Mahalanobis distances) for curved spaces
        with their NumPy or baseline counterparts, testing accuracy for the Brain-float format not
        natively supported by NumPy."""
    
        np.random.seed()
    
        # Let's generate some non-negative probability distributions
        a = np.abs(np.random.randn(ndim).astype(np.float32))
        b = np.abs(np.random.randn(ndim).astype(np.float32))
        a /= np.sum(a)
        b /= np.sum(b)
    
        # Let's compute the inverse of the covariance matrix, otherwise in the SciPy
        # implementation of the Mahalanobis we may face `sqrt` of a negative number.
        # We multiply the matrix by its transpose to get a positive-semi-definite matrix.
        c = np.abs(np.random.randn(ndim, ndim).astype(np.float32))
        c = np.dot(c, c.T)
    
        a_f32_rounded, a_bf16 = f32_downcast_to_bf16(a)
        b_f32_rounded, b_bf16 = f32_downcast_to_bf16(b)
        c_f32_rounded, c_bf16 = f32_downcast_to_bf16(c)
    
        keep_one_capability(capability)
        baseline_kernel, simd_kernel = name_to_kernels(metric)
 >       accurate_dt, accurate = profile(
            baseline_kernel,
            a_f32_rounded.astype(np.float64),
            b_f32_rounded.astype(np.float64),
            c_f32_rounded.astype(np.float64),
        )

 a          = array([0.03220052, 0.15269181, 0.00595575, 0.0108981 , 0.10456049,
       0.07730077, 0.00253006, 0.06701224, 0.03447325, 0.06165242,
       0.01882213, 0.0464462 , 0.01208235, 0.1207922 , 0.11628418,
       0.13629755], dtype=float32)
 a_bf16     = array([15620, 15900, 15299, 15411, 15830, 15774, 15142, 15753, 15629,
       15741, 15514, 15678, 15430, 15863, 15854, 15884], dtype=uint16)
 a_f32_rounded = array([0.03222656, 0.15234375, 0.00595093, 0.01092529, 0.10449219,
       0.07714844, 0.00253296, 0.06689453, 0.03442383, 0.06176758,
       0.01879883, 0.04638672, 0.01208496, 0.12060547, 0.11621094,
       0.13671875], dtype=float32)
 b          = array([0.03074787, 0.04760265, 0.03024555, 0.0902034 , 0.01970374,
       0.08658235, 0.03411619, 0.02933606, 0.12232497, 0.05400912,
       0.04814543, 0.05746931, 0.08688925, 0.09574641, 0.11143338,
       0.05544434], dtype=float32)
 b_bf16     = array([15612, 15683, 15608, 15801, 15521, 15793, 15628, 15600, 15867,
       15709, 15685, 15723, 15794, 15812, 15844, 15715], dtype=uint16)
 b_f32_rounded = array([0.03076172, 0.04760742, 0.03027344, 0.09033203, 0.01965332,
       0.08642578, 0.03417969, 0.02929688, 0.12255859, 0.05395508,
       0.0480957 , 0.05737305, 0.08691406, 0.09570312, 0.11132812,
       0.05541992], dtype=float32)
 baseline_kernel = <function baseline_mahalanobis at 0x7ffff48ef4c0>
 c          = array([[12.721489 , 11.188417 , 15.868941 , 13.789254 , 11.104952 ,
        11.883147 , 11.039552 ,  9.04351  , 13.099... , 11.802881 ,
        10.556558 , 10.261403 ,  8.122999 ,  8.510162 , 13.121455 ,
         8.742433 ]], dtype=float32)
 c_bf16     = array([[16716, 16691, 16766, 16733, 16690, 16702, 16689, 16657, 16722,
        16772, 16789, 16697, 16647, 16685, 1678...773, 16682, 16657, 16699, 16736, 16631, 16612,
        16701, 16681, 16676, 16642, 16648, 16722, 16652]], dtype=uint16)
 c_f32_rounded = array([[12.75   , 11.1875 , 15.875  , 13.8125 , 11.125  , 11.875  ,
        11.0625 ,  9.0625 , 13.125  , 16.5    , 18...   ,  7.71875,  7.125  , 11.8125 , 10.5625 , 10.25   ,
         8.125  ,  8.5    , 13.125  ,  8.75   ]], dtype=float32)
 capability = 'sve_i8'
 metric     = 'mahalanobis'
 ndim       = 16
 simd_kernel = <built-in function mahalanobis>
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

 scripts/test.py:779: 
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
 scripts/test.py:179: in profile
    result = callable(*args, **kwargs)
        args       = (array([0.03222656, 0.15234375, 0.00595093, 0.01092529, 0.10449219,
       0.07714844, 0.00253296, 0.06689453, 0.03442...
        14.     ,  7.71875,  7.125  , 11.8125 , 10.5625 , 10.25   ,
         8.125  ,  8.5    , 13.125  ,  8.75   ]]))
        before     = 9899975281491
        callable   = <function baseline_mahalanobis at 0x7ffff48ef4c0>
        kwargs     = {}
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

 x = array([0.03222656, 0.15234375, 0.00595093, 0.01092529, 0.10449219,
       0.07714844, 0.00253296, 0.06689453, 0.03442383, 0.06176758,
       0.01879883, 0.04638672, 0.01208496, 0.12060547, 0.11621094,
       0.13671875])
 y = array([0.03076172, 0.04760742, 0.03027344, 0.09033203, 0.01965332,
       0.08642578, 0.03417969, 0.02929688, 0.12255859, 0.05395508,
       0.0480957 , 0.05737305, 0.08691406, 0.09570312, 0.11132812,
       0.05541992])
 z = array([[12.75   , 11.1875 , 15.875  , 13.8125 , 11.125  , 11.875  ,
        11.0625 ,  9.0625 , 13.125  , 16.5    , 18...,
        14.     ,  7.71875,  7.125  , 11.8125 , 10.5625 , 10.25   ,
         8.125  ,  8.5    , 13.125  ,  8.75   ]])

    def baseline_mahalanobis(x, y, z):
        diff = x - y
 >       return np.sqrt(diff @ z @ diff)
 E       RuntimeWarning: invalid value encountered in sqrt

 diff       = array([ 0.00146484,  0.10473633, -0.02432251, -0.07940674,  0.08483887,
       -0.00927734, -0.03164673,  0.03759766, -0.08813477,  0.0078125 ,
       -0.02929688, -0.01098633, -0.0748291 ,  0.02490234,  0.00488281,
        0.08129883])
 x          = array([0.03222656, 0.15234375, 0.00595093, 0.01092529, 0.10449219,
       0.07714844, 0.00253296, 0.06689453, 0.03442383, 0.06176758,
       0.01879883, 0.04638672, 0.01208496, 0.12060547, 0.11621094,
       0.13671875])
 y          = array([0.03076172, 0.04760742, 0.03027344, 0.09033203, 0.01965332,
       0.08642578, 0.03417969, 0.02929688, 0.12255859, 0.05395508,
       0.0480957 , 0.05737305, 0.08691406, 0.09570312, 0.11132812,
       0.05541992])
 z          = array([[12.75   , 11.1875 , 15.875  , 13.8125 , 11.125  , 11.875  ,
        11.0625 ,  9.0625 , 13.125  , 16.5    , 18...,
        14.     ,  7.71875,  7.125  , 11.8125 , 10.5625 , 10.25   ,
         8.125  ,  8.5    , 13.125  ,  8.75   ]])

 scripts/test.py:152: RuntimeWarning
 _________________ test_curved_bf16[sve_i8-mahalanobis-16-5-5] __________________

 ndim = 16, metric = 'mahalanobis', capability = 'sve_i8'
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("ndim", [11, 16, 33])
    @pytest.mark.parametrize("metric", ["bilinear", "mahalanobis"])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_curved_bf16(ndim, metric, capability, stats_fixture):
        """Compares various SIMD kernels (like Bilinear Forms and Mahalanobis distances) for curved spaces
        with their NumPy or baseline counterparts, testing accuracy for the Brain-float format not
        natively supported by NumPy."""
    
        np.random.seed()
    
        # Let's generate some non-negative probability distributions
        a = np.abs(np.random.randn(ndim).astype(np.float32))
        b = np.abs(np.random.randn(ndim).astype(np.float32))
        a /= np.sum(a)
        b /= np.sum(b)
    
        # Let's compute the inverse of the covariance matrix, otherwise in the SciPy
        # implementation of the Mahalanobis we may face `sqrt` of a negative number.
        # We multiply the matrix by its transpose to get a positive-semi-definite matrix.
        c = np.abs(np.random.randn(ndim, ndim).astype(np.float32))
        c = np.dot(c, c.T)
    
        a_f32_rounded, a_bf16 = f32_downcast_to_bf16(a)
        b_f32_rounded, b_bf16 = f32_downcast_to_bf16(b)
        c_f32_rounded, c_bf16 = f32_downcast_to_bf16(c)
    
        keep_one_capability(capability)
        baseline_kernel, simd_kernel = name_to_kernels(metric)
 >       accurate_dt, accurate = profile(
            baseline_kernel,
            a_f32_rounded.astype(np.float64),
            b_f32_rounded.astype(np.float64),
            c_f32_rounded.astype(np.float64),
        )

 a          = array([0.02589001, 0.11096373, 0.02480428, 0.16395815, 0.039616  ,
       0.10939859, 0.0723053 , 0.04827771, 0.11927722, 0.0386814 ,
       0.13197795, 0.03555128, 0.02138477, 0.02104093, 0.00775122,
       0.02912146], dtype=float32)
 a_bf16     = array([15572, 15843, 15563, 15912, 15650, 15840, 15764, 15686, 15860,
       15646, 15879, 15634, 15535, 15532, 15358, 15599], dtype=uint16)
 a_f32_rounded = array([0.02587891, 0.11083984, 0.02478027, 0.1640625 , 0.03955078,
       0.109375  , 0.07226562, 0.04833984, 0.11914062, 0.03857422,
       0.13183594, 0.03564453, 0.0213623 , 0.02099609, 0.00775146,
       0.0291748 ], dtype=float32)
 b          = array([0.03469316, 0.03265765, 0.04661204, 0.01855901, 0.11150639,
       0.03975366, 0.14731327, 0.0204233 , 0.0357987 , 0.02333193,
       0.05261657, 0.18633318, 0.11100812, 0.06621864, 0.04931891,
       0.0238555 ], dtype=float32)
 b_bf16     = array([15630, 15622, 15679, 15512, 15844, 15651, 15895, 15527, 15635,
       15551, 15704, 15935, 15843, 15752, 15690, 15555], dtype=uint16)
 b_f32_rounded = array([0.03466797, 0.03271484, 0.04663086, 0.01855469, 0.11132812,
       0.03979492, 0.14746094, 0.02038574, 0.03588867, 0.02331543,
       0.05273438, 0.18652344, 0.11083984, 0.06640625, 0.04931641,
       0.02380371], dtype=float32)
 baseline_kernel = <function baseline_mahalanobis at 0x7ffff48ef4c0>
 c          = array([[18.477634 , 12.355214 , 10.599759 , 14.627995 , 14.70157  ,
        10.166403 , 10.9607935, 10.796433 , 21.491... ,  7.976103 ,
         7.992052 ,  6.6306696,  9.355115 ,  5.0223966,  5.7073145,
         8.024025 ]], dtype=float32)
 c_bf16     = array([[16788, 16710, 16682, 16746, 16747, 16675, 16687, 16685, 16812,
        16777, 16764, 16699, 16787, 16715, 1669...664, 16682, 16577, 16558, 16557, 16483, 16713,
        16639, 16640, 16596, 16662, 16545, 16567, 16640]], dtype=uint16)
 c_f32_rounded = array([[18.5     , 12.375   , 10.625   , 14.625   , 14.6875  , 10.1875  ,
        10.9375  , 10.8125  , 21.5     , 17....46875, 12.5625  ,  7.96875 ,  8.      ,  6.625   ,
         9.375   ,  5.03125 ,  5.71875 ,  8.      ]], dtype=float32)
 capability = 'sve_i8'
 metric     = 'mahalanobis'
 ndim       = 16
 simd_kernel = <built-in function mahalanobis>
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

 scripts/test.py:779: 
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
 scripts/test.py:179: in profile
    result = callable(*args, **kwargs)
        args       = (array([0.02587891, 0.11083984, 0.02478027, 0.1640625 , 0.03955078,
       0.109375  , 0.07226562, 0.04833984, 0.11914...5.40625 ,  3.546875, 12.5625  ,  7.96875 ,  8.      ,  6.625   ,
         9.375   ,  5.03125 ,  5.71875 ,  8.      ]]))
        before     = 9900684169953
        callable   = <function baseline_mahalanobis at 0x7ffff48ef4c0>
        kwargs     = {}
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

 x = array([0.02587891, 0.11083984, 0.02478027, 0.1640625 , 0.03955078,
       0.109375  , 0.07226562, 0.04833984, 0.11914062, 0.03857422,
       0.13183594, 0.03564453, 0.0213623 , 0.02099609, 0.00775146,
       0.0291748 ])
 y = array([0.03466797, 0.03271484, 0.04663086, 0.01855469, 0.11132812,
       0.03979492, 0.14746094, 0.02038574, 0.03588867, 0.02331543,
       0.05273438, 0.18652344, 0.11083984, 0.06640625, 0.04931641,
       0.02380371])
 z = array([[18.5     , 12.375   , 10.625   , 14.625   , 14.6875  , 10.1875  ,
        10.9375  , 10.8125  , 21.5     , 17.... 5.40625 ,  3.546875, 12.5625  ,  7.96875 ,  8.      ,  6.625   ,
         9.375   ,  5.03125 ,  5.71875 ,  8.      ]])

    def baseline_mahalanobis(x, y, z):
        diff = x - y
 >       return np.sqrt(diff @ z @ diff)
 E       RuntimeWarning: invalid value encountered in sqrt

 diff       = array([-0.00878906,  0.078125  , -0.02185059,  0.14550781, -0.07177734,
        0.06958008, -0.07519531,  0.0279541 ,  0.08325195,  0.01525879,
        0.07910156, -0.15087891, -0.08947754, -0.04541016, -0.04156494,
        0.00537109])
 x          = array([0.02587891, 0.11083984, 0.02478027, 0.1640625 , 0.03955078,
       0.109375  , 0.07226562, 0.04833984, 0.11914062, 0.03857422,
       0.13183594, 0.03564453, 0.0213623 , 0.02099609, 0.00775146,
       0.0291748 ])
 y          = array([0.03466797, 0.03271484, 0.04663086, 0.01855469, 0.11132812,
       0.03979492, 0.14746094, 0.02038574, 0.03588867, 0.02331543,
       0.05273438, 0.18652344, 0.11083984, 0.06640625, 0.04931641,
       0.02380371])
 z          = array([[18.5     , 12.375   , 10.625   , 14.625   , 14.6875  , 10.1875  ,
        10.9375  , 10.8125  , 21.5     , 17.... 5.40625 ,  3.546875, 12.5625  ,  7.96875 ,  8.      ,  6.625   ,
         9.375   ,  5.03125 ,  5.71875 ,  8.      ]])

 scripts/test.py:152: RuntimeWarning
 _________________ test_curved_bf16[sve_i8-mahalanobis-33-1-5] __________________

 ndim = 33, metric = 'mahalanobis', capability = 'sve_i8'
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("ndim", [11, 16, 33])
    @pytest.mark.parametrize("metric", ["bilinear", "mahalanobis"])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_curved_bf16(ndim, metric, capability, stats_fixture):
        """Compares various SIMD kernels (like Bilinear Forms and Mahalanobis distances) for curved spaces
        with their NumPy or baseline counterparts, testing accuracy for the Brain-float format not
        natively supported by NumPy."""
    
        np.random.seed()
    
        # Let's generate some non-negative probability distributions
        a = np.abs(np.random.randn(ndim).astype(np.float32))
        b = np.abs(np.random.randn(ndim).astype(np.float32))
        a /= np.sum(a)
        b /= np.sum(b)
    
        # Let's compute the inverse of the covariance matrix, otherwise in the SciPy
        # implementation of the Mahalanobis we may face `sqrt` of a negative number.
        # We multiply the matrix by its transpose to get a positive-semi-definite matrix.
        c = np.abs(np.random.randn(ndim, ndim).astype(np.float32))
        c = np.dot(c, c.T)
    
        a_f32_rounded, a_bf16 = f32_downcast_to_bf16(a)
        b_f32_rounded, b_bf16 = f32_downcast_to_bf16(b)
        c_f32_rounded, c_bf16 = f32_downcast_to_bf16(c)
    
        keep_one_capability(capability)
        baseline_kernel, simd_kernel = name_to_kernels(metric)
 >       accurate_dt, accurate = profile(
            baseline_kernel,
            a_f32_rounded.astype(np.float64),
            b_f32_rounded.astype(np.float64),
            c_f32_rounded.astype(np.float64),
        )

 a          = array([0.06500575, 0.01846655, 0.00806695, 0.01618928, 0.00476489,
       0.01420154, 0.02382765, 0.05287013, 0.000199... 0.01400438, 0.00572764, 0.06392846, 0.03277836, 0.03347878,
       0.02458549, 0.09634076, 0.01367372], dtype=float32)
 a_bf16     = array([15749, 15511, 15364, 15493, 15260, 15465, 15555, 15705, 14673,
       15512, 15470, 15418, 15766, 15773, 15566,...15450, 15494, 15693, 15342, 15816, 15462, 15461, 15292,
       15747, 15622, 15625, 15561, 15813, 15456], dtype=uint16)
 a_f32_rounded = array([0.06494141, 0.01843262, 0.00805664, 0.01623535, 0.00476074,
       0.01422119, 0.02380371, 0.05297852, 0.000199... 0.01397705, 0.0057373 , 0.06396484, 0.03271484, 0.03344727,
       0.02453613, 0.09619141, 0.01367188], dtype=float32)
 b          = array([0.0103425 , 0.06920848, 0.00651194, 0.03787998, 0.03340735,
       0.01929932, 0.02352029, 0.00441932, 0.019459... 0.00730519, 0.10513155, 0.00891316, 0.00334564, 0.020593  ,
       0.01475271, 0.03090916, 0.00015765], dtype=float32)
 b_bf16     = array([15401, 15758, 15317, 15643, 15625, 15518, 15553, 15249, 15519,
       15061, 15734, 15407, 15272, 15659, 15777,...15639, 15591, 15280, 15854, 15731, 15671, 15343, 15831,
       15378, 15195, 15529, 15474, 15613, 14629], dtype=uint16)
 b_f32_rounded = array([0.01031494, 0.06933594, 0.00650024, 0.0378418 , 0.03344727,
       0.01928711, 0.02355957, 0.00442505, 0.019409... 0.0072937 , 0.10498047, 0.00891113, 0.00334167, 0.02062988,
       0.01477051, 0.03088379, 0.00015736], dtype=float32)
 baseline_kernel = <function baseline_mahalanobis at 0x7ffff48ef4c0>
 c          = array([[16.100248, 25.8665  , 29.115942, ..., 20.345184, 27.508173,
        16.533598],
       [25.8665  , 28.776363, ...    17.503775],
       [16.533598, 21.003635, 24.018234, ..., 16.202486, 17.503775,
        28.531885]], dtype=float32)
 c_bf16     = array([[16769, 16847, 16873, ..., 16803, 16860, 16772],
       [16847, 16870, 16864, ..., 16812, 16859, 16808],
      ...[16860, 16859, 16897, ..., 16825, 16823, 16780],
       [16772, 16808, 16832, ..., 16770, 16780, 16868]], dtype=uint16)
 c_f32_rounded = array([[16.125 , 25.875 , 29.125 , ..., 20.375 , 27.5   , 16.5   ],
       [25.875 , 28.75  , 28.    , ..., 21.5   , 2..., 23.125 , 22.875 , 17.5   ],
       [16.5   , 21.    , 24.    , ..., 16.25  , 17.5   , 28.5   ]],
      dtype=float32)
 capability = 'sve_i8'
 metric     = 'mahalanobis'
 ndim       = 33
 simd_kernel = <built-in function mahalanobis>
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

 scripts/test.py:779: 
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
 scripts/test.py:179: in profile
    result = callable(*args, **kwargs)
        args       = (array([0.06494141, 0.01843262, 0.00805664, 0.01623535, 0.00476074,
       0.01422119, 0.02380371, 0.05297852, 0.00019...7.375 , 32.25  , ..., 23.125 , 22.875 , 17.5   ],
       [16.5   , 21.    , 24.    , ..., 16.25  , 17.5   , 28.5   ]]))
        before     = 9901350057161
        callable   = <function baseline_mahalanobis at 0x7ffff48ef4c0>
        kwargs     = {}
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

 x = array([0.06494141, 0.01843262, 0.00805664, 0.01623535, 0.00476074,
       0.01422119, 0.02380371, 0.05297852, 0.000199...1403809,
       0.01397705, 0.0057373 , 0.06396484, 0.03271484, 0.03344727,
       0.02453613, 0.09619141, 0.01367188])
 y = array([0.01031494, 0.06933594, 0.00650024, 0.0378418 , 0.03344727,
       0.01928711, 0.02355957, 0.00442505, 0.019409...4467773,
       0.0072937 , 0.10498047, 0.00891113, 0.00334167, 0.02062988,
       0.01477051, 0.03088379, 0.00015736])
 z = array([[16.125 , 25.875 , 29.125 , ..., 20.375 , 27.5   , 16.5   ],
       [25.875 , 28.75  , 28.    , ..., 21.5   , 2...27.375 , 32.25  , ..., 23.125 , 22.875 , 17.5   ],
       [16.5   , 21.    , 24.    , ..., 16.25  , 17.5   , 28.5   ]])

    def baseline_mahalanobis(x, y, z):
        diff = x - y
 >       return np.sqrt(diff @ z @ diff)
 E       RuntimeWarning: invalid value encountered in sqrt

 diff       = array([ 0.05462646, -0.05090332,  0.0015564 , -0.02160645, -0.02868652,
       -0.00506592,  0.00024414,  0.04855347, ...
        0.00668335, -0.09924316,  0.05505371,  0.02937317,  0.01281738,
        0.00976562,  0.06530762,  0.01351452])
 x          = array([0.06494141, 0.01843262, 0.00805664, 0.01623535, 0.00476074,
       0.01422119, 0.02380371, 0.05297852, 0.000199...1403809,
       0.01397705, 0.0057373 , 0.06396484, 0.03271484, 0.03344727,
       0.02453613, 0.09619141, 0.01367188])
 y          = array([0.01031494, 0.06933594, 0.00650024, 0.0378418 , 0.03344727,
       0.01928711, 0.02355957, 0.00442505, 0.019409...4467773,
       0.0072937 , 0.10498047, 0.00891113, 0.00334167, 0.02062988,
       0.01477051, 0.03088379, 0.00015736])
 z          = array([[16.125 , 25.875 , 29.125 , ..., 20.375 , 27.5   , 16.5   ],
       [25.875 , 28.75  , 28.    , ..., 21.5   , 2...27.375 , 32.25  , ..., 23.125 , 22.875 , 17.5   ],
       [16.5   , 21.    , 24.    , ..., 16.25  , 17.5   , 28.5   ]])

 scripts/test.py:152: RuntimeWarning
 _________________ test_curved_bf16[sve_i8-mahalanobis-33-3-5] __________________

 ndim = 33, metric = 'mahalanobis', capability = 'sve_i8'
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("ndim", [11, 16, 33])
    @pytest.mark.parametrize("metric", ["bilinear", "mahalanobis"])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_curved_bf16(ndim, metric, capability, stats_fixture):
        """Compares various SIMD kernels (like Bilinear Forms and Mahalanobis distances) for curved spaces
        with their NumPy or baseline counterparts, testing accuracy for the Brain-float format not
        natively supported by NumPy."""
    
        np.random.seed()
    
        # Let's generate some non-negative probability distributions
        a = np.abs(np.random.randn(ndim).astype(np.float32))
        b = np.abs(np.random.randn(ndim).astype(np.float32))
        a /= np.sum(a)
        b /= np.sum(b)
    
        # Let's compute the inverse of the covariance matrix, otherwise in the SciPy
        # implementation of the Mahalanobis we may face `sqrt` of a negative number.
        # We multiply the matrix by its transpose to get a positive-semi-definite matrix.
        c = np.abs(np.random.randn(ndim, ndim).astype(np.float32))
        c = np.dot(c, c.T)
    
        a_f32_rounded, a_bf16 = f32_downcast_to_bf16(a)
        b_f32_rounded, b_bf16 = f32_downcast_to_bf16(b)
        c_f32_rounded, c_bf16 = f32_downcast_to_bf16(c)
    
        keep_one_capability(capability)
        baseline_kernel, simd_kernel = name_to_kernels(metric)
 >       accurate_dt, accurate = profile(
            baseline_kernel,
            a_f32_rounded.astype(np.float64),
            b_f32_rounded.astype(np.float64),
            c_f32_rounded.astype(np.float64),
        )

 a          = array([3.3498388e-02, 2.0406188e-02, 9.8255053e-03, 1.7376877e-02,
       1.8550944e-02, 1.3005721e-02, 2.2376694e-02,...4.3272797e-02,
       1.0071183e-02, 4.2324170e-02, 4.4711974e-02, 3.7185397e-02,
       2.1901580e-02], dtype=float32)
 a_bf16     = array([15625, 15527, 15393, 15502, 15512, 15445, 15543, 15756, 15384,
       15607, 15464, 15637, 14282, 15706, 15664,...15734, 15603, 15598, 15621, 15642, 15659, 15579, 15675,
       15665, 15397, 15661, 15671, 15640, 15539], dtype=uint16)
 a_f32_rounded = array([3.3447266e-02, 2.0385742e-02, 9.8266602e-03, 1.7333984e-02,
       1.8554688e-02, 1.3000488e-02, 2.2338867e-02,...4.3212891e-02,
       1.0070801e-02, 4.2236328e-02, 4.4677734e-02, 3.7109375e-02,
       2.1850586e-02], dtype=float32)
 b          = array([0.0415079 , 0.03046993, 0.04384029, 0.01273584, 0.02321521,
       0.02636783, 0.01086062, 0.03230003, 0.020096... 0.0355119 , 0.02290751, 0.02557399, 0.05547677, 0.03745944,
       0.02285388, 0.03659112, 0.02487819], dtype=float32)
 b_bf16     = array([15658, 15610, 15668, 15441, 15550, 15576, 15410, 15620, 15525,
       15547, 15665, 15536, 15588, 15712, 15571,...15542, 15546, 15226, 15696, 15686, 15538, 15633, 15548,
       15570, 15715, 15641, 15547, 15638, 15564], dtype=uint16)
 b_f32_rounded = array([0.04150391, 0.03051758, 0.04394531, 0.01275635, 0.02319336,
       0.02636719, 0.01086426, 0.03222656, 0.020141... 0.03540039, 0.02294922, 0.02563477, 0.05541992, 0.03735352,
       0.02282715, 0.03662109, 0.02490234], dtype=float32)
 baseline_kernel = <function baseline_mahalanobis at 0x7ffff48ef4c0>
 c          = array([[20.957998, 21.09855 , 27.241283, ..., 19.181421, 20.893793,
        20.089006],
       [21.09855 , 20.063026, ...    22.572443],
       [20.089006, 22.10936 , 27.188595, ..., 19.526749, 22.572443,
        29.281816]], dtype=float32)
 c_bf16     = array([[16808, 16809, 16858, ..., 16793, 16807, 16801],
       [16809, 16801, 16852, ..., 16798, 16833, 16817],
      ...[16807, 16833, 16893, ..., 16863, 16875, 16821],
       [16801, 16817, 16858, ..., 16796, 16821, 16874]], dtype=uint16)
 c_f32_rounded = array([[21.   , 21.125, 27.25 , ..., 19.125, 20.875, 20.125],
       [21.125, 20.125, 26.5  , ..., 19.75 , 24.125, 22.....625, ..., 27.875, 29.375, 22.625],
       [20.125, 22.125, 27.25 , ..., 19.5  , 22.625, 29.25 ]],
      dtype=float32)
 capability = 'sve_i8'
 metric     = 'mahalanobis'
 ndim       = 33
 simd_kernel = <built-in function mahalanobis>
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

 scripts/test.py:779: 
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
 scripts/test.py:179: in profile
    result = callable(*args, **kwargs)
        args       = (array([3.34472656e-02, 2.03857422e-02, 9.82666016e-03, 1.73339844e-02,
       1.85546875e-02, 1.30004883e-02, 2.23388... [20.875, 24.125, 31.625, ..., 27.875, 29.375, 22.625],
       [20.125, 22.125, 27.25 , ..., 19.5  , 22.625, 29.25 ]]))
        before     = 9901999116414
        callable   = <function baseline_mahalanobis at 0x7ffff48ef4c0>
        kwargs     = {}
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

 x = array([3.34472656e-02, 2.03857422e-02, 9.82666016e-03, 1.73339844e-02,
       1.85546875e-02, 1.30004883e-02, 2.233886...969e-02, 4.32128906e-02,
       1.00708008e-02, 4.22363281e-02, 4.46777344e-02, 3.71093750e-02,
       2.18505859e-02])
 y = array([0.04150391, 0.03051758, 0.04394531, 0.01275635, 0.02319336,
       0.02636719, 0.01086426, 0.03222656, 0.020141...2172852,
       0.03540039, 0.02294922, 0.02563477, 0.05541992, 0.03735352,
       0.02282715, 0.03662109, 0.02490234])
 z = array([[21.   , 21.125, 27.25 , ..., 19.125, 20.875, 20.125],
       [21.125, 20.125, 26.5  , ..., 19.75 , 24.125, 22....  [20.875, 24.125, 31.625, ..., 27.875, 29.375, 22.625],
       [20.125, 22.125, 27.25 , ..., 19.5  , 22.625, 29.25 ]])

    def baseline_mahalanobis(x, y, z):
        diff = x - y
 >       return np.sqrt(diff @ z @ diff)
 E       RuntimeWarning: invalid value encountered in sqrt

 diff       = array([-0.00805664, -0.01013184, -0.03411865,  0.00457764, -0.00463867,
       -0.0133667 ,  0.01147461,  0.03613281, ...
       -0.00866699,  0.02270508,  0.01757812, -0.04534912,  0.00488281,
        0.02185059,  0.00048828, -0.00305176])
 x          = array([3.34472656e-02, 2.03857422e-02, 9.82666016e-03, 1.73339844e-02,
       1.85546875e-02, 1.30004883e-02, 2.233886...969e-02, 4.32128906e-02,
       1.00708008e-02, 4.22363281e-02, 4.46777344e-02, 3.71093750e-02,
       2.18505859e-02])
 y          = array([0.04150391, 0.03051758, 0.04394531, 0.01275635, 0.02319336,
       0.02636719, 0.01086426, 0.03222656, 0.020141...2172852,
       0.03540039, 0.02294922, 0.02563477, 0.05541992, 0.03735352,
       0.02282715, 0.03662109, 0.02490234])
 z          = array([[21.   , 21.125, 27.25 , ..., 19.125, 20.875, 20.125],
       [21.125, 20.125, 26.5  , ..., 19.75 , 24.125, 22....  [20.875, 24.125, 31.625, ..., 27.875, 29.375, 22.625],
       [20.125, 22.125, 27.25 , ..., 19.5  , 22.625, 29.25 ]])

 scripts/test.py:152: RuntimeWarning
 _________________ test_curved_bf16[sve_i8-mahalanobis-33-4-5] __________________

 ndim = 33, metric = 'mahalanobis', capability = 'sve_i8'
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("ndim", [11, 16, 33])
    @pytest.mark.parametrize("metric", ["bilinear", "mahalanobis"])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_curved_bf16(ndim, metric, capability, stats_fixture):
        """Compares various SIMD kernels (like Bilinear Forms and Mahalanobis distances) for curved spaces
        with their NumPy or baseline counterparts, testing accuracy for the Brain-float format not
        natively supported by NumPy."""
    
        np.random.seed()
    
        # Let's generate some non-negative probability distributions
        a = np.abs(np.random.randn(ndim).astype(np.float32))
        b = np.abs(np.random.randn(ndim).astype(np.float32))
        a /= np.sum(a)
        b /= np.sum(b)
    
        # Let's compute the inverse of the covariance matrix, otherwise in the SciPy
        # implementation of the Mahalanobis we may face `sqrt` of a negative number.
        # We multiply the matrix by its transpose to get a positive-semi-definite matrix.
        c = np.abs(np.random.randn(ndim, ndim).astype(np.float32))
        c = np.dot(c, c.T)
    
        a_f32_rounded, a_bf16 = f32_downcast_to_bf16(a)
        b_f32_rounded, b_bf16 = f32_downcast_to_bf16(b)
        c_f32_rounded, c_bf16 = f32_downcast_to_bf16(c)
    
        keep_one_capability(capability)
        baseline_kernel, simd_kernel = name_to_kernels(metric)
 >       accurate_dt, accurate = profile(
            baseline_kernel,
            a_f32_rounded.astype(np.float64),
            b_f32_rounded.astype(np.float64),
            c_f32_rounded.astype(np.float64),
        )

 a          = array([0.0283592 , 0.00168231, 0.04644348, 0.00237417, 0.02409301,
       0.00628853, 0.02748876, 0.00337257, 0.056593... 0.03936133, 0.00650656, 0.03990307, 0.04421438, 0.02193734,
       0.0175251 , 0.01851598, 0.05491813], dtype=float32)
 a_bf16     = array([15592, 15069, 15678, 15132, 15557, 15310, 15585, 15197, 15720,
       15549, 15684, 15730, 15760, 15635, 15645,...15642, 15608, 15491, 15664, 15662, 15552, 15649, 15317,
       15651, 15669, 15540, 15504, 15512, 15713], dtype=uint16)
 a_f32_rounded = array([0.02832031, 0.0016861 , 0.04638672, 0.00238037, 0.02404785,
       0.00628662, 0.02746582, 0.00337219, 0.056640... 0.03930664, 0.00650024, 0.03979492, 0.04418945, 0.02197266,
       0.01757812, 0.01855469, 0.05493164], dtype=float32)
 b          = array([0.04158661, 0.01064336, 0.0091174 , 0.04723426, 0.00237923,
       0.02402475, 0.0128775 , 0.02983634, 0.019748... 0.04140832, 0.03421516, 0.07289051, 0.09091413, 0.04531119,
       0.00092399, 0.02406429, 0.00708457], dtype=float32)
 b_bf16     = array([15658, 15406, 15381, 15681, 15132, 15557, 15443, 15604, 15522,
       15607, 15648, 15289, 15553, 15555, 15811,...15448, 15538, 15738, 15741, 15103, 15416, 15658, 15628,
       15765, 15802, 15674, 14962, 15557, 15336], dtype=uint16)
 b_f32_rounded = array([0.04150391, 0.01062012, 0.00909424, 0.04711914, 0.00238037,
       0.02404785, 0.01287842, 0.02978516, 0.019775... 0.04150391, 0.03417969, 0.07275391, 0.09082031, 0.04541016,
       0.00092316, 0.02404785, 0.00708008], dtype=float32)
 baseline_kernel = <function baseline_mahalanobis at 0x7ffff48ef4c0>
 c          = array([[23.734608, 28.935307, 26.915718, ..., 21.06295 , 25.14896 ,
        21.353434],
       [28.935307, 28.425392, ...    17.534319],
       [21.353434, 28.342762, 25.826498, ..., 20.355816, 17.534319,
        32.370846]], dtype=float32)
 c_bf16     = array([[16830, 16871, 16855, ..., 16809, 16841, 16811],
       [16871, 16867, 16912, ..., 16833, 16892, 16867],
      ...[16841, 16892, 16898, ..., 16820, 16834, 16780],
       [16811, 16867, 16847, ..., 16803, 16780, 16897]], dtype=uint16)
 c_f32_rounded = array([[23.75 , 28.875, 26.875, ..., 21.125, 25.125, 21.375],
       [28.875, 28.375, 36.   , ..., 24.125, 31.5  , 28.....5  , ..., 22.5  , 24.25 , 17.5  ],
       [21.375, 28.375, 25.875, ..., 20.375, 17.5  , 32.25 ]],
      dtype=float32)
 capability = 'sve_i8'
 metric     = 'mahalanobis'
 ndim       = 33
 simd_kernel = <built-in function mahalanobis>
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

 scripts/test.py:779: 
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
 scripts/test.py:179: in profile
    result = callable(*args, **kwargs)
        args       = (array([0.02832031, 0.0016861 , 0.04638672, 0.00238037, 0.02404785,
       0.00628662, 0.02746582, 0.00337219, 0.05664... [25.125, 31.5  , 32.5  , ..., 22.5  , 24.25 , 17.5  ],
       [21.375, 28.375, 25.875, ..., 20.375, 17.5  , 32.25 ]]))
        before     = 9902641254145
        callable   = <function baseline_mahalanobis at 0x7ffff48ef4c0>
        kwargs     = {}
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

 x = array([0.02832031, 0.0016861 , 0.04638672, 0.00238037, 0.02404785,
       0.00628662, 0.02746582, 0.00337219, 0.056640...234375 ,
       0.03930664, 0.00650024, 0.03979492, 0.04418945, 0.02197266,
       0.01757812, 0.01855469, 0.05493164])
 y = array([0.04150391, 0.01062012, 0.00909424, 0.04711914, 0.00238037,
       0.02404785, 0.01287842, 0.02978516, 0.019775...1123047,
       0.04150391, 0.03417969, 0.07275391, 0.09082031, 0.04541016,
       0.00092316, 0.02404785, 0.00708008])
 z = array([[23.75 , 28.875, 26.875, ..., 21.125, 25.125, 21.375],
       [28.875, 28.375, 36.   , ..., 24.125, 31.5  , 28....  [25.125, 31.5  , 32.5  , ..., 22.5  , 24.25 , 17.5  ],
       [21.375, 28.375, 25.875, ..., 20.375, 17.5  , 32.25 ]])

    def baseline_mahalanobis(x, y, z):
        diff = x - y
 >       return np.sqrt(diff @ z @ diff)
 E       RuntimeWarning: invalid value encountered in sqrt

 diff       = array([-0.01318359, -0.00893402,  0.03729248, -0.04473877,  0.02166748,
       -0.01776123,  0.0145874 , -0.02641296, ...
       -0.00219727, -0.02767944, -0.03295898, -0.04663086, -0.0234375 ,
        0.01665497, -0.00549316,  0.04785156])
 x          = array([0.02832031, 0.0016861 , 0.04638672, 0.00238037, 0.02404785,
       0.00628662, 0.02746582, 0.00337219, 0.056640...234375 ,
       0.03930664, 0.00650024, 0.03979492, 0.04418945, 0.02197266,
       0.01757812, 0.01855469, 0.05493164])
 y          = array([0.04150391, 0.01062012, 0.00909424, 0.04711914, 0.00238037,
       0.02404785, 0.01287842, 0.02978516, 0.019775...1123047,
       0.04150391, 0.03417969, 0.07275391, 0.09082031, 0.04541016,
       0.00092316, 0.02404785, 0.00708008])
 z          = array([[23.75 , 28.875, 26.875, ..., 21.125, 25.125, 21.375],
       [28.875, 28.375, 36.   , ..., 24.125, 31.5  , 28....  [25.125, 31.5  , 32.5  , ..., 22.5  , 24.25 , 17.5  ],
       [21.375, 28.375, 25.875, ..., 20.375, 17.5  , 32.25 ]])

 scripts/test.py:152: RuntimeWarning
 _________________ test_curved_bf16[sve_i8-mahalanobis-33-5-5] __________________

 ndim = 33, metric = 'mahalanobis', capability = 'sve_i8'
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("ndim", [11, 16, 33])
    @pytest.mark.parametrize("metric", ["bilinear", "mahalanobis"])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_curved_bf16(ndim, metric, capability, stats_fixture):
        """Compares various SIMD kernels (like Bilinear Forms and Mahalanobis distances) for curved spaces
        with their NumPy or baseline counterparts, testing accuracy for the Brain-float format not
        natively supported by NumPy."""
    
        np.random.seed()
    
        # Let's generate some non-negative probability distributions
        a = np.abs(np.random.randn(ndim).astype(np.float32))
        b = np.abs(np.random.randn(ndim).astype(np.float32))
        a /= np.sum(a)
        b /= np.sum(b)
    
        # Let's compute the inverse of the covariance matrix, otherwise in the SciPy
        # implementation of the Mahalanobis we may face `sqrt` of a negative number.
        # We multiply the matrix by its transpose to get a positive-semi-definite matrix.
        c = np.abs(np.random.randn(ndim, ndim).astype(np.float32))
        c = np.dot(c, c.T)
    
        a_f32_rounded, a_bf16 = f32_downcast_to_bf16(a)
        b_f32_rounded, b_bf16 = f32_downcast_to_bf16(b)
        c_f32_rounded, c_bf16 = f32_downcast_to_bf16(c)
    
        keep_one_capability(capability)
        baseline_kernel, simd_kernel = name_to_kernels(metric)
 >       accurate_dt, accurate = profile(
            baseline_kernel,
            a_f32_rounded.astype(np.float64),
            b_f32_rounded.astype(np.float64),
            c_f32_rounded.astype(np.float64),
        )

 a          = array([0.03938453, 0.01744192, 0.00876469, 0.03913392, 0.01067649,
       0.0761814 , 0.01579213, 0.00519783, 0.008951... 0.02293723, 0.01011753, 0.01059396, 0.0421485 , 0.06604147,
       0.03643602, 0.04872839, 0.00047075], dtype=float32)
 a_bf16     = array([15649, 15503, 15376, 15648, 15407, 15772, 15489, 15274, 15379,
       15520, 15716, 15557, 15696, 15651, 15523,...15684, 15391, 15334, 15686, 15774, 15621, 15548, 15398,
       15406, 15661, 15751, 15637, 15688, 14839], dtype=uint16)
 a_f32_rounded = array([0.03930664, 0.01745605, 0.00878906, 0.0390625 , 0.01068115,
       0.07617188, 0.01574707, 0.00518799, 0.008972... 0.02294922, 0.01013184, 0.01062012, 0.04223633, 0.06591797,
       0.03637695, 0.04882812, 0.00047112], dtype=float32)
 b          = array([0.02835833, 0.02101716, 0.03439811, 0.0188875 , 0.02569134,
       0.0100259 , 0.00590959, 0.05322531, 0.045753... 0.02513658, 0.03333756, 0.02183465, 0.04372667, 0.01237824,
       0.00114537, 0.00862422, 0.04967602], dtype=float32)
 b_bf16     = array([15592, 15532, 15629, 15515, 15570, 15396, 15298, 15706, 15675,
       15091, 15633, 15516, 15432, 15622, 15698,...15646, 15663, 15787, 15417, 15403, 15786, 15566, 15625,
       15539, 15667, 15435, 14998, 15373, 15691], dtype=uint16)
 b_f32_rounded = array([0.02832031, 0.02099609, 0.03442383, 0.0189209 , 0.02563477,
       0.01000977, 0.00592041, 0.05322266, 0.045654... 0.02514648, 0.03344727, 0.02185059, 0.04370117, 0.01239014,
       0.00114441, 0.00860596, 0.04956055], dtype=float32)
 baseline_kernel = <function baseline_mahalanobis at 0x7ffff48ef4c0>
 c          = array([[15.959372, 15.428329, 18.655676, ..., 14.320891, 15.904877,
        17.724705],
       [15.428329, 25.770632, ...    18.723143],
       [17.724705, 21.073706, 16.478521, ..., 21.267336, 18.723143,
        30.000797]], dtype=float32)
 c_bf16     = array([[16767, 16759, 16789, ..., 16741, 16766, 16782],
       [16759, 16846, 16828, ..., 16818, 16817, 16809],
      ...[16766, 16817, 16809, ..., 16807, 16813, 16790],
       [16782, 16809, 16772, ..., 16810, 16790, 16880]], dtype=uint16)
 c_f32_rounded = array([[15.9375, 15.4375, 18.625 , ..., 14.3125, 15.875 , 17.75  ],
       [15.4375, 25.75  , 23.5   , ..., 22.25  , 2..., 20.875 , 21.625 , 18.75  ],
       [17.75  , 21.125 , 16.5   , ..., 21.25  , 18.75  , 30.    ]],
      dtype=float32)
 capability = 'sve_i8'
 metric     = 'mahalanobis'
 ndim       = 33
 simd_kernel = <built-in function mahalanobis>
 stats_fixture = {'absolute_baseline_error': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...], 'absolute_simsimd_error': [4.440892098500626e-16, 1.1...3, 43411, 32931, 48030, 30436, 123800, ...], 'baseline_duration': [24094, 16421, 21350, 26339, 15288, 20989, ...], ...}

 scripts/test.py:779: 
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
 scripts/test.py:179: in profile
    result = callable(*args, **kwargs)
        args       = (array([0.03930664, 0.01745605, 0.00878906, 0.0390625 , 0.01068115,
       0.07617188, 0.01574707, 0.00518799, 0.00897...2.125 , 21.125 , ..., 20.875 , 21.625 , 18.75  ],
       [17.75  , 21.125 , 16.5   , ..., 21.25  , 18.75  , 30.    ]]))
        before     = 9903266290841
        callable   = <function baseline_mahalanobis at 0x7ffff48ef4c0>
        kwargs     = {}
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

 x = array([0.03930664, 0.01745605, 0.00878906, 0.0390625 , 0.01068115,
       0.07617188, 0.01574707, 0.00518799, 0.008972...324707 ,
       0.02294922, 0.01013184, 0.01062012, 0.04223633, 0.06591797,
       0.03637695, 0.04882812, 0.00047112])
 y = array([0.02832031, 0.02099609, 0.03442383, 0.0189209 , 0.02563477,
       0.01000977, 0.00592041, 0.05322266, 0.045654...8300781,
       0.02514648, 0.03344727, 0.02185059, 0.04370117, 0.01239014,
       0.00114441, 0.00860596, 0.04956055])
 z = array([[15.9375, 15.4375, 18.625 , ..., 14.3125, 15.875 , 17.75  ],
       [15.4375, 25.75  , 23.5   , ..., 22.25  , 2...22.125 , 21.125 , ..., 20.875 , 21.625 , 18.75  ],
       [17.75  , 21.125 , 16.5   , ..., 21.25  , 18.75  , 30.    ]])

    def baseline_mahalanobis(x, y, z):
        diff = x - y
 >       return np.sqrt(diff @ z @ diff)
 E       RuntimeWarning: invalid value encountered in sqrt

 diff       = array([ 0.01098633, -0.00354004, -0.02563477,  0.0201416 , -0.01495361,
        0.06616211,  0.00982666, -0.04803467, ...
       -0.00219727, -0.02331543, -0.01123047, -0.00146484,  0.05352783,
        0.03523254,  0.04022217, -0.04908943])
 x          = array([0.03930664, 0.01745605, 0.00878906, 0.0390625 , 0.01068115,
       0.07617188, 0.01574707, 0.00518799, 0.008972...324707 ,
       0.02294922, 0.01013184, 0.01062012, 0.04223633, 0.06591797,
       0.03637695, 0.04882812, 0.00047112])
 y          = array([0.02832031, 0.02099609, 0.03442383, 0.0189209 , 0.02563477,
       0.01000977, 0.00592041, 0.05322266, 0.045654...8300781,
       0.02514648, 0.03344727, 0.02185059, 0.04370117, 0.01239014,
       0.00114441, 0.00860596, 0.04956055])
 z          = array([[15.9375, 15.4375, 18.625 , ..., 14.3125, 15.875 , 17.75  ],
       [15.4375, 25.75  , 23.5   , ..., 22.25  , 2...22.125 , 21.125 , ..., 20.875 , 21.625 , 18.75  ],
       [17.75  , 21.125 , 16.5   , ..., 21.25  , 18.75  , 30.    ]])

 scripts/test.py:152: RuntimeWarning
 ____________________ test_intersect[neon-10-10-uint16-3-5] _____________________

 dtype = 'uint16', first_length_bound = 10, second_length_bound = 10
 capability = 'neon'

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("dtype", ["uint16", "uint32"])
    @pytest.mark.parametrize("first_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("second_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_intersect(dtype, first_length_bound, second_length_bound, capability):
        """Compares the simd.intersect() function with numpy.intersect1d."""
    
        if is_running_under_qemu() and (platform.machine() == "aarch64" or platform.machine() == "arm64"):
            pytest.skip("In QEMU `aarch64` emulation on `x86_64` the `intersect` function is not reliable")
    
        np.random.seed()
    
        a_length = np.random.randint(1, first_length_bound)
        b_length = np.random.randint(1, second_length_bound)
        a = np.random.randint(first_length_bound * 2, size=a_length, dtype=dtype)
        b = np.random.randint(second_length_bound * 2, size=b_length, dtype=dtype)
    
        # Remove duplicates, converting into sorted arrays
        a = np.unique(a)
        b = np.unique(b)
    
        keep_one_capability(capability)
        expected = baseline_intersect(a, b)
        result = simd.intersect(a, b)
    
 >       assert round(float(expected)) == round(float(result)), f"Missing {np.intersect1d(a, b)} from {a} and {b}"
 E       AssertionError: Missing [2] from [ 0  2 12 17 19] and [ 1  2  5 16]
 E       assert 1 == 2
 E        +  where 1 = round(1.0)
 E        +    where 1.0 = float(1)
 E        +  and   2 = round(2.0)
 E        +    where 2.0 = float(2.0)

 a          = array([ 0,  2, 12, 17, 19], dtype=uint16)
 a_length   = 8
 b          = array([ 1,  2,  5, 16], dtype=uint16)
 b_length   = 6
 capability = 'neon'
 dtype      = 'uint16'
 expected   = 1
 first_length_bound = 10
 result     = 2.0
 second_length_bound = 10

 scripts/test.py:1059: AssertionError
 ____________________ test_intersect[neon-10-10-uint16-4-5] _____________________

 dtype = 'uint16', first_length_bound = 10, second_length_bound = 10
 capability = 'neon'

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("dtype", ["uint16", "uint32"])
    @pytest.mark.parametrize("first_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("second_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_intersect(dtype, first_length_bound, second_length_bound, capability):
        """Compares the simd.intersect() function with numpy.intersect1d."""
    
        if is_running_under_qemu() and (platform.machine() == "aarch64" or platform.machine() == "arm64"):
            pytest.skip("In QEMU `aarch64` emulation on `x86_64` the `intersect` function is not reliable")
    
        np.random.seed()
    
        a_length = np.random.randint(1, first_length_bound)
        b_length = np.random.randint(1, second_length_bound)
        a = np.random.randint(first_length_bound * 2, size=a_length, dtype=dtype)
        b = np.random.randint(second_length_bound * 2, size=b_length, dtype=dtype)
    
        # Remove duplicates, converting into sorted arrays
        a = np.unique(a)
        b = np.unique(b)
    
        keep_one_capability(capability)
        expected = baseline_intersect(a, b)
        result = simd.intersect(a, b)
    
 >       assert round(float(expected)) == round(float(result)), f"Missing {np.intersect1d(a, b)} from {a} and {b}"
 E       AssertionError: Missing [3 4] from [ 0  3  4  7 10 12 13 16] and [ 2  3  4 11 15]
 E       assert 2 == 3
 E        +  where 2 = round(2.0)
 E        +    where 2.0 = float(2)
 E        +  and   3 = round(3.0)
 E        +    where 3.0 = float(3.0)

 a          = array([ 0,  3,  4,  7, 10, 12, 13, 16], dtype=uint16)
 a_length   = 9
 b          = array([ 2,  3,  4, 11, 15], dtype=uint16)
 b_length   = 6
 capability = 'neon'
 dtype      = 'uint16'
 expected   = 2
 first_length_bound = 10
 result     = 3.0
 second_length_bound = 10

 scripts/test.py:1059: AssertionError
 ____________________ test_intersect[neon-10-10-uint32-4-5] _____________________

 dtype = 'uint32', first_length_bound = 10, second_length_bound = 10
 capability = 'neon'

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("dtype", ["uint16", "uint32"])
    @pytest.mark.parametrize("first_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("second_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_intersect(dtype, first_length_bound, second_length_bound, capability):
        """Compares the simd.intersect() function with numpy.intersect1d."""
    
        if is_running_under_qemu() and (platform.machine() == "aarch64" or platform.machine() == "arm64"):
            pytest.skip("In QEMU `aarch64` emulation on `x86_64` the `intersect` function is not reliable")
    
        np.random.seed()
    
        a_length = np.random.randint(1, first_length_bound)
        b_length = np.random.randint(1, second_length_bound)
        a = np.random.randint(first_length_bound * 2, size=a_length, dtype=dtype)
        b = np.random.randint(second_length_bound * 2, size=b_length, dtype=dtype)
    
        # Remove duplicates, converting into sorted arrays
        a = np.unique(a)
        b = np.unique(b)
    
        keep_one_capability(capability)
        expected = baseline_intersect(a, b)
        result = simd.intersect(a, b)
    
 >       assert round(float(expected)) == round(float(result)), f"Missing {np.intersect1d(a, b)} from {a} and {b}"
 E       AssertionError: Missing [] from [ 0  2  9 10 12 14] and [ 1  4 11]
 E       assert 0 == 1
 E        +  where 0 = round(0.0)
 E        +    where 0.0 = float(0)
 E        +  and   1 = round(1.0)
 E        +    where 1.0 = float(1.0)

 a          = array([ 0,  2,  9, 10, 12, 14], dtype=uint32)
 a_length   = 6
 b          = array([ 1,  4, 11], dtype=uint32)
 b_length   = 3
 capability = 'neon'
 dtype      = 'uint32'
 expected   = 0
 first_length_bound = 10
 result     = 1.0
 second_length_bound = 10

 scripts/test.py:1059: AssertionError
 ____________________ test_intersect[neon-10-100-uint16-5-5] ____________________

 dtype = 'uint16', first_length_bound = 100, second_length_bound = 10
 capability = 'neon'

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("dtype", ["uint16", "uint32"])
    @pytest.mark.parametrize("first_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("second_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_intersect(dtype, first_length_bound, second_length_bound, capability):
        """Compares the simd.intersect() function with numpy.intersect1d."""
    
        if is_running_under_qemu() and (platform.machine() == "aarch64" or platform.machine() == "arm64"):
            pytest.skip("In QEMU `aarch64` emulation on `x86_64` the `intersect` function is not reliable")
    
        np.random.seed()
    
        a_length = np.random.randint(1, first_length_bound)
        b_length = np.random.randint(1, second_length_bound)
        a = np.random.randint(first_length_bound * 2, size=a_length, dtype=dtype)
        b = np.random.randint(second_length_bound * 2, size=b_length, dtype=dtype)
    
        # Remove duplicates, converting into sorted arrays
        a = np.unique(a)
        b = np.unique(b)
    
        keep_one_capability(capability)
        expected = baseline_intersect(a, b)
        result = simd.intersect(a, b)
    
 >       assert round(float(expected)) == round(float(result)), f"Missing {np.intersect1d(a, b)} from {a} and {b}"
 E       AssertionError: Missing [12 16 18] from [  0   6  12  16  18  19  21  23  24  26  29  37  42  43  45  48  50  52
 E           57  60  64  65  67  68  69  77  80  83  85  92  95  97 100 102 103 107
 E          108 109 113 121 124 127 128 131 133 137 140 143 145 146 155 157 159 162
 E          164 167 169 170 171 172 176 177 178 181 182 189 190 191 194 196 197 199] and [ 3 12 16 18]
 E       assert 3 == 4
 E        +  where 3 = round(3.0)
 E        +    where 3.0 = float(3)
 E        +  and   4 = round(4.0)
 E        +    where 4.0 = float(4.0)

 a          = array([  0,   6,  12,  16,  18,  19,  21,  23,  24,  26,  29,  37,  42,
        43,  45,  48,  50,  52,  57,  60,  64,...9, 162, 164, 167, 169, 170, 171, 172, 176, 177, 178, 181, 182,
       189, 190, 191, 194, 196, 197, 199], dtype=uint16)
 a_length   = 83
 b          = array([ 3, 12, 16, 18], dtype=uint16)
 b_length   = 4
 capability = 'neon'
 dtype      = 'uint16'
 expected   = 3
 first_length_bound = 100
 result     = 4.0
 second_length_bound = 10

 scripts/test.py:1059: AssertionError
 ___________________ test_intersect[neon-10-1000-uint16-3-5] ____________________

 dtype = 'uint16', first_length_bound = 1000, second_length_bound = 10
 capability = 'neon'

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("dtype", ["uint16", "uint32"])
    @pytest.mark.parametrize("first_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("second_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_intersect(dtype, first_length_bound, second_length_bound, capability):
        """Compares the simd.intersect() function with numpy.intersect1d."""
    
        if is_running_under_qemu() and (platform.machine() == "aarch64" or platform.machine() == "arm64"):
            pytest.skip("In QEMU `aarch64` emulation on `x86_64` the `intersect` function is not reliable")
    
        np.random.seed()
    
        a_length = np.random.randint(1, first_length_bound)
        b_length = np.random.randint(1, second_length_bound)
        a = np.random.randint(first_length_bound * 2, size=a_length, dtype=dtype)
        b = np.random.randint(second_length_bound * 2, size=b_length, dtype=dtype)
    
        # Remove duplicates, converting into sorted arrays
        a = np.unique(a)
        b = np.unique(b)
    
        keep_one_capability(capability)
        expected = baseline_intersect(a, b)
        result = simd.intersect(a, b)
    
 >       assert round(float(expected)) == round(float(result)), f"Missing {np.intersect1d(a, b)} from {a} and {b}"
 E       AssertionError: Missing [] from [   0    2    6    7    8   13   14   19   21   22   23   25   31   32
 E            37   38   42   48   51   54   55   59   60   64   66   68   71   74
 E            80   83   84   85   87   89   90   93   96   98  100  103  104  105
 E           106  110  113  116  117  119  120  122  123  124  126  127  128  131
 E           133  134  136  137  143  146  152  154  157  160  162  166  168  172
 E           175  177  180  185  187  190  191  192  195  200  212  219  222  226
 E           228  230  233  234  235  236  241  246  248  249  252  255  257  258
 E           259  265  267  272  273  275  276  277  279  280  281  283  293  298
 E           299  300  301  303  304  305  307  309  312  318  323  325  328  329
 E           336  337  339  340  341  342  344  346  347  348  350  352  353  361
 E           362  365  366  370  377  379  380  382  386  387  391  397  399  404
 E           408  410  417  419  423  425  426  427  428  429  432  433  434  435
 E           437  438  439  440  443  445  448  449  450  451  453  454  456  460
 E           465  466  468  470  472  473  477  479  481  483  488  489  495  496
 E           499  505  508  512  521  526  529  530  534  536  538  544  546  550
 E           551  554  557  558  561  562  563  566  568  569  570  571  573  577
 E           579  580  588  589  591  593  595  598  599  601  604  605  609  610
 E           613  614  616  617  619  620  628  629  630  634  635  636  638  641
 E           643  645  649  654  655  656  657  659  664  669  673  675  679  680
 E           684  686  687  689  692  693  697  698  700  701  705  707  708  712
 E           716  725  729  731  732  733  734  740  746  747  748  753  754  759
 E           763  764  765  766  772  773  774  779  783  791  792  793  801  802
 E           804  806  807  808  811  813  814  816  821  822  829  832  835  838
 E           844  848  849  850  851  856  857  858  861  862  863  864  867  869
 E           870  872  873  875  877  879  881  883  887  892  900  902  903  906
 E           907  911  913  914  916  917  921  923  924  925  927  931  932  935
 E           940  944  946  947  953  955  960  967  968  974  975  981  983  984
 E           986  987  989  991  996  998  999 1000 1010 1014 1015 1019 1022 1026
 E          1032 1033 1036 1037 1041 1042 1045 1047 1052 1053 1055 1060 1063 1064
 E          1065 1066 1067 1073 1075 1076 1078 1082 1083 1088 1090 1092 1099 1101
 E          1104 1105 1108 1109 1110 1114 1118 1119 1121 1122 1124 1129 1132 1134
 E          1136 1138 1142 1146 1147 1150 1151 1154 1155 1159 1161 1162 1166 1168
 E          1174 1175 1177 1178 1180 1187 1190 1192 1193 1194 1197 1203 1205 1207
 E          1208 1211 1212 1220 1225 1227 1228 1231 1236 1237 1242 1246 1249 1250
 E          1252 1253 1254 1255 1258 1259 1260 1261 1262 1263 1270 1271 1282 1285
 E          1286 1293 1294 1295 1296 1300 1301 1305 1306 1308 1310 1311 1315 1320
 E          1321 1325 1331 1333 1334 1335 1336 1337 1338 1340 1342 1344 1345 1348
 E          1350 1353 1354 1361 1362 1363 1364 1365 1366 1367 1371 1373 1375 1381
 E          1382 1388 1392 1400 1402 1403 1406 1409 1413 1417 1418 1419 1420 1424
 E          1430 1431 1435 1436 1437 1439 1446 1447 1448 1454 1455 1461 1462 1463
 E          1464 1470 1471 1472 1473 1474 1481 1482 1483 1484 1488 1489 1492 1495
 E          1496 1506 1507 1508 1509 1510 1512 1513 1514 1515 1517 1519 1520 1523
 E          1526 1527 1534 1536 1539 1541 1546 1548 1549 1551 1555 1556 1557 1558
 E          1559 1560 1561 1563 1567 1574 1576 1577 1578 1580 1584 1585 1592 1594
 E          1595 1598 1605 1609 1611 1613 1616 1619 1622 1625 1627 1634 1638 1643
 E          1644 1647 1648 1649 1651 1655 1656 1657 1658 1659 1661 1662 1663 1667
 E          1668 1670 1674 1679 1681 1684 1688 1690 1694 1696 1700 1701 1703 1705
 E          1707 1709 1725 1726 1727 1729 1731 1732 1739 1740 1742 1743 1745 1746
 E          1750 1751 1754 1758 1759 1761 1762 1765 1766 1773 1774 1780 1784 1785
 E          1788 1791 1796 1800 1803 1805 1806 1818 1819 1820 1826 1829 1830 1831
 E          1833 1834 1839 1840 1841 1844 1845 1849 1852 1853 1854 1857 1860 1862
 E          1864 1868 1870 1874 1877 1879 1880 1883 1886 1890 1891 1892 1893 1898
 E          1899 1902 1905 1908 1909 1913 1915 1916 1925 1928 1934 1935 1939 1940
 E          1944 1945 1947 1948 1949 1952 1962 1963 1969 1971 1972 1974 1978 1980
 E          1982 1983 1985 1990 1995 1996 1997 1998 1999] and [15]
 E       assert 0 == 1
 E        +  where 0 = round(0.0)
 E        +    where 0.0 = float(0)
 E        +  and   1 = round(1.0)
 E        +    where 1.0 = float(1.0)

 a          = array([   0,    2,    6,    7,    8,   13,   14,   19,   21,   22,   23,
         25,   31,   32,   37,   38,   42,   ..., 1963, 1969, 1971, 1972, 1974, 1978, 1980, 1982, 1983, 1985,
       1990, 1995, 1996, 1997, 1998, 1999], dtype=uint16)
 a_length   = 953
 b          = array([15], dtype=uint16)
 b_length   = 1
 capability = 'neon'
 dtype      = 'uint16'
 expected   = 0
 first_length_bound = 1000
 result     = 1.0
 second_length_bound = 10

 scripts/test.py:1059: AssertionError
 __________________ test_intersect[neon_f16-10-10-uint16-3-5] ___________________

 dtype = 'uint16', first_length_bound = 10, second_length_bound = 10
 capability = 'neon_f16'

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("dtype", ["uint16", "uint32"])
    @pytest.mark.parametrize("first_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("second_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_intersect(dtype, first_length_bound, second_length_bound, capability):
        """Compares the simd.intersect() function with numpy.intersect1d."""
    
        if is_running_under_qemu() and (platform.machine() == "aarch64" or platform.machine() == "arm64"):
            pytest.skip("In QEMU `aarch64` emulation on `x86_64` the `intersect` function is not reliable")
    
        np.random.seed()
    
        a_length = np.random.randint(1, first_length_bound)
        b_length = np.random.randint(1, second_length_bound)
        a = np.random.randint(first_length_bound * 2, size=a_length, dtype=dtype)
        b = np.random.randint(second_length_bound * 2, size=b_length, dtype=dtype)
    
        # Remove duplicates, converting into sorted arrays
        a = np.unique(a)
        b = np.unique(b)
    
        keep_one_capability(capability)
        expected = baseline_intersect(a, b)
        result = simd.intersect(a, b)
    
 >       assert round(float(expected)) == round(float(result)), f"Missing {np.intersect1d(a, b)} from {a} and {b}"
 E       AssertionError: Missing [15 18] from [ 0  2 15 17 18] and [ 4  7 15 18]
 E       assert 2 == 3
 E        +  where 2 = round(2.0)
 E        +    where 2.0 = float(2)
 E        +  and   3 = round(3.0)
 E        +    where 3.0 = float(3.0)

 a          = array([ 0,  2, 15, 17, 18], dtype=uint16)
 a_length   = 8
 b          = array([ 4,  7, 15, 18], dtype=uint16)
 b_length   = 4
 capability = 'neon_f16'
 dtype      = 'uint16'
 expected   = 2
 first_length_bound = 10
 result     = 3.0
 second_length_bound = 10

 scripts/test.py:1059: AssertionError
 __________________ test_intersect[neon_f16-10-100-uint32-5-5] __________________

 dtype = 'uint32', first_length_bound = 100, second_length_bound = 10
 capability = 'neon_f16'

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("dtype", ["uint16", "uint32"])
    @pytest.mark.parametrize("first_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("second_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_intersect(dtype, first_length_bound, second_length_bound, capability):
        """Compares the simd.intersect() function with numpy.intersect1d."""
    
        if is_running_under_qemu() and (platform.machine() == "aarch64" or platform.machine() == "arm64"):
            pytest.skip("In QEMU `aarch64` emulation on `x86_64` the `intersect` function is not reliable")
    
        np.random.seed()
    
        a_length = np.random.randint(1, first_length_bound)
        b_length = np.random.randint(1, second_length_bound)
        a = np.random.randint(first_length_bound * 2, size=a_length, dtype=dtype)
        b = np.random.randint(second_length_bound * 2, size=b_length, dtype=dtype)
    
        # Remove duplicates, converting into sorted arrays
        a = np.unique(a)
        b = np.unique(b)
    
        keep_one_capability(capability)
        expected = baseline_intersect(a, b)
        result = simd.intersect(a, b)
    
 >       assert round(float(expected)) == round(float(result)), f"Missing {np.intersect1d(a, b)} from {a} and {b}"
 E       AssertionError: Missing [6] from [  0   6   9  11  12  13  16  19  26  28  30  33  36  39  43  61  62  63
 E           67  68  70  72  73  79  88  89  92  97  98 102 104 106 110 111 118 120
 E          123 130 131 134 136 141 154 155 159 163 168 169 173 174 175 177 180 187
 E          190] and [5 6 8]
 E       assert 1 == 2
 E        +  where 1 = round(1.0)
 E        +    where 1.0 = float(1)
 E        +  and   2 = round(2.0)
 E        +    where 2.0 = float(2.0)

 a          = array([  0,   6,   9,  11,  12,  13,  16,  19,  26,  28,  30,  33,  36,
        39,  43,  61,  62,  63,  67,  68,  70,... 130, 131,
       134, 136, 141, 154, 155, 159, 163, 168, 169, 173, 174, 175, 177,
       180, 187, 190], dtype=uint32)
 a_length   = 67
 b          = array([5, 6, 8], dtype=uint32)
 b_length   = 3
 capability = 'neon_f16'
 dtype      = 'uint32'
 expected   = 1
 first_length_bound = 100
 result     = 2.0
 second_length_bound = 10

 scripts/test.py:1059: AssertionError
 _________________ test_intersect[neon_f16-10-1000-uint16-1-5] __________________

 dtype = 'uint16', first_length_bound = 1000, second_length_bound = 10
 capability = 'neon_f16'

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("dtype", ["uint16", "uint32"])
    @pytest.mark.parametrize("first_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("second_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_intersect(dtype, first_length_bound, second_length_bound, capability):
        """Compares the simd.intersect() function with numpy.intersect1d."""
    
        if is_running_under_qemu() and (platform.machine() == "aarch64" or platform.machine() == "arm64"):
            pytest.skip("In QEMU `aarch64` emulation on `x86_64` the `intersect` function is not reliable")
    
        np.random.seed()
    
        a_length = np.random.randint(1, first_length_bound)
        b_length = np.random.randint(1, second_length_bound)
        a = np.random.randint(first_length_bound * 2, size=a_length, dtype=dtype)
        b = np.random.randint(second_length_bound * 2, size=b_length, dtype=dtype)
    
        # Remove duplicates, converting into sorted arrays
        a = np.unique(a)
        b = np.unique(b)
    
        keep_one_capability(capability)
        expected = baseline_intersect(a, b)
        result = simd.intersect(a, b)
    
 >       assert round(float(expected)) == round(float(result)), f"Missing {np.intersect1d(a, b)} from {a} and {b}"
 E       AssertionError: Missing [1] from [   0    1   12   20   25   27   28   29   36   37   39   40   44   45
 E            46   58   60   61   68   75   79   80   81   86   90   92   94   99
 E           100  102  108  109  111  113  114  120  123  125  127  129  132  136
 E           138  142  153  158  159  160  167  169  170  177  180  183  187  195
 E           201  202  205  208  211  217  222  223  224  225  228  234  237  244
 E           250  252  259  264  267  273  285  286  287  288  290  292  301  303
 E           304  306  307  310  320  321  323  329  331  334  339  342  343  349
 E           351  352  358  359  360  363  367  370  371  377  379  380  383  384
 E           392  393  397  401  402  403  404  406  407  410  417  422  423  427
 E           435  436  441  444  445  446  448  453  455  456  461  465  469  473
 E           474  482  487  496  507  508  511  514  516  521  524  527  529  536
 E           537  540  541  543  545  546  547  549  558  563  565  567  572  574
 E           576  581  583  584  588  589  595  596  599  605  612  614  616  619
 E           621  627  628  630  631  632  633  638  649  654  656  660  662  663
 E           664  670  671  673  674  678  683  684  685  686  688  690  691  694
 E           699  702  707  708  711  712  714  717  719  720  732  736  738  740
 E           743  746  748  754  756  760  768  774  778  780  781  782  784  785
 E           787  792  794  796  797  800  806  812  813  818  821  822  826  827
 E           829  833  836  837  839  841  851  858  859  861  862  864  865  870
 E           871  874  879  883  884  888  889  893  900  901  902  903  904  906
 E           909  910  915  929  933  935  942  943  946  955  961  971  975  976
 E           978  980  982  984  990  995  996  998 1001 1002 1006 1011 1017 1019
 E          1022 1023 1026 1031 1035 1043 1046 1052 1060 1063 1067 1072 1076 1077
 E          1079 1083 1085 1087 1091 1097 1098 1101 1104 1107 1108 1112 1114 1116
 E          1117 1118 1119 1120 1127 1128 1131 1135 1137 1140 1143 1146 1150 1153
 E          1156 1159 1164 1167 1177 1185 1186 1199 1201 1203 1208 1209 1214 1217
 E          1218 1223 1224 1228 1229 1230 1233 1235 1241 1250 1256 1264 1269 1271
 E          1273 1278 1279 1281 1282 1283 1289 1290 1291 1293 1298 1305 1308 1309
 E          1315 1317 1318 1319 1320 1322 1330 1332 1333 1334 1342 1344 1345 1347
 E          1349 1352 1353 1355 1358 1359 1362 1364 1365 1367 1370 1377 1379 1380
 E          1382 1389 1393 1394 1403 1410 1413 1421 1423 1424 1426 1430 1438 1440
 E          1443 1447 1449 1455 1456 1457 1460 1470 1473 1475 1478 1482 1484 1485
 E          1486 1491 1494 1496 1500 1502 1508 1509 1512 1519 1524 1525 1526 1535
 E          1536 1540 1543 1548 1549 1551 1552 1555 1556 1557 1561 1563 1569 1574
 E          1576 1588 1589 1593 1595 1597 1598 1600 1601 1603 1607 1608 1609 1610
 E          1611 1613 1617 1619 1620 1621 1624 1627 1628 1630 1639 1643 1648 1653
 E          1654 1663 1671 1676 1683 1686 1688 1690 1691 1693 1694 1696 1703 1705
 E          1710 1713 1714 1724 1725 1731 1733 1734 1738 1739 1741 1744 1747 1748
 E          1762 1764 1766 1770 1771 1773 1774 1777 1778 1779 1782 1787 1789 1793
 E          1799 1803 1805 1811 1815 1821 1822 1823 1827 1836 1837 1839 1859 1860
 E          1863 1866 1869 1871 1874 1875 1882 1885 1889 1891 1893 1894 1895 1900
 E          1903 1905 1908 1913 1917 1924 1926 1928 1935 1950 1953 1954 1958 1961
 E          1963 1965 1967 1968 1971 1972 1973 1975 1976 1979 1982 1985 1986 1989
 E          1994 1999] and [ 1  6  7 16 18]
 E       assert 1 == 2
 E        +  where 1 = round(1.0)
 E        +    where 1.0 = float(1)
 E        +  and   2 = round(2.0)
 E        +    where 2.0 = float(2.0)

 a          = array([   0,    1,   12,   20,   25,   27,   28,   29,   36,   37,   39,
         40,   44,   45,   46,   58,   60,   ..., 1965, 1967, 1968, 1971, 1972,
       1973, 1975, 1976, 1979, 1982, 1985, 1986, 1989, 1994, 1999],
      dtype=uint16)
 a_length   = 714
 b          = array([ 1,  6,  7, 16, 18], dtype=uint16)
 b_length   = 6
 capability = 'neon_f16'
 dtype      = 'uint16'
 expected   = 1
 first_length_bound = 1000
 result     = 2.0
 second_length_bound = 10

 scripts/test.py:1059: AssertionError
 __________________ test_intersect[neon_f16-100-10-uint16-2-5] __________________

 dtype = 'uint16', first_length_bound = 10, second_length_bound = 100
 capability = 'neon_f16'

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("dtype", ["uint16", "uint32"])
    @pytest.mark.parametrize("first_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("second_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_intersect(dtype, first_length_bound, second_length_bound, capability):
        """Compares the simd.intersect() function with numpy.intersect1d."""
    
        if is_running_under_qemu() and (platform.machine() == "aarch64" or platform.machine() == "arm64"):
            pytest.skip("In QEMU `aarch64` emulation on `x86_64` the `intersect` function is not reliable")
    
        np.random.seed()
    
        a_length = np.random.randint(1, first_length_bound)
        b_length = np.random.randint(1, second_length_bound)
        a = np.random.randint(first_length_bound * 2, size=a_length, dtype=dtype)
        b = np.random.randint(second_length_bound * 2, size=b_length, dtype=dtype)
    
        # Remove duplicates, converting into sorted arrays
        a = np.unique(a)
        b = np.unique(b)
    
        keep_one_capability(capability)
        expected = baseline_intersect(a, b)
        result = simd.intersect(a, b)
    
 >       assert round(float(expected)) == round(float(result)), f"Missing {np.intersect1d(a, b)} from {a} and {b}"
 E       AssertionError: Missing [] from [ 0  7 16 19] and [ 29  53  79 114 144 164]
 E       assert 0 == 1
 E        +  where 0 = round(0.0)
 E        +    where 0.0 = float(0)
 E        +  and   1 = round(1.0)
 E        +    where 1.0 = float(1.0)

 a          = array([ 0,  7, 16, 19], dtype=uint16)
 a_length   = 5
 b          = array([ 29,  53,  79, 114, 144, 164], dtype=uint16)
 b_length   = 6
 capability = 'neon_f16'
 dtype      = 'uint16'
 expected   = 0
 first_length_bound = 10
 result     = 1.0
 second_length_bound = 100

 scripts/test.py:1059: AssertionError
 _________________ test_intersect[neon_f16-100-100-uint16-2-5] __________________

 dtype = 'uint16', first_length_bound = 100, second_length_bound = 100
 capability = 'neon_f16'

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("dtype", ["uint16", "uint32"])
    @pytest.mark.parametrize("first_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("second_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_intersect(dtype, first_length_bound, second_length_bound, capability):
        """Compares the simd.intersect() function with numpy.intersect1d."""
    
        if is_running_under_qemu() and (platform.machine() == "aarch64" or platform.machine() == "arm64"):
            pytest.skip("In QEMU `aarch64` emulation on `x86_64` the `intersect` function is not reliable")
    
        np.random.seed()
    
        a_length = np.random.randint(1, first_length_bound)
        b_length = np.random.randint(1, second_length_bound)
        a = np.random.randint(first_length_bound * 2, size=a_length, dtype=dtype)
        b = np.random.randint(second_length_bound * 2, size=b_length, dtype=dtype)
    
        # Remove duplicates, converting into sorted arrays
        a = np.unique(a)
        b = np.unique(b)
    
        keep_one_capability(capability)
        expected = baseline_intersect(a, b)
        result = simd.intersect(a, b)
    
 >       assert round(float(expected)) == round(float(result)), f"Missing {np.intersect1d(a, b)} from {a} and {b}"
 E       AssertionError: Missing [137 195] from [  0   4  12  13  15  16  20  21  24  25  34  35  41  44  46  48  49  50
 E           56  57  72  75  78  79  82  85  87  91  92  96  97  98 102 105 106 108
 E          113 115 117 123 124 129 137 138 139 144 146 147 154 156 160 163 166 180
 E          188 189 190 195] and [ 18 137 148 195]
 E       assert 2 == 3
 E        +  where 2 = round(2.0)
 E        +    where 2.0 = float(2)
 E        +  and   3 = round(3.0)
 E        +    where 3.0 = float(3.0)

 a          = array([  0,   4,  12,  13,  15,  16,  20,  21,  24,  25,  34,  35,  41,
        44,  46,  48,  49,  50,  56,  57,  72,...   123, 124, 129, 137, 138, 139, 144, 146, 147, 154, 156, 160, 163,
       166, 180, 188, 189, 190, 195], dtype=uint16)
 a_length   = 68
 b          = array([ 18, 137, 148, 195], dtype=uint16)
 b_length   = 4
 capability = 'neon_f16'
 dtype      = 'uint16'
 expected   = 2
 first_length_bound = 100
 result     = 3.0
 second_length_bound = 100

 scripts/test.py:1059: AssertionError
 _________________ test_intersect[neon_f16-100-1000-uint16-5-5] _________________

 dtype = 'uint16', first_length_bound = 1000, second_length_bound = 100
 capability = 'neon_f16'

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("dtype", ["uint16", "uint32"])
    @pytest.mark.parametrize("first_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("second_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_intersect(dtype, first_length_bound, second_length_bound, capability):
        """Compares the simd.intersect() function with numpy.intersect1d."""
    
        if is_running_under_qemu() and (platform.machine() == "aarch64" or platform.machine() == "arm64"):
            pytest.skip("In QEMU `aarch64` emulation on `x86_64` the `intersect` function is not reliable")
    
        np.random.seed()
    
        a_length = np.random.randint(1, first_length_bound)
        b_length = np.random.randint(1, second_length_bound)
        a = np.random.randint(first_length_bound * 2, size=a_length, dtype=dtype)
        b = np.random.randint(second_length_bound * 2, size=b_length, dtype=dtype)
    
        # Remove duplicates, converting into sorted arrays
        a = np.unique(a)
        b = np.unique(b)
    
        keep_one_capability(capability)
        expected = baseline_intersect(a, b)
        result = simd.intersect(a, b)
    
 >       assert round(float(expected)) == round(float(result)), f"Missing {np.intersect1d(a, b)} from {a} and {b}"
 E       AssertionError: Missing [ 19  23  54 150 163] from [   0    1    3    4    6    9   11   14   15   17   18   19   21   23
 E            26   28   33   35   36   44   48   51   54   55   56   61   63   64
 E            69   73   77   83   84   86   89  102  103  105  111  114  115  117
 E           125  128  139  141  142  149  150  154  156  157  163  166  167  168
 E           169  170  174  178  186  192  193  194  196  200  202  203  204  206
 E           209  210  214  215  217  220  221  222  224  226  228  229  234  237
 E           240  242  243  249  250  252  254  257  259  260  261  264  265  267
 E           269  270  273  274  278  279  281  282  284  290  292  294  295  296
 E           298  302  310  311  314  319  321  323  327  331  332  333  336  347
 E           349  351  353  354  355  358  360  364  365  368  372  375  377  378
 E           380  381  382  390  396  398  400  402  409  415  418  421  427  428
 E           434  439  447  448  449  450  455  461  463  469  471  472  479  480
 E           481  486  497  498  500  501  503  504  509  511  512  513  514  516
 E           517  518  522  523  525  527  530  531  539  540  550  554  556  557
 E           558  561  565  570  571  573  574  578  582  584  588  590  593  595
 E           596  599  605  609  610  618  620  621  624  625  626  627  628  630
 E           633  634  636  641  643  648  650  651  653  658  659  661  662  663
 E           672  675  676  689  690  700  701  702  703  706  708  710  711  713
 E           714  715  717  720  729  730  732  738  739  743  747  749  751  753
 E           754  756  757  759  762  767  768  771  775  777  778  779  783  784
 E           787  788  790  791  792  798  801  803  804  807  809  812  815  817
 E           819  820  823  824  825  826  828  829  832  836  838  840  845  851
 E           852  854  856  857  858  861  863  864  867  873  875  878  879  883
 E           884  885  887  890  892  893  894  896  902  904  905  908  916  918
 E           921  924  925  929  930  931  936  937  940  945  946  947  949  954
 E           956  958  961  968  973  979  981  989  990  995  996  999 1000 1003
 E          1008 1011 1017 1019 1020 1026 1028 1029 1030 1041 1042 1045 1046 1055
 E          1056 1058 1061 1065 1066 1067 1068 1073 1075 1078 1081 1082 1090 1099
 E          1102 1105 1106 1107 1108 1109 1113 1114 1119 1121 1122 1123 1124 1125
 E          1140 1142 1143 1145 1146 1147 1149 1150 1156 1159 1163 1166 1167 1168
 E          1172 1173 1175 1180 1186 1187 1190 1194 1197 1201 1203 1205 1207 1210
 E          1216 1223 1224 1225 1227 1229 1230 1231 1236 1239 1240 1248 1250 1252
 E          1253 1255 1257 1258 1264 1265 1270 1271 1274 1276 1278 1281 1284 1285
 E          1288 1289 1290 1293 1295 1297 1301 1304 1306 1307 1308 1309 1311 1318
 E          1320 1324 1326 1327 1330 1332 1335 1336 1339 1340 1345 1347 1348 1349
 E          1354 1355 1356 1357 1359 1360 1361 1364 1365 1366 1367 1368 1370 1373
 E          1374 1376 1377 1381 1382 1386 1389 1392 1395 1400 1414 1415 1416 1418
 E          1419 1428 1433 1434 1436 1438 1442 1445 1447 1448 1450 1452 1456 1457
 E          1458 1459 1461 1464 1466 1468 1470 1471 1474 1483 1485 1490 1491 1494
 E          1495 1507 1510 1512 1513 1515 1519 1525 1528 1530 1531 1535 1542 1544
 E          1547 1549 1552 1553 1554 1556 1557 1560 1561 1562 1564 1568 1569 1574
 E          1575 1576 1585 1588 1590 1591 1592 1593 1596 1598 1600 1602 1607 1609
 E          1610 1612 1613 1614 1617 1618 1619 1622 1624 1625 1628 1629 1631 1632
 E          1634 1641 1642 1644 1645 1647 1648 1650 1651 1653 1656 1659 1660 1662
 E          1663 1664 1669 1670 1672 1680 1681 1682 1686 1688 1694 1696 1697 1698
 E          1699 1700 1701 1702 1707 1708 1713 1718 1719 1723 1725 1729 1731 1733
 E          1734 1737 1738 1741 1744 1745 1747 1748 1753 1758 1760 1763 1765 1767
 E          1771 1773 1775 1781 1782 1784 1786 1795 1798 1800 1802 1805 1807 1809
 E          1813 1814 1823 1825 1827 1830 1834 1835 1837 1844 1845 1847 1848 1849
 E          1850 1853 1857 1860 1861 1862 1864 1867 1868 1869 1871 1877 1880 1882
 E          1884 1886 1892 1894 1900 1906 1910 1911 1914 1921 1923 1929 1931 1936
 E          1943 1944 1951 1953 1954 1956 1960 1963 1967 1975 1978 1983 1985 1988
 E          1990 1994 1996] and [  7  19  23  27  37  47  49  53  54  60  66  67  72  79  87 110 127 134
 E          143 150 152 153 161 163 172 173 187 188]
 E       assert 5 == 6
 E        +  where 5 = round(5.0)
 E        +    where 5.0 = float(5)
 E        +  and   6 = round(6.0)
 E        +    where 6.0 = float(6.0)

 a          = array([   0,    1,    3,    4,    6,    9,   11,   14,   15,   17,   18,
         19,   21,   23,   26,   28,   33,   ...  1944, 1951, 1953, 1954, 1956, 1960, 1963, 1967, 1975, 1978, 1983,
       1985, 1988, 1990, 1994, 1996], dtype=uint16)
 a_length   = 964
 b          = array([  7,  19,  23,  27,  37,  47,  49,  53,  54,  60,  66,  67,  72,
        79,  87, 110, 127, 134, 143, 150, 152, 153, 161, 163, 172, 173,
       187, 188], dtype=uint16)
 b_length   = 31
 capability = 'neon_f16'
 dtype      = 'uint16'
 expected   = 5
 first_length_bound = 1000
 result     = 6.0
 second_length_bound = 100

 scripts/test.py:1059: AssertionError
 _________________ test_intersect[neon_f16-100-1000-uint32-4-5] _________________

 dtype = 'uint32', first_length_bound = 1000, second_length_bound = 100
 capability = 'neon_f16'

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("dtype", ["uint16", "uint32"])
    @pytest.mark.parametrize("first_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("second_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_intersect(dtype, first_length_bound, second_length_bound, capability):
        """Compares the simd.intersect() function with numpy.intersect1d."""
    
        if is_running_under_qemu() and (platform.machine() == "aarch64" or platform.machine() == "arm64"):
            pytest.skip("In QEMU `aarch64` emulation on `x86_64` the `intersect` function is not reliable")
    
        np.random.seed()
    
        a_length = np.random.randint(1, first_length_bound)
        b_length = np.random.randint(1, second_length_bound)
        a = np.random.randint(first_length_bound * 2, size=a_length, dtype=dtype)
        b = np.random.randint(second_length_bound * 2, size=b_length, dtype=dtype)
    
        # Remove duplicates, converting into sorted arrays
        a = np.unique(a)
        b = np.unique(b)
    
        keep_one_capability(capability)
        expected = baseline_intersect(a, b)
        result = simd.intersect(a, b)
    
 >       assert round(float(expected)) == round(float(result)), f"Missing {np.intersect1d(a, b)} from {a} and {b}"
 E       AssertionError: Missing [114 148 186 195] from [   0    4    7   11   18   23   27   36   38   43   45   52   55   58
 E            63   64   66   72   73   77   78   79   80   81   83   89   90   91
 E            95   97  107  108  111  114  115  125  126  128  130  136  138  142
 E           143  148  149  151  156  158  160  161  180  181  182  184  186  191
 E           192  195  199  203  205  207  213  216  218  219  223  228  229  234
 E           243  244  245  246  254  255  256  258  265  273  274  277  278  279
 E           284  285  287  293  294  295  297  298  300  302  303  305  306  309
 E           311  314  318  321  323  328  329  330  332  338  346  347  350  352
 E           354  357  358  359  360  362  365  366  368  369  371  375  376  383
 E           384  388  389  390  392  393  395  397  398  400  403  410  412  413
 E           414  416  417  418  421  422  423  431  432  435  438  439  442  443
 E           445  451  456  460  462  463  464  465  467  486  492  494  498  500
 E           501  503  507  508  511  512  513  519  529  533  539  541  542  546
 E           550  551  552  553  555  556  558  561  564  566  570  576  578  582
 E           596  601  602  606  608  610  611  612  615  617  619  622  627  631
 E           632  642  643  644  646  649  658  659  660  664  668  674  679  683
 E           687  692  696  699  700  701  706  709  712  720  723  730  734  736
 E           741  750  757  758  759  760  768  787  788  789  790  793  798  803
 E           805  806  809  812  813  816  823  831  833  837  839  842  844  847
 E           853  854  855  857  860  862  866  871  881  883  887  888  889  898
 E           899  900  903  916  918  922  924  931  933  935  938  941  956  957
 E           958  959  961  962  969  973  974  975  984  990  991  997  998  999
 E          1002 1006 1016 1019 1021 1024 1032 1037 1040 1041 1043 1045 1048 1050
 E          1052 1053 1056 1058 1060 1066 1067 1068 1070 1077 1078 1081 1084 1088
 E          1089 1092 1094 1097 1098 1099 1102 1109 1111 1114 1116 1118 1119 1120
 E          1129 1130 1133 1138 1146 1147 1150 1152 1153 1155 1157 1158 1159 1161
 E          1162 1164 1169 1170 1177 1178 1183 1191 1192 1193 1195 1204 1210 1211
 E          1212 1216 1220 1221 1222 1224 1235 1236 1240 1241 1244 1245 1247 1248
 E          1249 1252 1257 1262 1263 1264 1267 1272 1276 1277 1278 1279 1280 1281
 E          1282 1288 1294 1295 1297 1299 1300 1313 1314 1315 1316 1317 1325 1329
 E          1335 1342 1343 1349 1350 1351 1352 1361 1362 1370 1372 1376 1377 1385
 E          1387 1388 1395 1407 1410 1416 1423 1425 1438 1443 1447 1448 1452 1459
 E          1462 1473 1476 1490 1492 1494 1496 1499 1501 1504 1505 1506 1510 1513
 E          1516 1520 1525 1531 1534 1536 1541 1542 1543 1553 1555 1565 1566 1567
 E          1568 1577 1584 1587 1592 1594 1597 1598 1615 1619 1623 1628 1635 1637
 E          1643 1644 1647 1649 1652 1653 1654 1655 1656 1661 1662 1665 1671 1678
 E          1679 1680 1682 1683 1684 1689 1692 1696 1697 1699 1707 1715 1718 1723
 E          1731 1733 1735 1744 1748 1758 1761 1766 1773 1778 1783 1786 1791 1792
 E          1793 1800 1803 1804 1806 1809 1813 1814 1815 1816 1825 1826 1834 1835
 E          1836 1837 1843 1846 1849 1850 1861 1862 1867 1876 1877 1880 1887 1889
 E          1890 1894 1897 1906 1909 1913 1914 1916 1917 1919 1920 1922 1923 1924
 E          1926 1927 1928 1931 1932 1933 1937 1942 1949 1951 1959 1963 1965 1967
 E          1969 1972 1974 1976 1978 1981 1985 1992 1993 1994 1998] and [ 20  28  61  69 114 135 148 167 186 188 195]
 E       assert 4 == 5
 E        +  where 4 = round(4.0)
 E        +    where 4.0 = float(4)
 E        +  and   5 = round(5.0)
 E        +    where 5.0 = float(5.0)

 a          = array([   0,    4,    7,   11,   18,   23,   27,   36,   38,   43,   45,
         52,   55,   58,   63,   64,   66,   ...  1951, 1959, 1963, 1965, 1967, 1969, 1972, 1974, 1976, 1978, 1981,
       1985, 1992, 1993, 1994, 1998], dtype=uint32)
 a_length   = 714
 b          = array([ 20,  28,  61,  69, 114, 135, 148, 167, 186, 188, 195],
      dtype=uint32)
 b_length   = 11
 capability = 'neon_f16'
 dtype      = 'uint32'
 expected   = 4
 first_length_bound = 1000
 result     = 5.0
 second_length_bound = 100

 scripts/test.py:1059: AssertionError
 _________________ test_intersect[neon_bf16-10-100-uint16-3-5] __________________

 dtype = 'uint16', first_length_bound = 100, second_length_bound = 10
 capability = 'neon_bf16'

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("dtype", ["uint16", "uint32"])
    @pytest.mark.parametrize("first_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("second_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_intersect(dtype, first_length_bound, second_length_bound, capability):
        """Compares the simd.intersect() function with numpy.intersect1d."""
    
        if is_running_under_qemu() and (platform.machine() == "aarch64" or platform.machine() == "arm64"):
            pytest.skip("In QEMU `aarch64` emulation on `x86_64` the `intersect` function is not reliable")
    
        np.random.seed()
    
        a_length = np.random.randint(1, first_length_bound)
        b_length = np.random.randint(1, second_length_bound)
        a = np.random.randint(first_length_bound * 2, size=a_length, dtype=dtype)
        b = np.random.randint(second_length_bound * 2, size=b_length, dtype=dtype)
    
        # Remove duplicates, converting into sorted arrays
        a = np.unique(a)
        b = np.unique(b)
    
        keep_one_capability(capability)
        expected = baseline_intersect(a, b)
        result = simd.intersect(a, b)
    
 >       assert round(float(expected)) == round(float(result)), f"Missing {np.intersect1d(a, b)} from {a} and {b}"
 E       AssertionError: Missing [] from [  0   8   9  11  13  18  21  23  24  25  27  34  38  39  41  42  43  48
 E           50  56  57  61  68  69  70  77  80  84  88  90  92  93  96  98  99 112
 E          113 114 116 119 121 123 124 128 131 133 134 135 136 137 142 144 145 147
 E          152 154 157 161 163 164 165 166 167 168 170 171 174 178 180 185 188 189
 E          191 193 197] and [2]
 E       assert 0 == 1
 E        +  where 0 = round(0.0)
 E        +    where 0.0 = float(0)
 E        +  and   1 = round(1.0)
 E        +    where 1.0 = float(1.0)

 a          = array([  0,   8,   9,  11,  13,  18,  21,  23,  24,  25,  27,  34,  38,
        39,  41,  42,  43,  48,  50,  56,  57,...4, 157, 161, 163, 164, 165, 166, 167, 168, 170,
       171, 174, 178, 180, 185, 188, 189, 191, 193, 197], dtype=uint16)
 a_length   = 92
 b          = array([2], dtype=uint16)
 b_length   = 1
 capability = 'neon_bf16'
 dtype      = 'uint16'
 expected   = 0
 first_length_bound = 100
 result     = 1.0
 second_length_bound = 10

 scripts/test.py:1059: AssertionError
 _________________ test_intersect[neon_bf16-10-100-uint32-2-5] __________________

 dtype = 'uint32', first_length_bound = 100, second_length_bound = 10
 capability = 'neon_bf16'

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("dtype", ["uint16", "uint32"])
    @pytest.mark.parametrize("first_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("second_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_intersect(dtype, first_length_bound, second_length_bound, capability):
        """Compares the simd.intersect() function with numpy.intersect1d."""
    
        if is_running_under_qemu() and (platform.machine() == "aarch64" or platform.machine() == "arm64"):
            pytest.skip("In QEMU `aarch64` emulation on `x86_64` the `intersect` function is not reliable")
    
        np.random.seed()
    
        a_length = np.random.randint(1, first_length_bound)
        b_length = np.random.randint(1, second_length_bound)
        a = np.random.randint(first_length_bound * 2, size=a_length, dtype=dtype)
        b = np.random.randint(second_length_bound * 2, size=b_length, dtype=dtype)
    
        # Remove duplicates, converting into sorted arrays
        a = np.unique(a)
        b = np.unique(b)
    
        keep_one_capability(capability)
        expected = baseline_intersect(a, b)
        result = simd.intersect(a, b)
    
 >       assert round(float(expected)) == round(float(result)), f"Missing {np.intersect1d(a, b)} from {a} and {b}"
 E       AssertionError: Missing [ 5 11] from [  0   3   5   7  11  13  19  20  22  29  30  40  52  58  59  61  63  65
 E           67  71  74  76  77  81  83  87  89  92  93  94  95  98 101 104 113 114
 E          116 118 119 120 125 128 130 133 141 146 151 155 156 157 160 162 164 171
 E          172 178 179 181 182 187 194] and [ 4  5 10 11 15 18]
 E       assert 2 == 3
 E        +  where 2 = round(2.0)
 E        +    where 2.0 = float(2)
 E        +  and   3 = round(3.0)
 E        +    where 3.0 = float(3.0)

 a          = array([  0,   3,   5,   7,  11,  13,  19,  20,  22,  29,  30,  40,  52,
        58,  59,  61,  63,  65,  67,  71,  74,...8, 130, 133, 141, 146, 151, 155, 156, 157, 160, 162,
       164, 171, 172, 178, 179, 181, 182, 187, 194], dtype=uint32)
 a_length   = 69
 b          = array([ 4,  5, 10, 11, 15, 18], dtype=uint32)
 b_length   = 8
 capability = 'neon_bf16'
 dtype      = 'uint32'
 expected   = 2
 first_length_bound = 100
 result     = 3.0
 second_length_bound = 10

 scripts/test.py:1059: AssertionError
 _________________ test_intersect[neon_bf16-10-100-uint32-4-5] __________________

 dtype = 'uint32', first_length_bound = 100, second_length_bound = 10
 capability = 'neon_bf16'

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("dtype", ["uint16", "uint32"])
    @pytest.mark.parametrize("first_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("second_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_intersect(dtype, first_length_bound, second_length_bound, capability):
        """Compares the simd.intersect() function with numpy.intersect1d."""
    
        if is_running_under_qemu() and (platform.machine() == "aarch64" or platform.machine() == "arm64"):
            pytest.skip("In QEMU `aarch64` emulation on `x86_64` the `intersect` function is not reliable")
    
        np.random.seed()
    
        a_length = np.random.randint(1, first_length_bound)
        b_length = np.random.randint(1, second_length_bound)
        a = np.random.randint(first_length_bound * 2, size=a_length, dtype=dtype)
        b = np.random.randint(second_length_bound * 2, size=b_length, dtype=dtype)
    
        # Remove duplicates, converting into sorted arrays
        a = np.unique(a)
        b = np.unique(b)
    
        keep_one_capability(capability)
        expected = baseline_intersect(a, b)
        result = simd.intersect(a, b)
    
 >       assert round(float(expected)) == round(float(result)), f"Missing {np.intersect1d(a, b)} from {a} and {b}"
 E       AssertionError: Missing [ 3 17] from [  0   3   6   8   9  10  13  17  24  28  31  38  43  44  47  49  52  53
 E           57  61  64  65  67  69  71  72  78  89  97  99 101 102 103 106 109 114
 E          117 122 125 127 128 135 137 139 140 141 142 143 145 147 150 152 155 156
 E          157 161 166 179 189 192] and [ 1  3  5 15 16 17 19]
 E       assert 2 == 3
 E        +  where 2 = round(2.0)
 E        +    where 2.0 = float(2)
 E        +  and   3 = round(3.0)
 E        +    where 3.0 = float(3.0)

 a          = array([  0,   3,   6,   8,   9,  10,  13,  17,  24,  28,  31,  38,  43,
        44,  47,  49,  52,  53,  57,  61,  64,...8, 135, 137, 139, 140, 141, 142, 143, 145, 147, 150, 152,
       155, 156, 157, 161, 166, 179, 189, 192], dtype=uint32)
 a_length   = 69
 b          = array([ 1,  3,  5, 15, 16, 17, 19], dtype=uint32)
 b_length   = 7
 capability = 'neon_bf16'
 dtype      = 'uint32'
 expected   = 2
 first_length_bound = 100
 result     = 3.0
 second_length_bound = 10

 scripts/test.py:1059: AssertionError
 _________________ test_intersect[neon_bf16-10-1000-uint16-1-5] _________________

 dtype = 'uint16', first_length_bound = 1000, second_length_bound = 10
 capability = 'neon_bf16'

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("dtype", ["uint16", "uint32"])
    @pytest.mark.parametrize("first_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("second_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_intersect(dtype, first_length_bound, second_length_bound, capability):
        """Compares the simd.intersect() function with numpy.intersect1d."""
    
        if is_running_under_qemu() and (platform.machine() == "aarch64" or platform.machine() == "arm64"):
            pytest.skip("In QEMU `aarch64` emulation on `x86_64` the `intersect` function is not reliable")
    
        np.random.seed()
    
        a_length = np.random.randint(1, first_length_bound)
        b_length = np.random.randint(1, second_length_bound)
        a = np.random.randint(first_length_bound * 2, size=a_length, dtype=dtype)
        b = np.random.randint(second_length_bound * 2, size=b_length, dtype=dtype)
    
        # Remove duplicates, converting into sorted arrays
        a = np.unique(a)
        b = np.unique(b)
    
        keep_one_capability(capability)
        expected = baseline_intersect(a, b)
        result = simd.intersect(a, b)
    
 >       assert round(float(expected)) == round(float(result)), f"Missing {np.intersect1d(a, b)} from {a} and {b}"
 E       AssertionError: Missing [8] from [   0    8    9   11   12   16   18   32   42   44   50   57   58   60
 E            66   69   70   92   93  101  104  105  113  120  121  124  127  134
 E           135  136  145  149  154  156  159  162  163  166  168  173  175  181
 E           188  209  218  233  236  244  247  266  272  282  286  293  295  296
 E           298  303  306  307  330  336  350  360  369  371  372  375  384  390
 E           393  398  403  410  414  426  430  431  432  438  443  444  449  452
 E           453  458  461  466  473  483  493  508  515  517  525  526  531  535
 E           539  545  551  556  557  559  563  564  584  591  595  598  607  623
 E           624  633  650  655  658  668  674  679  680  690  693  706  716  725
 E           730  735  740  747  759  763  768  769  770  771  773  776  789  792
 E           800  804  811  814  817  820  836  848  849  867  870  878  879  880
 E           881  891  892  904  910  931  936  942  959  960  962  967  969  978
 E           979  982  983  988  993 1001 1002 1004 1007 1014 1026 1031 1034 1042
 E          1043 1049 1053 1054 1055 1057 1063 1080 1081 1091 1094 1095 1098 1099
 E          1101 1103 1104 1105 1114 1122 1128 1131 1141 1153 1157 1163 1168 1173
 E          1176 1189 1197 1198 1202 1206 1210 1211 1212 1213 1227 1236 1243 1244
 E          1245 1249 1253 1273 1277 1289 1294 1301 1311 1319 1328 1330 1332 1336
 E          1345 1354 1362 1366 1369 1375 1381 1383 1386 1390 1395 1413 1425 1426
 E          1427 1435 1439 1447 1461 1462 1463 1466 1467 1469 1478 1485 1487 1488
 E          1490 1495 1498 1499 1500 1511 1519 1525 1532 1536 1537 1539 1544 1547
 E          1550 1561 1565 1575 1586 1592 1595 1606 1615 1635 1646 1647 1650 1653
 E          1673 1675 1683 1691 1695 1698 1699 1700 1702 1703 1707 1709 1717 1718
 E          1725 1728 1730 1733 1736 1737 1740 1746 1755 1756 1767 1770 1775 1780
 E          1784 1806 1811 1813 1816 1822 1824 1842 1846 1854 1858 1866 1873 1880
 E          1896 1898 1913 1918 1920 1923 1926 1942 1945 1951 1953 1956 1961 1972
 E          1973 1974 1980 1995] and [6 8]
 E       assert 1 == 2
 E        +  where 1 = round(1.0)
 E        +    where 1.0 = float(1)
 E        +  and   2 = round(2.0)
 E        +    where 2.0 = float(2.0)

 a          = array([   0,    8,    9,   11,   12,   16,   18,   32,   42,   44,   50,
         57,   58,   60,   66,   69,   70,   ... 1918, 1920,
       1923, 1926, 1942, 1945, 1951, 1953, 1956, 1961, 1972, 1973, 1974,
       1980, 1995], dtype=uint16)
 a_length   = 392
 b          = array([6, 8], dtype=uint16)
 b_length   = 2
 capability = 'neon_bf16'
 dtype      = 'uint16'
 expected   = 1
 first_length_bound = 1000
 result     = 2.0
 second_length_bound = 10

 scripts/test.py:1059: AssertionError
 _________________ test_intersect[neon_bf16-10-1000-uint16-5-5] _________________

 dtype = 'uint16', first_length_bound = 1000, second_length_bound = 10
 capability = 'neon_bf16'

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("dtype", ["uint16", "uint32"])
    @pytest.mark.parametrize("first_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("second_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_intersect(dtype, first_length_bound, second_length_bound, capability):
        """Compares the simd.intersect() function with numpy.intersect1d."""
    
        if is_running_under_qemu() and (platform.machine() == "aarch64" or platform.machine() == "arm64"):
            pytest.skip("In QEMU `aarch64` emulation on `x86_64` the `intersect` function is not reliable")
    
        np.random.seed()
    
        a_length = np.random.randint(1, first_length_bound)
        b_length = np.random.randint(1, second_length_bound)
        a = np.random.randint(first_length_bound * 2, size=a_length, dtype=dtype)
        b = np.random.randint(second_length_bound * 2, size=b_length, dtype=dtype)
    
        # Remove duplicates, converting into sorted arrays
        a = np.unique(a)
        b = np.unique(b)
    
        keep_one_capability(capability)
        expected = baseline_intersect(a, b)
        result = simd.intersect(a, b)
    
 >       assert round(float(expected)) == round(float(result)), f"Missing {np.intersect1d(a, b)} from {a} and {b}"
 E       AssertionError: Missing [] from [   0    2   16   19   23   34   35   36   39   45   56   57   61   62
 E            65   66   70   92  102  103  107  108  112  120  122  128  132  137
 E           138  141  148  156  157  162  163  172  182  190  191  201  202  220
 E           222  226  230  232  239  242  245  247  254  257  258  260  263  268
 E           279  287  288  290  294  295  300  307  308  315  328  329  331  332
 E           340  341  344  345  348  350  354  360  379  380  386  388  398  408
 E           411  412  414  420  423  429  436  455  460  462  471  473  475  477
 E           482  488  490  499  514  518  519  523  535  540  553  556  557  562
 E           571  580  581  583  586  587  593  599  601  617  622  627  629  630
 E           633  636  639  641  642  643  649  659  660  661  671  674  677  682
 E           684  685  692  695  696  709  717  721  725  726  734  740  749  756
 E           761  765  767  770  772  777  778  780  790  800  802  809  821  825
 E           828  830  832  835  848  857  858  861  862  867  873  875  876  884
 E           887  888  890  893  895  898  899  902  904  906  910  922  926  933
 E           948  957  974  979  980  981  984  985 1005 1027 1036 1042 1051 1052
 E          1053 1055 1059 1060 1070 1076 1078 1081 1090 1095 1102 1105 1115 1127
 E          1130 1133 1138 1144 1152 1165 1179 1181 1185 1188 1206 1209 1211 1220
 E          1231 1242 1248 1260 1262 1266 1275 1281 1283 1285 1286 1295 1297 1299
 E          1311 1312 1336 1342 1344 1346 1349 1366 1367 1371 1373 1374 1391 1399
 E          1402 1408 1409 1414 1422 1423 1426 1438 1442 1463 1469 1473 1496 1506
 E          1507 1509 1529 1537 1541 1549 1552 1560 1571 1576 1577 1579 1580 1586
 E          1590 1595 1599 1600 1601 1606 1607 1613 1615 1617 1634 1636 1660 1667
 E          1670 1673 1676 1685 1686 1692 1693 1697 1702 1707 1712 1714 1722 1726
 E          1727 1732 1734 1745 1747 1765 1766 1774 1782 1783 1785 1799 1813 1816
 E          1819 1830 1849 1852 1855 1858 1861 1863 1870 1872 1878 1885 1908 1909
 E          1912 1919 1921 1923 1929 1933 1936 1939 1944 1947 1957 1966 1970 1973
 E          1996 1998] and [ 5 15 18]
 E       assert 0 == 1
 E        +  where 0 = round(0.0)
 E        +    where 0.0 = float(0)
 E        +  and   1 = round(1.0)
 E        +    where 1.0 = float(1.0)

 a          = array([   0,    2,   16,   19,   23,   34,   35,   36,   39,   45,   56,
         57,   61,   62,   65,   66,   70,   ... 1919,
       1921, 1923, 1929, 1933, 1936, 1939, 1944, 1947, 1957, 1966, 1970,
       1973, 1996, 1998], dtype=uint16)
 a_length   = 404
 b          = array([ 5, 15, 18], dtype=uint16)
 b_length   = 3
 capability = 'neon_bf16'
 dtype      = 'uint16'
 expected   = 0
 first_length_bound = 1000
 result     = 1.0
 second_length_bound = 10

 scripts/test.py:1059: AssertionError
 _________________ test_intersect[neon_bf16-100-10-uint16-5-5] __________________

 dtype = 'uint16', first_length_bound = 10, second_length_bound = 100
 capability = 'neon_bf16'

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("dtype", ["uint16", "uint32"])
    @pytest.mark.parametrize("first_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("second_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_intersect(dtype, first_length_bound, second_length_bound, capability):
        """Compares the simd.intersect() function with numpy.intersect1d."""
    
        if is_running_under_qemu() and (platform.machine() == "aarch64" or platform.machine() == "arm64"):
            pytest.skip("In QEMU `aarch64` emulation on `x86_64` the `intersect` function is not reliable")
    
        np.random.seed()
    
        a_length = np.random.randint(1, first_length_bound)
        b_length = np.random.randint(1, second_length_bound)
        a = np.random.randint(first_length_bound * 2, size=a_length, dtype=dtype)
        b = np.random.randint(second_length_bound * 2, size=b_length, dtype=dtype)
    
        # Remove duplicates, converting into sorted arrays
        a = np.unique(a)
        b = np.unique(b)
    
        keep_one_capability(capability)
        expected = baseline_intersect(a, b)
        result = simd.intersect(a, b)
    
 >       assert round(float(expected)) == round(float(result)), f"Missing {np.intersect1d(a, b)} from {a} and {b}"
 E       AssertionError: Missing [ 1 16] from [ 0  1 12 15 16] and [  1   9  16  18  31  36  49  57  59  60  67  73  80  95 110 111 116 121
 E          125 128 129 142 163 192 194 195 199]
 E       assert 2 == 3
 E        +  where 2 = round(2.0)
 E        +    where 2.0 = float(2)
 E        +  and   3 = round(3.0)
 E        +    where 3.0 = float(3.0)

 a          = array([ 0,  1, 12, 15, 16], dtype=uint16)
 a_length   = 5
 b          = array([  1,   9,  16,  18,  31,  36,  49,  57,  59,  60,  67,  73,  80,
        95, 110, 111, 116, 121, 125, 128, 129, 142, 163, 192, 194, 195,
       199], dtype=uint16)
 b_length   = 28
 capability = 'neon_bf16'
 dtype      = 'uint16'
 expected   = 2
 first_length_bound = 10
 result     = 3.0
 second_length_bound = 100

 scripts/test.py:1059: AssertionError
 ___________________ test_intersect[neon_i8-10-10-uint16-1-5] ___________________

 dtype = 'uint16', first_length_bound = 10, second_length_bound = 10
 capability = 'neon_i8'

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("dtype", ["uint16", "uint32"])
    @pytest.mark.parametrize("first_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("second_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_intersect(dtype, first_length_bound, second_length_bound, capability):
        """Compares the simd.intersect() function with numpy.intersect1d."""
    
        if is_running_under_qemu() and (platform.machine() == "aarch64" or platform.machine() == "arm64"):
            pytest.skip("In QEMU `aarch64` emulation on `x86_64` the `intersect` function is not reliable")
    
        np.random.seed()
    
        a_length = np.random.randint(1, first_length_bound)
        b_length = np.random.randint(1, second_length_bound)
        a = np.random.randint(first_length_bound * 2, size=a_length, dtype=dtype)
        b = np.random.randint(second_length_bound * 2, size=b_length, dtype=dtype)
    
        # Remove duplicates, converting into sorted arrays
        a = np.unique(a)
        b = np.unique(b)
    
        keep_one_capability(capability)
        expected = baseline_intersect(a, b)
        result = simd.intersect(a, b)
    
 >       assert round(float(expected)) == round(float(result)), f"Missing {np.intersect1d(a, b)} from {a} and {b}"
 E       AssertionError: Missing [] from [ 0  1  3  5 10 15 16] and [17]
 E       assert 0 == 1
 E        +  where 0 = round(0.0)
 E        +    where 0.0 = float(0)
 E        +  and   1 = round(1.0)
 E        +    where 1.0 = float(1.0)

 a          = array([ 0,  1,  3,  5, 10, 15, 16], dtype=uint16)
 a_length   = 7
 b          = array([17], dtype=uint16)
 b_length   = 1
 capability = 'neon_i8'
 dtype      = 'uint16'
 expected   = 0
 first_length_bound = 10
 result     = 1.0
 second_length_bound = 10

 scripts/test.py:1059: AssertionError
 ___________________ test_intersect[neon_i8-10-10-uint16-3-5] ___________________

 dtype = 'uint16', first_length_bound = 10, second_length_bound = 10
 capability = 'neon_i8'

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("dtype", ["uint16", "uint32"])
    @pytest.mark.parametrize("first_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("second_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_intersect(dtype, first_length_bound, second_length_bound, capability):
        """Compares the simd.intersect() function with numpy.intersect1d."""
    
        if is_running_under_qemu() and (platform.machine() == "aarch64" or platform.machine() == "arm64"):
            pytest.skip("In QEMU `aarch64` emulation on `x86_64` the `intersect` function is not reliable")
    
        np.random.seed()
    
        a_length = np.random.randint(1, first_length_bound)
        b_length = np.random.randint(1, second_length_bound)
        a = np.random.randint(first_length_bound * 2, size=a_length, dtype=dtype)
        b = np.random.randint(second_length_bound * 2, size=b_length, dtype=dtype)
    
        # Remove duplicates, converting into sorted arrays
        a = np.unique(a)
        b = np.unique(b)
    
        keep_one_capability(capability)
        expected = baseline_intersect(a, b)
        result = simd.intersect(a, b)
    
 >       assert round(float(expected)) == round(float(result)), f"Missing {np.intersect1d(a, b)} from {a} and {b}"
 E       AssertionError: Missing [] from [ 0  3  9 10 11 18] and [ 1  2  7 15 19]
 E       assert 0 == 1
 E        +  where 0 = round(0.0)
 E        +    where 0.0 = float(0)
 E        +  and   1 = round(1.0)
 E        +    where 1.0 = float(1.0)

 a          = array([ 0,  3,  9, 10, 11, 18], dtype=uint16)
 a_length   = 6
 b          = array([ 1,  2,  7, 15, 19], dtype=uint16)
 b_length   = 6
 capability = 'neon_i8'
 dtype      = 'uint16'
 expected   = 0
 first_length_bound = 10
 result     = 1.0
 second_length_bound = 10

 scripts/test.py:1059: AssertionError
 ___________________ test_intersect[neon_i8-10-10-uint32-4-5] ___________________

 dtype = 'uint32', first_length_bound = 10, second_length_bound = 10
 capability = 'neon_i8'

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("dtype", ["uint16", "uint32"])
    @pytest.mark.parametrize("first_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("second_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_intersect(dtype, first_length_bound, second_length_bound, capability):
        """Compares the simd.intersect() function with numpy.intersect1d."""
    
        if is_running_under_qemu() and (platform.machine() == "aarch64" or platform.machine() == "arm64"):
            pytest.skip("In QEMU `aarch64` emulation on `x86_64` the `intersect` function is not reliable")
    
        np.random.seed()
    
        a_length = np.random.randint(1, first_length_bound)
        b_length = np.random.randint(1, second_length_bound)
        a = np.random.randint(first_length_bound * 2, size=a_length, dtype=dtype)
        b = np.random.randint(second_length_bound * 2, size=b_length, dtype=dtype)
    
        # Remove duplicates, converting into sorted arrays
        a = np.unique(a)
        b = np.unique(b)
    
        keep_one_capability(capability)
        expected = baseline_intersect(a, b)
        result = simd.intersect(a, b)
    
 >       assert round(float(expected)) == round(float(result)), f"Missing {np.intersect1d(a, b)} from {a} and {b}"
 E       AssertionError: Missing [ 6 18] from [ 0  5  6 12 13 14 15 18] and [ 6 17 18]
 E       assert 2 == 3
 E        +  where 2 = round(2.0)
 E        +    where 2.0 = float(2)
 E        +  and   3 = round(3.0)
 E        +    where 3.0 = float(3.0)

 a          = array([ 0,  5,  6, 12, 13, 14, 15, 18], dtype=uint32)
 a_length   = 9
 b          = array([ 6, 17, 18], dtype=uint32)
 b_length   = 4
 capability = 'neon_i8'
 dtype      = 'uint32'
 expected   = 2
 first_length_bound = 10
 result     = 3.0
 second_length_bound = 10

 scripts/test.py:1059: AssertionError
 ___________________ test_intersect[neon_i8-10-10-uint32-5-5] ___________________

 dtype = 'uint32', first_length_bound = 10, second_length_bound = 10
 capability = 'neon_i8'

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("dtype", ["uint16", "uint32"])
    @pytest.mark.parametrize("first_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("second_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_intersect(dtype, first_length_bound, second_length_bound, capability):
        """Compares the simd.intersect() function with numpy.intersect1d."""
    
        if is_running_under_qemu() and (platform.machine() == "aarch64" or platform.machine() == "arm64"):
            pytest.skip("In QEMU `aarch64` emulation on `x86_64` the `intersect` function is not reliable")
    
        np.random.seed()
    
        a_length = np.random.randint(1, first_length_bound)
        b_length = np.random.randint(1, second_length_bound)
        a = np.random.randint(first_length_bound * 2, size=a_length, dtype=dtype)
        b = np.random.randint(second_length_bound * 2, size=b_length, dtype=dtype)
    
        # Remove duplicates, converting into sorted arrays
        a = np.unique(a)
        b = np.unique(b)
    
        keep_one_capability(capability)
        expected = baseline_intersect(a, b)
        result = simd.intersect(a, b)
    
 >       assert round(float(expected)) == round(float(result)), f"Missing {np.intersect1d(a, b)} from {a} and {b}"
 E       AssertionError: Missing [2] from [0 2 6] and [2 9]
 E       assert 1 == 2
 E        +  where 1 = round(1.0)
 E        +    where 1.0 = float(1)
 E        +  and   2 = round(2.0)
 E        +    where 2.0 = float(2.0)

 a          = array([0, 2, 6], dtype=uint32)
 a_length   = 3
 b          = array([2, 9], dtype=uint32)
 b_length   = 2
 capability = 'neon_i8'
 dtype      = 'uint32'
 expected   = 1
 first_length_bound = 10
 result     = 2.0
 second_length_bound = 10

 scripts/test.py:1059: AssertionError
 __________________ test_intersect[neon_i8-10-100-uint16-3-5] ___________________

 dtype = 'uint16', first_length_bound = 100, second_length_bound = 10
 capability = 'neon_i8'

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("dtype", ["uint16", "uint32"])
    @pytest.mark.parametrize("first_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("second_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_intersect(dtype, first_length_bound, second_length_bound, capability):
        """Compares the simd.intersect() function with numpy.intersect1d."""
    
        if is_running_under_qemu() and (platform.machine() == "aarch64" or platform.machine() == "arm64"):
            pytest.skip("In QEMU `aarch64` emulation on `x86_64` the `intersect` function is not reliable")
    
        np.random.seed()
    
        a_length = np.random.randint(1, first_length_bound)
        b_length = np.random.randint(1, second_length_bound)
        a = np.random.randint(first_length_bound * 2, size=a_length, dtype=dtype)
        b = np.random.randint(second_length_bound * 2, size=b_length, dtype=dtype)
    
        # Remove duplicates, converting into sorted arrays
        a = np.unique(a)
        b = np.unique(b)
    
        keep_one_capability(capability)
        expected = baseline_intersect(a, b)
        result = simd.intersect(a, b)
    
 >       assert round(float(expected)) == round(float(result)), f"Missing {np.intersect1d(a, b)} from {a} and {b}"
 E       AssertionError: Missing [] from [  0   7  10  12  13  15  17  20  22  28  30  32  39  52  54  57  59  63
 E           64  66  67  68  72  73  74  75  82  88  89  91  94  96  99 100 103 104
 E          105 116 121 122 123 125 126 127 129 133 138 145 146 148 150 151 153 156
 E          158 160 163 164 165 167 169 172 176 180 182 186 187 191 195 199] and [9]
 E       assert 0 == 1
 E        +  where 0 = round(0.0)
 E        +    where 0.0 = float(0)
 E        +  and   1 = round(1.0)
 E        +    where 1.0 = float(1.0)

 a          = array([  0,   7,  10,  12,  13,  15,  17,  20,  22,  28,  30,  32,  39,
        52,  54,  57,  59,  63,  64,  66,  67,...
       153, 156, 158, 160, 163, 164, 165, 167, 169, 172, 176, 180, 182,
       186, 187, 191, 195, 199], dtype=uint16)
 a_length   = 82
 b          = array([9], dtype=uint16)
 b_length   = 1
 capability = 'neon_i8'
 dtype      = 'uint16'
 expected   = 0
 first_length_bound = 100
 result     = 1.0
 second_length_bound = 10

 scripts/test.py:1059: AssertionError
 __________________ test_intersect[neon_i8-10-1000-uint16-1-5] __________________

 dtype = 'uint16', first_length_bound = 1000, second_length_bound = 10
 capability = 'neon_i8'

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("dtype", ["uint16", "uint32"])
    @pytest.mark.parametrize("first_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("second_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_intersect(dtype, first_length_bound, second_length_bound, capability):
        """Compares the simd.intersect() function with numpy.intersect1d."""
    
        if is_running_under_qemu() and (platform.machine() == "aarch64" or platform.machine() == "arm64"):
            pytest.skip("In QEMU `aarch64` emulation on `x86_64` the `intersect` function is not reliable")
    
        np.random.seed()
    
        a_length = np.random.randint(1, first_length_bound)
        b_length = np.random.randint(1, second_length_bound)
        a = np.random.randint(first_length_bound * 2, size=a_length, dtype=dtype)
        b = np.random.randint(second_length_bound * 2, size=b_length, dtype=dtype)
    
        # Remove duplicates, converting into sorted arrays
        a = np.unique(a)
        b = np.unique(b)
    
        keep_one_capability(capability)
        expected = baseline_intersect(a, b)
        result = simd.intersect(a, b)
    
 >       assert round(float(expected)) == round(float(result)), f"Missing {np.intersect1d(a, b)} from {a} and {b}"
 E       AssertionError: Missing [] from [   0    2    7   13   15   16   17   19   24   25   30   32   33   35
 E            37   39   46   47   48   50   52   57   62   65   66   68   70   72
 E            75   81   85   88   95   98  100  104  105  108  110  114  115  122
 E           123  125  127  128  131  133  135  136  138  139  143  145  149  153
 E           155  159  160  165  173  182  195  196  199  200  203  212  215  217
 E           221  222  226  227  232  239  243  250  251  254  255  256  259  262
 E           263  265  266  267  269  278  284  286  287  288  300  303  304  305
 E           306  309  310  318  319  320  321  323  324  326  332  334  342  343
 E           349  353  362  365  368  369  370  371  374  375  384  389  390  391
 E           392  395  396  398  399  411  413  421  433  439  440  441  443  451
 E           452  453  457  458  463  464  467  469  474  485  490  494  496  497
 E           500  502  510  513  514  520  523  527  528  532  534  535  538  539
 E           541  544  545  551  559  562  563  565  566  568  571  576  577  579
 E           580  582  585  587  588  589  591  592  595  596  597  599  605  606
 E           615  617  618  621  622  629  632  637  643  644  645  646  651  653
 E           655  656  657  662  666  669  673  674  675  680  681  686  687  688
 E           689  700  703  711  717  718  719  721  722  724  727  729  731  732
 E           739  741  744  747  751  752  753  754  755  760  761  764  766  768
 E           770  771  777  784  786  787  790  791  793  795  803  805  809  813
 E           815  822  829  831  835  838  840  843  845  850  852  858  863  864
 E           865  866  869  875  881  889  891  892  895  897  898  900  901  908
 E           910  914  922  926  930  936  940  941  943  944  950  955  956  957
 E           966  974  980  986  989  996 1002 1004 1008 1011 1013 1014 1019 1021
 E          1023 1024 1026 1031 1037 1039 1044 1046 1052 1053 1056 1058 1059 1062
 E          1067 1073 1074 1077 1078 1083 1085 1086 1087 1090 1091 1092 1101 1103
 E          1105 1110 1114 1115 1119 1120 1127 1133 1135 1141 1146 1151 1154 1160
 E          1161 1167 1169 1171 1172 1173 1175 1176 1177 1180 1181 1182 1190 1193
 E          1194 1196 1200 1201 1202 1206 1208 1217 1218 1219 1223 1225 1228 1230
 E          1245 1256 1257 1261 1263 1264 1265 1267 1270 1271 1275 1276 1277 1279
 E          1283 1284 1286 1290 1294 1298 1299 1300 1302 1303 1304 1305 1311 1313
 E          1318 1331 1332 1337 1344 1348 1349 1352 1353 1358 1360 1364 1368 1369
 E          1370 1379 1380 1382 1384 1387 1390 1391 1392 1395 1400 1401 1408 1409
 E          1410 1411 1423 1440 1442 1443 1447 1451 1452 1455 1456 1457 1458 1462
 E          1466 1469 1471 1472 1473 1474 1479 1482 1486 1493 1494 1496 1498 1500
 E          1502 1509 1513 1516 1517 1526 1531 1532 1535 1542 1545 1547 1549 1558
 E          1560 1561 1565 1575 1576 1580 1581 1582 1584 1596 1597 1598 1600 1608
 E          1609 1612 1614 1615 1616 1628 1629 1630 1640 1642 1650 1651 1654 1656
 E          1657 1664 1665 1667 1668 1670 1671 1674 1675 1676 1677 1679 1681 1685
 E          1687 1688 1689 1691 1692 1697 1699 1701 1704 1708 1709 1711 1712 1714
 E          1716 1718 1722 1730 1732 1734 1739 1741 1747 1749 1751 1752 1754 1756
 E          1758 1762 1763 1772 1773 1774 1775 1776 1777 1782 1784 1787 1788 1790
 E          1794 1795 1806 1808 1811 1813 1815 1820 1821 1823 1826 1828 1836 1839
 E          1841 1845 1855 1860 1864 1869 1870 1871 1872 1873 1883 1885 1886 1888
 E          1890 1892 1897 1900 1901 1903 1905 1908 1911 1918 1922 1924 1934 1936
 E          1937 1938 1939 1951 1952 1953 1956 1958 1963 1964 1971 1979 1984 1987
 E          1994 1995 1996 1997] and [6]
 E       assert 0 == 1
 E        +  where 0 = round(0.0)
 E        +    where 0.0 = float(0)
 E        +  and   1 = round(1.0)
 E        +    where 1.0 = float(1.0)

 a          = array([   0,    2,    7,   13,   15,   16,   17,   19,   24,   25,   30,
         32,   33,   35,   37,   39,   46,   ..., 1939, 1951, 1952, 1953, 1956, 1958, 1963, 1964, 1971,
       1979, 1984, 1987, 1994, 1995, 1996, 1997], dtype=uint16)
 a_length   = 747
 b          = array([6], dtype=uint16)
 b_length   = 1
 capability = 'neon_i8'
 dtype      = 'uint16'
 expected   = 0
 first_length_bound = 1000
 result     = 1.0
 second_length_bound = 10

 scripts/test.py:1059: AssertionError
 __________________ test_intersect[neon_i8-10-1000-uint32-4-5] __________________

 dtype = 'uint32', first_length_bound = 1000, second_length_bound = 10
 capability = 'neon_i8'

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("dtype", ["uint16", "uint32"])
    @pytest.mark.parametrize("first_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("second_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_intersect(dtype, first_length_bound, second_length_bound, capability):
        """Compares the simd.intersect() function with numpy.intersect1d."""
    
        if is_running_under_qemu() and (platform.machine() == "aarch64" or platform.machine() == "arm64"):
            pytest.skip("In QEMU `aarch64` emulation on `x86_64` the `intersect` function is not reliable")
    
        np.random.seed()
    
        a_length = np.random.randint(1, first_length_bound)
        b_length = np.random.randint(1, second_length_bound)
        a = np.random.randint(first_length_bound * 2, size=a_length, dtype=dtype)
        b = np.random.randint(second_length_bound * 2, size=b_length, dtype=dtype)
    
        # Remove duplicates, converting into sorted arrays
        a = np.unique(a)
        b = np.unique(b)
    
        keep_one_capability(capability)
        expected = baseline_intersect(a, b)
        result = simd.intersect(a, b)
    
 >       assert round(float(expected)) == round(float(result)), f"Missing {np.intersect1d(a, b)} from {a} and {b}"
 E       AssertionError: Missing [] from [   0    1    7    8   12   16   24   25   28   33   36   37   40   41
 E            42   43   49   51   52   54   55   56   58   59   60   63   69   75
 E            82   83   87   97   98  104  109  114  117  120  125  128  130  134
 E           135  137  141  146  149  156  160  167  173  176  178  182  187  188
 E           192  196  207  213  216  230  233  240  250  251  253  262  266  279
 E           282  284  285  292  294  304  311  314  315  317  318  326  330  331
 E           332  340  341  347  348  350  351  354  358  361  362  381  388  393
 E           396  397  401  407  413  418  421  427  430  433  444  452  453  455
 E           461  464  469  475  484  488  506  508  516  518  532  534  535  544
 E           545  548  556  558  560  561  568  569  578  600  603  605  607  614
 E           620  622  635  637  640  643  644  648  651  654  657  658  661  665
 E           667  675  679  692  694  696  697  698  700  701  702  707  711  712
 E           714  720  726  728  731  733  735  738  739  740  741  742  743  751
 E           756  766  770  774  777  778  779  788  790  791  798  807  814  816
 E           818  821  823  824  829  838  842  849  850  854  855  857  862  864
 E           866  873  883  884  886  892  914  918  928  932  933  935  939  940
 E           942  947  950  962  972  983  987  988  993  998 1001 1008 1010 1013
 E          1032 1033 1034 1035 1037 1039 1045 1054 1055 1056 1058 1060 1068 1073
 E          1077 1080 1081 1086 1087 1089 1094 1099 1104 1107 1110 1111 1112 1119
 E          1120 1122 1128 1129 1133 1136 1137 1139 1141 1142 1154 1157 1158 1160
 E          1167 1171 1178 1181 1184 1195 1198 1199 1202 1205 1206 1209 1211 1213
 E          1218 1220 1221 1226 1234 1237 1238 1244 1245 1246 1253 1257 1263 1267
 E          1271 1272 1277 1280 1283 1288 1292 1296 1301 1302 1307 1308 1309 1310
 E          1311 1317 1328 1340 1345 1346 1347 1357 1382 1383 1388 1391 1392 1394
 E          1395 1397 1400 1407 1409 1414 1419 1432 1433 1438 1449 1451 1452 1454
 E          1455 1457 1472 1479 1483 1487 1493 1494 1496 1500 1505 1508 1509 1517
 E          1519 1521 1522 1528 1531 1539 1542 1552 1554 1562 1567 1568 1571 1574
 E          1575 1585 1586 1587 1599 1602 1605 1610 1618 1623 1629 1631 1633 1646
 E          1652 1653 1666 1673 1686 1692 1693 1695 1696 1706 1707 1708 1712 1715
 E          1717 1720 1735 1736 1748 1752 1760 1763 1774 1775 1781 1795 1816 1821
 E          1822 1826 1835 1847 1850 1853 1861 1862 1870 1873 1878 1890 1891 1895
 E          1897 1899 1904 1908 1914 1915 1917 1921 1922 1925 1929 1930 1939 1944
 E          1949 1952 1955 1957 1959 1962 1964 1973 1978 1980 1982 1988 1992 1993] and [14 19]
 E       assert 0 == 1
 E        +  where 0 = round(0.0)
 E        +    where 0.0 = float(0)
 E        +  and   1 = round(1.0)
 E        +    where 1.0 = float(1.0)

 a          = array([   0,    1,    7,    8,   12,   16,   24,   25,   28,   33,   36,
         37,   40,   41,   42,   43,   49,   ..., 1944, 1949, 1952, 1955,
       1957, 1959, 1962, 1964, 1973, 1978, 1980, 1982, 1988, 1992, 1993],
      dtype=uint32)
 a_length   = 533
 b          = array([14, 19], dtype=uint32)
 b_length   = 2
 capability = 'neon_i8'
 dtype      = 'uint32'
 expected   = 0
 first_length_bound = 1000
 result     = 1.0
 second_length_bound = 10

 scripts/test.py:1059: AssertionError
 _________________ test_intersect[neon_i8-100-1000-uint16-2-5] __________________

 dtype = 'uint16', first_length_bound = 1000, second_length_bound = 100
 capability = 'neon_i8'

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("dtype", ["uint16", "uint32"])
    @pytest.mark.parametrize("first_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("second_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_intersect(dtype, first_length_bound, second_length_bound, capability):
        """Compares the simd.intersect() function with numpy.intersect1d."""
    
        if is_running_under_qemu() and (platform.machine() == "aarch64" or platform.machine() == "arm64"):
            pytest.skip("In QEMU `aarch64` emulation on `x86_64` the `intersect` function is not reliable")
    
        np.random.seed()
    
        a_length = np.random.randint(1, first_length_bound)
        b_length = np.random.randint(1, second_length_bound)
        a = np.random.randint(first_length_bound * 2, size=a_length, dtype=dtype)
        b = np.random.randint(second_length_bound * 2, size=b_length, dtype=dtype)
    
        # Remove duplicates, converting into sorted arrays
        a = np.unique(a)
        b = np.unique(b)
    
        keep_one_capability(capability)
        expected = baseline_intersect(a, b)
        result = simd.intersect(a, b)
    
 >       assert round(float(expected)) == round(float(result)), f"Missing {np.intersect1d(a, b)} from {a} and {b}"
 E       AssertionError: Missing [ 67  72 120 186] from [   0    1    2    3    6   13   19   22   34   35   36   41   51   55
 E            56   58   67   72   73   76   81   82   85   90   92   95   97   99
 E           101  103  107  108  111  113  115  118  120  126  130  131  133  136
 E           138  140  144  148  150  153  154  158  165  167  169  170  174  178
 E           181  183  186  190  195  199  200  202  203  204  212  217  219  220
 E           222  225  230  231  233  234  244  250  252  256  257  259  260  264
 E           266  269  270  277  278  282  285  293  295  296  297  303  305  306
 E           309  312  318  320  321  322  325  329  330  333  336  337  339  340
 E           342  343  349  350  352  353  355  356  357  358  362  364  365  372
 E           375  377  386  391  395  396  399  400  410  411  412  415  416  425
 E           428  430  431  432  436  440  444  449  450  451  452  460  462  464
 E           465  467  470  478  481  482  488  494  496  498  499  500  501  502
 E           503  504  505  508  510  512  514  519  521  526  528  529  534  541
 E           544  548  553  557  558  561  562  563  566  568  569  570  573  577
 E           579  585  586  589  599  600  601  611  613  617  619  621  622  626
 E           627  629  634  637  649  655  658  665  666  667  670  674  676  677
 E           678  680  684  686  687  693  696  697  698  700  702  703  717  718
 E           720  724  726  730  731  734  741  742  745  749  750  753  758  760
 E           761  762  763  766  767  770  772  774  780  781  785  787  788  793
 E           794  795  797  802  805  810  815  816  819  821  822  823  827  829
 E           830  831  835  838  840  845  848  849  850  851  852  855  857  858
 E           860  862  870  872  874  878  880  887  889  890  891  892  895  898
 E           900  907  908  910  913  916  921  923  924  925  930  931  933  934
 E           935  937  941  951  954  956  959  960  964  968  969  973  977  980
 E           981  982  986  988  990  995  998 1003 1004 1006 1011 1012 1014 1016
 E          1020 1028 1032 1034 1037 1038 1039 1044 1045 1050 1052 1062 1064 1065
 E          1068 1071 1077 1086 1088 1092 1094 1095 1096 1101 1102 1108 1109 1110
 E          1116 1118 1121 1125 1126 1128 1129 1132 1135 1138 1140 1142 1145 1150
 E          1151 1152 1156 1161 1162 1164 1172 1174 1181 1182 1183 1184 1186 1188
 E          1189 1195 1199 1200 1211 1213 1215 1217 1218 1220 1225 1227 1229 1230
 E          1233 1238 1239 1241 1243 1246 1247 1250 1251 1252 1254 1255 1256 1258
 E          1261 1262 1266 1270 1274 1275 1276 1279 1283 1285 1286 1289 1299 1300
 E          1304 1306 1308 1310 1311 1312 1313 1314 1316 1321 1323 1327 1335 1344
 E          1346 1348 1350 1351 1352 1354 1356 1357 1361 1364 1367 1374 1380 1381
 E          1382 1387 1388 1389 1395 1396 1398 1399 1401 1404 1406 1408 1409 1415
 E          1425 1427 1429 1430 1431 1432 1435 1439 1440 1445 1447 1450 1451 1453
 E          1455 1457 1458 1461 1462 1469 1470 1472 1474 1476 1479 1480 1481 1482
 E          1484 1485 1486 1487 1488 1495 1505 1510 1514 1515 1519 1522 1523 1525
 E          1531 1533 1540 1541 1542 1543 1545 1546 1552 1553 1556 1559 1566 1569
 E          1579 1580 1581 1583 1587 1590 1594 1595 1596 1598 1599 1600 1602 1607
 E          1609 1611 1615 1618 1619 1621 1623 1624 1632 1635 1636 1638 1643 1646
 E          1650 1656 1658 1665 1666 1670 1671 1672 1673 1675 1676 1678 1682 1692
 E          1696 1697 1700 1703 1705 1710 1713 1718 1720 1721 1723 1730 1733 1737
 E          1738 1742 1744 1753 1754 1755 1761 1762 1770 1776 1785 1786 1789 1792
 E          1794 1798 1801 1807 1808 1812 1813 1816 1817 1818 1826 1827 1832 1835
 E          1837 1839 1840 1843 1847 1849 1852 1853 1854 1857 1859 1862 1863 1866
 E          1868 1870 1873 1875 1879 1880 1881 1883 1885 1886 1888 1889 1897 1899
 E          1901 1903 1904 1906 1909 1913 1914 1917 1926 1927 1929 1938 1939 1944
 E          1946 1949 1952 1953 1954 1956 1958 1959 1960 1961 1968 1970 1974 1975
 E          1977 1983 1984 1986 1987 1988 1989 1992 1994 1998] and [ 16  23  40  47  60  67  72  91  93 114 120 134 152 176 180 186]
 E       assert 4 == 5
 E        +  where 4 = round(4.0)
 E        +    where 4.0 = float(4)
 E        +  and   5 = round(5.0)
 E        +    where 5.0 = float(5.0)

 a          = array([   0,    1,    2,    3,    6,   13,   19,   22,   34,   35,   36,
         41,   51,   55,   56,   58,   67,   ... 1961,
       1968, 1970, 1974, 1975, 1977, 1983, 1984, 1986, 1987, 1988, 1989,
       1992, 1994, 1998], dtype=uint16)
 a_length   = 863
 b          = array([ 16,  23,  40,  47,  60,  67,  72,  91,  93, 114, 120, 134, 152,
       176, 180, 186], dtype=uint16)
 b_length   = 16
 capability = 'neon_i8'
 dtype      = 'uint16'
 expected   = 4
 first_length_bound = 1000
 result     = 5.0
 second_length_bound = 100

 scripts/test.py:1059: AssertionError
 _____________________ test_intersect[sve-10-10-uint16-2-5] _____________________

 dtype = 'uint16', first_length_bound = 10, second_length_bound = 10
 capability = 'sve'

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("dtype", ["uint16", "uint32"])
    @pytest.mark.parametrize("first_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("second_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_intersect(dtype, first_length_bound, second_length_bound, capability):
        """Compares the simd.intersect() function with numpy.intersect1d."""
    
        if is_running_under_qemu() and (platform.machine() == "aarch64" or platform.machine() == "arm64"):
            pytest.skip("In QEMU `aarch64` emulation on `x86_64` the `intersect` function is not reliable")
    
        np.random.seed()
    
        a_length = np.random.randint(1, first_length_bound)
        b_length = np.random.randint(1, second_length_bound)
        a = np.random.randint(first_length_bound * 2, size=a_length, dtype=dtype)
        b = np.random.randint(second_length_bound * 2, size=b_length, dtype=dtype)
    
        # Remove duplicates, converting into sorted arrays
        a = np.unique(a)
        b = np.unique(b)
    
        keep_one_capability(capability)
        expected = baseline_intersect(a, b)
        result = simd.intersect(a, b)
    
 >       assert round(float(expected)) == round(float(result)), f"Missing {np.intersect1d(a, b)} from {a} and {b}"
 E       AssertionError: Missing [] from [ 0  2 12 17 19] and [8]
 E       assert 0 == 1
 E        +  where 0 = round(0.0)
 E        +    where 0.0 = float(0)
 E        +  and   1 = round(1.0)
 E        +    where 1.0 = float(1.0)

 a          = array([ 0,  2, 12, 17, 19], dtype=uint16)
 a_length   = 6
 b          = array([8], dtype=uint16)
 b_length   = 1
 capability = 'sve'
 dtype      = 'uint16'
 expected   = 0
 first_length_bound = 10
 result     = 1.0
 second_length_bound = 10

 scripts/test.py:1059: AssertionError
 _____________________ test_intersect[sve-10-10-uint16-4-5] _____________________

 dtype = 'uint16', first_length_bound = 10, second_length_bound = 10
 capability = 'sve'

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("dtype", ["uint16", "uint32"])
    @pytest.mark.parametrize("first_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("second_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_intersect(dtype, first_length_bound, second_length_bound, capability):
        """Compares the simd.intersect() function with numpy.intersect1d."""
    
        if is_running_under_qemu() and (platform.machine() == "aarch64" or platform.machine() == "arm64"):
            pytest.skip("In QEMU `aarch64` emulation on `x86_64` the `intersect` function is not reliable")
    
        np.random.seed()
    
        a_length = np.random.randint(1, first_length_bound)
        b_length = np.random.randint(1, second_length_bound)
        a = np.random.randint(first_length_bound * 2, size=a_length, dtype=dtype)
        b = np.random.randint(second_length_bound * 2, size=b_length, dtype=dtype)
    
        # Remove duplicates, converting into sorted arrays
        a = np.unique(a)
        b = np.unique(b)
    
        keep_one_capability(capability)
        expected = baseline_intersect(a, b)
        result = simd.intersect(a, b)
    
 >       assert round(float(expected)) == round(float(result)), f"Missing {np.intersect1d(a, b)} from {a} and {b}"
 E       AssertionError: Missing [11] from [ 0  2 11] and [ 1  5  9 11 13 16 19]
 E       assert 1 == 2
 E        +  where 1 = round(1.0)
 E        +    where 1.0 = float(1)
 E        +  and   2 = round(2.0)
 E        +    where 2.0 = float(2.0)

 a          = array([ 0,  2, 11], dtype=uint16)
 a_length   = 4
 b          = array([ 1,  5,  9, 11, 13, 16, 19], dtype=uint16)
 b_length   = 8
 capability = 'sve'
 dtype      = 'uint16'
 expected   = 1
 first_length_bound = 10
 result     = 2.0
 second_length_bound = 10

 scripts/test.py:1059: AssertionError
 _____________________ test_intersect[sve-10-10-uint16-5-5] _____________________

 dtype = 'uint16', first_length_bound = 10, second_length_bound = 10
 capability = 'sve'

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("dtype", ["uint16", "uint32"])
    @pytest.mark.parametrize("first_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("second_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_intersect(dtype, first_length_bound, second_length_bound, capability):
        """Compares the simd.intersect() function with numpy.intersect1d."""
    
        if is_running_under_qemu() and (platform.machine() == "aarch64" or platform.machine() == "arm64"):
            pytest.skip("In QEMU `aarch64` emulation on `x86_64` the `intersect` function is not reliable")
    
        np.random.seed()
    
        a_length = np.random.randint(1, first_length_bound)
        b_length = np.random.randint(1, second_length_bound)
        a = np.random.randint(first_length_bound * 2, size=a_length, dtype=dtype)
        b = np.random.randint(second_length_bound * 2, size=b_length, dtype=dtype)
    
        # Remove duplicates, converting into sorted arrays
        a = np.unique(a)
        b = np.unique(b)
    
        keep_one_capability(capability)
        expected = baseline_intersect(a, b)
        result = simd.intersect(a, b)
    
 >       assert round(float(expected)) == round(float(result)), f"Missing {np.intersect1d(a, b)} from {a} and {b}"
 E       AssertionError: Missing [] from [ 0 11 19] and [ 3  4  5  9 12 15 18]
 E       assert 0 == 1
 E        +  where 0 = round(0.0)
 E        +    where 0.0 = float(0)
 E        +  and   1 = round(1.0)
 E        +    where 1.0 = float(1.0)

 a          = array([ 0, 11, 19], dtype=uint16)
 a_length   = 4
 b          = array([ 3,  4,  5,  9, 12, 15, 18], dtype=uint16)
 b_length   = 8
 capability = 'sve'
 dtype      = 'uint16'
 expected   = 0
 first_length_bound = 10
 result     = 1.0
 second_length_bound = 10

 scripts/test.py:1059: AssertionError
 _____________________ test_intersect[sve-10-10-uint32-1-5] _____________________

 dtype = 'uint32', first_length_bound = 10, second_length_bound = 10
 capability = 'sve'

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("dtype", ["uint16", "uint32"])
    @pytest.mark.parametrize("first_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("second_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_intersect(dtype, first_length_bound, second_length_bound, capability):
        """Compares the simd.intersect() function with numpy.intersect1d."""
    
        if is_running_under_qemu() and (platform.machine() == "aarch64" or platform.machine() == "arm64"):
            pytest.skip("In QEMU `aarch64` emulation on `x86_64` the `intersect` function is not reliable")
    
        np.random.seed()
    
        a_length = np.random.randint(1, first_length_bound)
        b_length = np.random.randint(1, second_length_bound)
        a = np.random.randint(first_length_bound * 2, size=a_length, dtype=dtype)
        b = np.random.randint(second_length_bound * 2, size=b_length, dtype=dtype)
    
        # Remove duplicates, converting into sorted arrays
        a = np.unique(a)
        b = np.unique(b)
    
        keep_one_capability(capability)
        expected = baseline_intersect(a, b)
        result = simd.intersect(a, b)
    
 >       assert round(float(expected)) == round(float(result)), f"Missing {np.intersect1d(a, b)} from {a} and {b}"
 E       AssertionError: Missing [3 6] from [ 0  2  3  6 10 12 14 15] and [ 3  4  5  6  8 18]
 E       assert 2 == 3
 E        +  where 2 = round(2.0)
 E        +    where 2.0 = float(2)
 E        +  and   3 = round(3.0)
 E        +    where 3.0 = float(3.0)

 a          = array([ 0,  2,  3,  6, 10, 12, 14, 15], dtype=uint32)
 a_length   = 9
 b          = array([ 3,  4,  5,  6,  8, 18], dtype=uint32)
 b_length   = 9
 capability = 'sve'
 dtype      = 'uint32'
 expected   = 2
 first_length_bound = 10
 result     = 3.0
 second_length_bound = 10

 scripts/test.py:1059: AssertionError
 ____________________ test_intersect[sve-10-100-uint16-4-5] _____________________

 dtype = 'uint16', first_length_bound = 100, second_length_bound = 10
 capability = 'sve'

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("dtype", ["uint16", "uint32"])
    @pytest.mark.parametrize("first_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("second_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_intersect(dtype, first_length_bound, second_length_bound, capability):
        """Compares the simd.intersect() function with numpy.intersect1d."""
    
        if is_running_under_qemu() and (platform.machine() == "aarch64" or platform.machine() == "arm64"):
            pytest.skip("In QEMU `aarch64` emulation on `x86_64` the `intersect` function is not reliable")
    
        np.random.seed()
    
        a_length = np.random.randint(1, first_length_bound)
        b_length = np.random.randint(1, second_length_bound)
        a = np.random.randint(first_length_bound * 2, size=a_length, dtype=dtype)
        b = np.random.randint(second_length_bound * 2, size=b_length, dtype=dtype)
    
        # Remove duplicates, converting into sorted arrays
        a = np.unique(a)
        b = np.unique(b)
    
        keep_one_capability(capability)
        expected = baseline_intersect(a, b)
        result = simd.intersect(a, b)
    
 >       assert round(float(expected)) == round(float(result)), f"Missing {np.intersect1d(a, b)} from {a} and {b}"
 E       AssertionError: Missing [10] from [  0   1   3   5  10  11  12  13  14  15  21  23  28  33  36  39  42  46
 E           53  55  58  59  64  65  74  75  78  79  82  87  89  92  93  97  98 101
 E          103 106 107 108 112 113 114 115 117 119 121 125 129 130 133 134 135 138
 E          142 143 144 145 146 148 150 152 155 159 162 164 166 168 169 174 175 181
 E          187 189 191 194 198 199] and [ 4  6  7  9 10 19]
 E       assert 1 == 2
 E        +  where 1 = round(1.0)
 E        +    where 1.0 = float(1)
 E        +  and   2 = round(2.0)
 E        +    where 2.0 = float(2.0)

 a          = array([  0,   1,   3,   5,  10,  11,  12,  13,  14,  15,  21,  23,  28,
        33,  36,  39,  42,  46,  53,  55,  58,..., 150, 152, 155, 159, 162,
       164, 166, 168, 169, 174, 175, 181, 187, 189, 191, 194, 198, 199],
      dtype=uint16)
 a_length   = 99
 b          = array([ 4,  6,  7,  9, 10, 19], dtype=uint16)
 b_length   = 7
 capability = 'sve'
 dtype      = 'uint16'
 expected   = 1
 first_length_bound = 100
 result     = 2.0
 second_length_bound = 10

 scripts/test.py:1059: AssertionError
 ____________________ test_intersect[sve-10-100-uint32-1-5] _____________________

 dtype = 'uint32', first_length_bound = 100, second_length_bound = 10
 capability = 'sve'

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("dtype", ["uint16", "uint32"])
    @pytest.mark.parametrize("first_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("second_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_intersect(dtype, first_length_bound, second_length_bound, capability):
        """Compares the simd.intersect() function with numpy.intersect1d."""
    
        if is_running_under_qemu() and (platform.machine() == "aarch64" or platform.machine() == "arm64"):
            pytest.skip("In QEMU `aarch64` emulation on `x86_64` the `intersect` function is not reliable")
    
        np.random.seed()
    
        a_length = np.random.randint(1, first_length_bound)
        b_length = np.random.randint(1, second_length_bound)
        a = np.random.randint(first_length_bound * 2, size=a_length, dtype=dtype)
        b = np.random.randint(second_length_bound * 2, size=b_length, dtype=dtype)
    
        # Remove duplicates, converting into sorted arrays
        a = np.unique(a)
        b = np.unique(b)
    
        keep_one_capability(capability)
        expected = baseline_intersect(a, b)
        result = simd.intersect(a, b)
    
 >       assert round(float(expected)) == round(float(result)), f"Missing {np.intersect1d(a, b)} from {a} and {b}"
 E       AssertionError: Missing [2] from [  0   2   5   7   8  11  15  17  18  19  23  25  33  35  38  43  47  52
 E           55  56  57  58  60  62  66  69  73  76  86  90  92  95  96 104 112 113
 E          117 118 121 125 126 127 129 130 133 135 136 139 140 142 144 145 147 148
 E          150 151 154 155 157 168 169 173 176 177 181 182 184 190 192 193 196 199] and [ 2 12 14]
 E       assert 1 == 2
 E        +  where 1 = round(1.0)
 E        +    where 1.0 = float(1)
 E        +  and   2 = round(2.0)
 E        +    where 2.0 = float(2.0)

 a          = array([  0,   2,   5,   7,   8,  11,  15,  17,  18,  19,  23,  25,  33,
        35,  38,  43,  47,  52,  55,  56,  57,...7, 148, 150, 151, 154, 155, 157, 168, 169, 173, 176, 177, 181,
       182, 184, 190, 192, 193, 196, 199], dtype=uint32)
 a_length   = 82
 b          = array([ 2, 12, 14], dtype=uint32)
 b_length   = 3
 capability = 'sve'
 dtype      = 'uint32'
 expected   = 1
 first_length_bound = 100
 result     = 2.0
 second_length_bound = 10

 scripts/test.py:1059: AssertionError
 ____________________ test_intersect[sve-10-100-uint32-4-5] _____________________

 dtype = 'uint32', first_length_bound = 100, second_length_bound = 10
 capability = 'sve'

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("dtype", ["uint16", "uint32"])
    @pytest.mark.parametrize("first_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("second_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_intersect(dtype, first_length_bound, second_length_bound, capability):
        """Compares the simd.intersect() function with numpy.intersect1d."""
    
        if is_running_under_qemu() and (platform.machine() == "aarch64" or platform.machine() == "arm64"):
            pytest.skip("In QEMU `aarch64` emulation on `x86_64` the `intersect` function is not reliable")
    
        np.random.seed()
    
        a_length = np.random.randint(1, first_length_bound)
        b_length = np.random.randint(1, second_length_bound)
        a = np.random.randint(first_length_bound * 2, size=a_length, dtype=dtype)
        b = np.random.randint(second_length_bound * 2, size=b_length, dtype=dtype)
    
        # Remove duplicates, converting into sorted arrays
        a = np.unique(a)
        b = np.unique(b)
    
        keep_one_capability(capability)
        expected = baseline_intersect(a, b)
        result = simd.intersect(a, b)
    
 >       assert round(float(expected)) == round(float(result)), f"Missing {np.intersect1d(a, b)} from {a} and {b}"
 E       AssertionError: Missing [10 13] from [  0   4  10  13  24  34  35  39  40  44  53  54  56  62  66  69  71  75
 E           76  78  83  90  91  93  97  99 112 115 116 118 119 121 125 127 129 131
 E          137 138 141 143 147 149 155 161 185 188] and [10 13 19]
 E       assert 2 == 3
 E        +  where 2 = round(2.0)
 E        +    where 2.0 = float(2)
 E        +  and   3 = round(3.0)
 E        +    where 3.0 = float(3.0)

 a          = array([  0,   4,  10,  13,  24,  34,  35,  39,  40,  44,  53,  54,  56,
        62,  66,  69,  71,  75,  76,  78,  83,...2, 115, 116, 118, 119, 121, 125, 127, 129, 131, 137, 138, 141,
       143, 147, 149, 155, 161, 185, 188], dtype=uint32)
 a_length   = 48
 b          = array([10, 13, 19], dtype=uint32)
 b_length   = 3
 capability = 'sve'
 dtype      = 'uint32'
 expected   = 2
 first_length_bound = 100
 result     = 3.0
 second_length_bound = 10

 scripts/test.py:1059: AssertionError
 ____________________ test_intersect[sve-10-1000-uint16-2-5] ____________________

 dtype = 'uint16', first_length_bound = 1000, second_length_bound = 10
 capability = 'sve'

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("dtype", ["uint16", "uint32"])
    @pytest.mark.parametrize("first_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("second_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_intersect(dtype, first_length_bound, second_length_bound, capability):
        """Compares the simd.intersect() function with numpy.intersect1d."""
    
        if is_running_under_qemu() and (platform.machine() == "aarch64" or platform.machine() == "arm64"):
            pytest.skip("In QEMU `aarch64` emulation on `x86_64` the `intersect` function is not reliable")
    
        np.random.seed()
    
        a_length = np.random.randint(1, first_length_bound)
        b_length = np.random.randint(1, second_length_bound)
        a = np.random.randint(first_length_bound * 2, size=a_length, dtype=dtype)
        b = np.random.randint(second_length_bound * 2, size=b_length, dtype=dtype)
    
        # Remove duplicates, converting into sorted arrays
        a = np.unique(a)
        b = np.unique(b)
    
        keep_one_capability(capability)
        expected = baseline_intersect(a, b)
        result = simd.intersect(a, b)
    
 >       assert round(float(expected)) == round(float(result)), f"Missing {np.intersect1d(a, b)} from {a} and {b}"
 E       AssertionError: Missing [] from [   0    2    7   29   32   36   40   42   46   48   52   53   57   58
 E            63   70   74   76   78   86   91   96  108  111  119  139  144  147
 E           151  155  156  158  162  166  170  174  177  189  195  201  205  214
 E           226  231  234  237  238  239  241  248  253  257  258  265  267  276
 E           279  286  287  292  298  305  307  308  309  318  320  323  326  331
 E           336  338  340  346  347  348  357  360  363  367  374  382  383  396
 E           404  407  414  420  430  431  436  438  444  448  451  453  460  462
 E           468  479  490  500  504  508  514  519  522  526  532  537  540  546
 E           548  549  557  558  563  565  566  567  571  575  579  583  585  600
 E           602  606  607  615  620  626  627  637  644  647  662  664  669  675
 E           680  683  692  705  707  712  713  723  724  725  726  727  728  738
 E           743  754  770  775  777  780  786  793  796  797  801  811  812  821
 E           831  832  837  839  842  852  873  876  883  886  890  895  904  908
 E           910  918  924  927  930  935  936  959  961  966  975  976  982  990
 E           995  997 1008 1014 1023 1029 1033 1034 1036 1053 1058 1073 1075 1077
 E          1080 1083 1086 1090 1092 1093 1097 1098 1100 1105 1110 1113 1115 1125
 E          1126 1142 1144 1146 1147 1148 1153 1154 1159 1161 1170 1175 1176 1178
 E          1182 1186 1188 1204 1207 1217 1224 1233 1234 1236 1239 1241 1243 1249
 E          1250 1251 1252 1255 1257 1258 1260 1266 1267 1271 1272 1279 1284 1286
 E          1289 1291 1292 1296 1301 1306 1311 1315 1317 1318 1322 1323 1325 1328
 E          1331 1335 1340 1341 1348 1352 1353 1376 1377 1383 1395 1396 1399 1410
 E          1412 1419 1420 1424 1431 1434 1442 1447 1451 1464 1466 1470 1481 1485
 E          1486 1487 1493 1499 1505 1507 1511 1517 1520 1522 1523 1531 1533 1537
 E          1546 1552 1553 1554 1557 1562 1563 1567 1573 1577 1580 1585 1592 1594
 E          1596 1599 1600 1603 1610 1615 1622 1633 1635 1641 1652 1653 1654 1669
 E          1670 1678 1680 1686 1701 1704 1711 1713 1717 1719 1728 1731 1732 1744
 E          1758 1762 1769 1770 1777 1778 1779 1782 1783 1785 1786 1790 1795 1800
 E          1806 1813 1814 1815 1819 1820 1828 1831 1835 1847 1858 1860 1863 1864
 E          1869 1872 1873 1878 1883 1891 1894 1901 1911 1926 1935 1936 1950 1956
 E          1969 1975 1979 1981 1989 1990 1999] and [ 3 10 14 15 17 19]
 E       assert 0 == 1
 E        +  where 0 = round(0.0)
 E        +    where 0.0 = float(0)
 E        +  and   1 = round(1.0)
 E        +    where 1.0 = float(1.0)

 a          = array([   0,    2,    7,   29,   32,   36,   40,   42,   46,   48,   52,
         53,   57,   58,   63,   70,   74,   ..., 1891, 1894, 1901, 1911, 1926, 1935, 1936, 1950, 1956, 1969,
       1975, 1979, 1981, 1989, 1990, 1999], dtype=uint16)
 a_length   = 472
 b          = array([ 3, 10, 14, 15, 17, 19], dtype=uint16)
 b_length   = 6
 capability = 'sve'
 dtype      = 'uint16'
 expected   = 0
 first_length_bound = 1000
 result     = 1.0
 second_length_bound = 10

 scripts/test.py:1059: AssertionError
 ____________________ test_intersect[sve-100-100-uint16-5-5] ____________________

 dtype = 'uint16', first_length_bound = 100, second_length_bound = 100
 capability = 'sve'

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("dtype", ["uint16", "uint32"])
    @pytest.mark.parametrize("first_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("second_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_intersect(dtype, first_length_bound, second_length_bound, capability):
        """Compares the simd.intersect() function with numpy.intersect1d."""
    
        if is_running_under_qemu() and (platform.machine() == "aarch64" or platform.machine() == "arm64"):
            pytest.skip("In QEMU `aarch64` emulation on `x86_64` the `intersect` function is not reliable")
    
        np.random.seed()
    
        a_length = np.random.randint(1, first_length_bound)
        b_length = np.random.randint(1, second_length_bound)
        a = np.random.randint(first_length_bound * 2, size=a_length, dtype=dtype)
        b = np.random.randint(second_length_bound * 2, size=b_length, dtype=dtype)
    
        # Remove duplicates, converting into sorted arrays
        a = np.unique(a)
        b = np.unique(b)
    
        keep_one_capability(capability)
        expected = baseline_intersect(a, b)
        result = simd.intersect(a, b)
    
 >       assert round(float(expected)) == round(float(result)), f"Missing {np.intersect1d(a, b)} from {a} and {b}"
 E       AssertionError: Missing [] from [  0   2   9  24  39  41  44  53  55  58  63  74  84 106 108 117 119 132
 E          133 143 148 167 168 178 180 188 197] and [ 49  71  78  93 150 158 159 183 184]
 E       assert 0 == 1
 E        +  where 0 = round(0.0)
 E        +    where 0.0 = float(0)
 E        +  and   1 = round(1.0)
 E        +    where 1.0 = float(1.0)

 a          = array([  0,   2,   9,  24,  39,  41,  44,  53,  55,  58,  63,  74,  84,
       106, 108, 117, 119, 132, 133, 143, 148, 167, 168, 178, 180, 188,
       197], dtype=uint16)
 a_length   = 29
 b          = array([ 49,  71,  78,  93, 150, 158, 159, 183, 184], dtype=uint16)
 b_length   = 9
 capability = 'sve'
 dtype      = 'uint16'
 expected   = 0
 first_length_bound = 100
 result     = 1.0
 second_length_bound = 100

 scripts/test.py:1059: AssertionError
 ____________________ test_intersect[sve-1000-10-uint32-5-5] ____________________

 dtype = 'uint32', first_length_bound = 10, second_length_bound = 1000
 capability = 'sve'

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("dtype", ["uint16", "uint32"])
    @pytest.mark.parametrize("first_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("second_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_intersect(dtype, first_length_bound, second_length_bound, capability):
        """Compares the simd.intersect() function with numpy.intersect1d."""
    
        if is_running_under_qemu() and (platform.machine() == "aarch64" or platform.machine() == "arm64"):
            pytest.skip("In QEMU `aarch64` emulation on `x86_64` the `intersect` function is not reliable")
    
        np.random.seed()
    
        a_length = np.random.randint(1, first_length_bound)
        b_length = np.random.randint(1, second_length_bound)
        a = np.random.randint(first_length_bound * 2, size=a_length, dtype=dtype)
        b = np.random.randint(second_length_bound * 2, size=b_length, dtype=dtype)
    
        # Remove duplicates, converting into sorted arrays
        a = np.unique(a)
        b = np.unique(b)
    
        keep_one_capability(capability)
        expected = baseline_intersect(a, b)
        result = simd.intersect(a, b)
    
 >       assert round(float(expected)) == round(float(result)), f"Missing {np.intersect1d(a, b)} from {a} and {b}"
 E       AssertionError: Missing [] from [ 0  2 11 18] and [ 471  503  813 1110 1435 1793 1968]
 E       assert 0 == 1
 E        +  where 0 = round(0.0)
 E        +    where 0.0 = float(0)
 E        +  and   1 = round(1.0)
 E        +    where 1.0 = float(1.0)

 a          = array([ 0,  2, 11, 18], dtype=uint32)
 a_length   = 4
 b          = array([ 471,  503,  813, 1110, 1435, 1793, 1968], dtype=uint32)
 b_length   = 7
 capability = 'sve'
 dtype      = 'uint32'
 expected   = 0
 first_length_bound = 10
 result     = 1.0
 second_length_bound = 1000

 scripts/test.py:1059: AssertionError
 ___________________ test_intersect[sve_f16-10-10-uint32-3-5] ___________________

 dtype = 'uint32', first_length_bound = 10, second_length_bound = 10
 capability = 'sve_f16'

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("dtype", ["uint16", "uint32"])
    @pytest.mark.parametrize("first_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("second_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_intersect(dtype, first_length_bound, second_length_bound, capability):
        """Compares the simd.intersect() function with numpy.intersect1d."""
    
        if is_running_under_qemu() and (platform.machine() == "aarch64" or platform.machine() == "arm64"):
            pytest.skip("In QEMU `aarch64` emulation on `x86_64` the `intersect` function is not reliable")
    
        np.random.seed()
    
        a_length = np.random.randint(1, first_length_bound)
        b_length = np.random.randint(1, second_length_bound)
        a = np.random.randint(first_length_bound * 2, size=a_length, dtype=dtype)
        b = np.random.randint(second_length_bound * 2, size=b_length, dtype=dtype)
    
        # Remove duplicates, converting into sorted arrays
        a = np.unique(a)
        b = np.unique(b)
    
        keep_one_capability(capability)
        expected = baseline_intersect(a, b)
        result = simd.intersect(a, b)
    
 >       assert round(float(expected)) == round(float(result)), f"Missing {np.intersect1d(a, b)} from {a} and {b}"
 E       AssertionError: Missing [] from [ 0  7 17] and [2]
 E       assert 0 == 1
 E        +  where 0 = round(0.0)
 E        +    where 0.0 = float(0)
 E        +  and   1 = round(1.0)
 E        +    where 1.0 = float(1.0)

 a          = array([ 0,  7, 17], dtype=uint32)
 a_length   = 3
 b          = array([2], dtype=uint32)
 b_length   = 1
 capability = 'sve_f16'
 dtype      = 'uint32'
 expected   = 0
 first_length_bound = 10
 result     = 1.0
 second_length_bound = 10

 scripts/test.py:1059: AssertionError
 ___________________ test_intersect[sve_f16-10-10-uint32-5-5] ___________________

 dtype = 'uint32', first_length_bound = 10, second_length_bound = 10
 capability = 'sve_f16'

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("dtype", ["uint16", "uint32"])
    @pytest.mark.parametrize("first_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("second_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_intersect(dtype, first_length_bound, second_length_bound, capability):
        """Compares the simd.intersect() function with numpy.intersect1d."""
    
        if is_running_under_qemu() and (platform.machine() == "aarch64" or platform.machine() == "arm64"):
            pytest.skip("In QEMU `aarch64` emulation on `x86_64` the `intersect` function is not reliable")
    
        np.random.seed()
    
        a_length = np.random.randint(1, first_length_bound)
        b_length = np.random.randint(1, second_length_bound)
        a = np.random.randint(first_length_bound * 2, size=a_length, dtype=dtype)
        b = np.random.randint(second_length_bound * 2, size=b_length, dtype=dtype)
    
        # Remove duplicates, converting into sorted arrays
        a = np.unique(a)
        b = np.unique(b)
    
        keep_one_capability(capability)
        expected = baseline_intersect(a, b)
        result = simd.intersect(a, b)
    
 >       assert round(float(expected)) == round(float(result)), f"Missing {np.intersect1d(a, b)} from {a} and {b}"
 E       AssertionError: Missing [ 9 15] from [ 0  5  6  7  9 10 15] and [ 3  9 13 15 17 18]
 E       assert 2 == 3
 E        +  where 2 = round(2.0)
 E        +    where 2.0 = float(2)
 E        +  and   3 = round(3.0)
 E        +    where 3.0 = float(3.0)

 a          = array([ 0,  5,  6,  7,  9, 10, 15], dtype=uint32)
 a_length   = 9
 b          = array([ 3,  9, 13, 15, 17, 18], dtype=uint32)
 b_length   = 6
 capability = 'sve_f16'
 dtype      = 'uint32'
 expected   = 2
 first_length_bound = 10
 result     = 3.0
 second_length_bound = 10

 scripts/test.py:1059: AssertionError
 __________________ test_intersect[sve_f16-10-100-uint16-2-5] ___________________

 dtype = 'uint16', first_length_bound = 100, second_length_bound = 10
 capability = 'sve_f16'

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("dtype", ["uint16", "uint32"])
    @pytest.mark.parametrize("first_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("second_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_intersect(dtype, first_length_bound, second_length_bound, capability):
        """Compares the simd.intersect() function with numpy.intersect1d."""
    
        if is_running_under_qemu() and (platform.machine() == "aarch64" or platform.machine() == "arm64"):
            pytest.skip("In QEMU `aarch64` emulation on `x86_64` the `intersect` function is not reliable")
    
        np.random.seed()
    
        a_length = np.random.randint(1, first_length_bound)
        b_length = np.random.randint(1, second_length_bound)
        a = np.random.randint(first_length_bound * 2, size=a_length, dtype=dtype)
        b = np.random.randint(second_length_bound * 2, size=b_length, dtype=dtype)
    
        # Remove duplicates, converting into sorted arrays
        a = np.unique(a)
        b = np.unique(b)
    
        keep_one_capability(capability)
        expected = baseline_intersect(a, b)
        result = simd.intersect(a, b)
    
 >       assert round(float(expected)) == round(float(result)), f"Missing {np.intersect1d(a, b)} from {a} and {b}"
 E       AssertionError: Missing [] from [  0   1   6   8  27  32  37  40  46  50  57  67  70  77  80  82  83  84
 E           86  88  97 115 116 122 140 144 147 160 161 163 179 187] and [13]
 E       assert 0 == 1
 E        +  where 0 = round(0.0)
 E        +    where 0.0 = float(0)
 E        +  and   1 = round(1.0)
 E        +    where 1.0 = float(1.0)

 a          = array([  0,   1,   6,   8,  27,  32,  37,  40,  46,  50,  57,  67,  70,
        77,  80,  82,  83,  84,  86,  88,  97, 115, 116, 122, 140, 144,
       147, 160, 161, 163, 179, 187], dtype=uint16)
 a_length   = 35
 b          = array([13], dtype=uint16)
 b_length   = 1
 capability = 'sve_f16'
 dtype      = 'uint16'
 expected   = 0
 first_length_bound = 100
 result     = 1.0
 second_length_bound = 10

 scripts/test.py:1059: AssertionError
 __________________ test_intersect[sve_f16-10-100-uint32-2-5] ___________________

 dtype = 'uint32', first_length_bound = 100, second_length_bound = 10
 capability = 'sve_f16'

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("dtype", ["uint16", "uint32"])
    @pytest.mark.parametrize("first_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("second_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_intersect(dtype, first_length_bound, second_length_bound, capability):
        """Compares the simd.intersect() function with numpy.intersect1d."""
    
        if is_running_under_qemu() and (platform.machine() == "aarch64" or platform.machine() == "arm64"):
            pytest.skip("In QEMU `aarch64` emulation on `x86_64` the `intersect` function is not reliable")
    
        np.random.seed()
    
        a_length = np.random.randint(1, first_length_bound)
        b_length = np.random.randint(1, second_length_bound)
        a = np.random.randint(first_length_bound * 2, size=a_length, dtype=dtype)
        b = np.random.randint(second_length_bound * 2, size=b_length, dtype=dtype)
    
        # Remove duplicates, converting into sorted arrays
        a = np.unique(a)
        b = np.unique(b)
    
        keep_one_capability(capability)
        expected = baseline_intersect(a, b)
        result = simd.intersect(a, b)
    
 >       assert round(float(expected)) == round(float(result)), f"Missing {np.intersect1d(a, b)} from {a} and {b}"
 E       AssertionError: Missing [2 5 7] from [  0   1   2   5   7  10  14  17  22  25  26  27  30  33  36  42  45  50
 E           62  65  69  73  74  77  79  84  89  93  98  99 101 114 124 129 144 146
 E          147 160 161 172 178 183 190 192 193] and [ 2  3  4  5  7 12 13 19]
 E       assert 3 == 4
 E        +  where 3 = round(3.0)
 E        +    where 3.0 = float(3)
 E        +  and   4 = round(4.0)
 E        +    where 4.0 = float(4.0)

 a          = array([  0,   1,   2,   5,   7,  10,  14,  17,  22,  25,  26,  27,  30,
        33,  36,  42,  45,  50,  62,  65,  69,...    89,  93,  98,  99, 101, 114, 124, 129, 144, 146, 147, 160, 161,
       172, 178, 183, 190, 192, 193], dtype=uint32)
 a_length   = 54
 b          = array([ 2,  3,  4,  5,  7, 12, 13, 19], dtype=uint32)
 b_length   = 9
 capability = 'sve_f16'
 dtype      = 'uint32'
 expected   = 3
 first_length_bound = 100
 result     = 4.0
 second_length_bound = 10

 scripts/test.py:1059: AssertionError
 __________________ test_intersect[sve_f16-10-1000-uint16-1-5] __________________

 dtype = 'uint16', first_length_bound = 1000, second_length_bound = 10
 capability = 'sve_f16'

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("dtype", ["uint16", "uint32"])
    @pytest.mark.parametrize("first_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("second_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_intersect(dtype, first_length_bound, second_length_bound, capability):
        """Compares the simd.intersect() function with numpy.intersect1d."""
    
        if is_running_under_qemu() and (platform.machine() == "aarch64" or platform.machine() == "arm64"):
            pytest.skip("In QEMU `aarch64` emulation on `x86_64` the `intersect` function is not reliable")
    
        np.random.seed()
    
        a_length = np.random.randint(1, first_length_bound)
        b_length = np.random.randint(1, second_length_bound)
        a = np.random.randint(first_length_bound * 2, size=a_length, dtype=dtype)
        b = np.random.randint(second_length_bound * 2, size=b_length, dtype=dtype)
    
        # Remove duplicates, converting into sorted arrays
        a = np.unique(a)
        b = np.unique(b)
    
        keep_one_capability(capability)
        expected = baseline_intersect(a, b)
        result = simd.intersect(a, b)
    
 >       assert round(float(expected)) == round(float(result)), f"Missing {np.intersect1d(a, b)} from {a} and {b}"
 E       AssertionError: Missing [ 5  7 14 16] from [   0    3    5    6    7   12   14   16   17   20   22   23   25   26
 E            27   32   33   34   37   42   44   48   51   55   57   71   73   76
 E            78   81   84   86   87   90   91   93   96   97   99  101  102  103
 E           113  114  116  118  122  125  130  132  133  135  147  152  153  154
 E           157  159  160  165  166  170  172  174  175  182  183  186  188  195
 E           198  201  202  203  208  210  218  219  220  221  222  224  226  227
 E           234  235  237  238  239  242  244  245  246  247  248  252  256  258
 E           259  260  262  263  265  266  269  272  273  277  278  279  280  282
 E           288  290  294  295  296  300  302  311  317  320  321  322  324  328
 E           329  330  333  335  337  339  341  343  344  349  350  357  358  359
 E           360  365  366  367  368  370  371  372  373  375  377  378  381  382
 E           393  394  403  404  406  410  411  414  416  417  419  420  421  423
 E           424  425  427  432  437  439  442  444  447  449  450  453  457  462
 E           463  465  466  468  471  472  473  474  475  477  478  480  482  483
 E           485  487  488  499  503  504  505  506  509  513  514  518  519  522
 E           528  531  532  537  538  540  542  548  549  550  553  557  559  560
 E           561  562  563  565  567  568  573  579  580  582  584  585  586  587
 E           593  595  596  602  608  615  618  619  623  624  625  627  629  632
 E           634  635  636  638  645  649  651  659  662  664  665  669  670  671
 E           681  683  688  691  693  694  695  697  699  700  704  706  707  709
 E           711  712  713  718  721  725  728  733  735  736  737  739  743  744
 E           745  747  751  757  767  770  774  775  776  778  779  782  787  793
 E           795  800  801  805  809  811  813  815  817  818  819  821  823  827
 E           833  834  837  839  841  842  843  845  847  848  851  853  856  857
 E           859  860  861  862  863  865  868  869  870  871  873  877  878  883
 E           886  889  891  894  900  904  907  910  929  930  937  938  939  940
 E           943  945  947  950  951  963  965  966  967  969  970  971  972  976
 E           989  995 1002 1003 1005 1006 1007 1009 1011 1012 1016 1017 1020 1024
 E          1025 1026 1028 1033 1036 1037 1042 1046 1051 1052 1053 1054 1057 1059
 E          1063 1064 1066 1067 1069 1071 1072 1076 1081 1083 1084 1085 1087 1092
 E          1100 1102 1103 1104 1105 1108 1109 1110 1112 1113 1116 1126 1127 1128
 E          1135 1137 1143 1146 1147 1151 1154 1155 1160 1161 1164 1165 1166 1167
 E          1168 1169 1170 1171 1174 1175 1177 1184 1186 1188 1191 1193 1195 1196
 E          1201 1203 1204 1205 1206 1207 1210 1212 1215 1220 1222 1223 1226 1228
 E          1234 1236 1238 1241 1243 1245 1246 1248 1249 1251 1252 1254 1255 1258
 E          1260 1261 1262 1263 1264 1265 1267 1272 1273 1274 1275 1276 1279 1280
 E          1281 1287 1289 1294 1296 1297 1300 1302 1303 1310 1311 1312 1313 1316
 E          1317 1320 1321 1322 1324 1325 1332 1334 1335 1342 1346 1349 1351 1353
 E          1356 1360 1361 1362 1363 1364 1367 1369 1374 1375 1377 1379 1386 1394
 E          1395 1396 1397 1401 1402 1403 1404 1405 1406 1408 1412 1413 1415 1422
 E          1425 1429 1432 1433 1434 1435 1437 1438 1439 1440 1441 1445 1446 1447
 E          1449 1450 1452 1460 1462 1464 1466 1469 1474 1480 1481 1492 1498 1500
 E          1508 1510 1512 1513 1515 1516 1523 1524 1527 1530 1534 1539 1540 1541
 E          1546 1547 1554 1555 1561 1564 1567 1568 1571 1579 1585 1587 1592 1594
 E          1596 1600 1601 1602 1604 1605 1611 1612 1615 1616 1624 1626 1630 1631
 E          1632 1634 1635 1639 1643 1644 1646 1648 1656 1661 1664 1665 1666 1667
 E          1670 1672 1673 1680 1681 1682 1684 1686 1687 1689 1690 1695 1696 1697
 E          1704 1710 1711 1712 1714 1721 1731 1739 1741 1742 1747 1748 1755 1762
 E          1764 1770 1777 1778 1782 1786 1789 1790 1792 1795 1796 1797 1798 1801
 E          1804 1809 1810 1812 1816 1818 1820 1821 1822 1823 1826 1827 1833 1834
 E          1835 1837 1840 1845 1846 1848 1851 1852 1855 1860 1863 1864 1866 1875
 E          1876 1877 1879 1880 1881 1887 1889 1890 1892 1895 1899 1900 1901 1902
 E          1906 1913 1915 1921 1922 1924 1925 1927 1928 1932 1933 1934 1936 1938
 E          1939 1941 1949 1951 1952 1955 1956 1957 1959 1960 1965 1972 1979 1987
 E          1989 1991 1993 1994 1995] and [ 2  5  7  8 14 16 19]
 E       assert 4 == 5
 E        +  where 4 = round(4.0)
 E        +    where 4.0 = float(4)
 E        +  and   5 = round(5.0)
 E        +    where 5.0 = float(5.0)

 a          = array([   0,    3,    5,    6,    7,   12,   14,   16,   17,   20,   22,
         23,   25,   26,   27,   32,   33,   ... 1952, 1955,
       1956, 1957, 1959, 1960, 1965, 1972, 1979, 1987, 1989, 1991, 1993,
       1994, 1995], dtype=uint16)
 a_length   = 953
 b          = array([ 2,  5,  7,  8, 14, 16, 19], dtype=uint16)
 b_length   = 9
 capability = 'sve_f16'
 dtype      = 'uint16'
 expected   = 4
 first_length_bound = 1000
 result     = 5.0
 second_length_bound = 10

 scripts/test.py:1059: AssertionError
 __________________ test_intersect[sve_f16-10-1000-uint16-2-5] __________________

 dtype = 'uint16', first_length_bound = 1000, second_length_bound = 10
 capability = 'sve_f16'

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("dtype", ["uint16", "uint32"])
    @pytest.mark.parametrize("first_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("second_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_intersect(dtype, first_length_bound, second_length_bound, capability):
        """Compares the simd.intersect() function with numpy.intersect1d."""
    
        if is_running_under_qemu() and (platform.machine() == "aarch64" or platform.machine() == "arm64"):
            pytest.skip("In QEMU `aarch64` emulation on `x86_64` the `intersect` function is not reliable")
    
        np.random.seed()
    
        a_length = np.random.randint(1, first_length_bound)
        b_length = np.random.randint(1, second_length_bound)
        a = np.random.randint(first_length_bound * 2, size=a_length, dtype=dtype)
        b = np.random.randint(second_length_bound * 2, size=b_length, dtype=dtype)
    
        # Remove duplicates, converting into sorted arrays
        a = np.unique(a)
        b = np.unique(b)
    
        keep_one_capability(capability)
        expected = baseline_intersect(a, b)
        result = simd.intersect(a, b)
    
 >       assert round(float(expected)) == round(float(result)), f"Missing {np.intersect1d(a, b)} from {a} and {b}"
 E       AssertionError: Missing [13] from [   0    3    4    5    8   13   14   15   23   28   34   36   40   51
 E            57   58   64   67   71   72   73   77   79   95   98   99  101  102
 E           104  108  110  114  116  119  121  125  128  133  136  149  150  152
 E           156  158  159  160  162  170  172  175  176  177  183  185  191  198
 E           199  203  210  211  216  217  221  223  224  226  228  232  233  237
 E           240  242  243  244  246  253  258  260  263  264  271  277  280  282
 E           283  285  287  290  296  299  306  311  313  320  325  326  330  332
 E           333  338  340  342  345  350  357  358  361  364  365  366  370  371
 E           372  374  375  376  380  381  383  390  392  393  395  398  399  402
 E           403  407  409  411  412  415  424  426  433  439  444  447  449  452
 E           462  465  466  467  469  472  476  480  482  483  490  491  496  497
 E           509  511  515  516  535  536  537  539  542  545  550  551  553  557
 E           561  562  563  565  567  568  569  572  574  575  576  582  587  591
 E           593  596  602  604  605  606  607  608  612  616  624  627  641  644
 E           647  648  652  658  661  665  667  668  669  670  673  675  679  680
 E           686  687  688  690  695  700  703  704  705  706  708  711  712  715
 E           720  722  725  735  737  738  745  747  748  756  757  762  766  767
 E           770  772  774  777  780  788  794  795  796  800  802  806  808  809
 E           812  814  817  825  828  830  832  834  846  849  850  856  858  861
 E           865  866  868  870  878  880  882  888  893  896  900  901  904  905
 E           906  908  909  910  914  915  916  919  922  923  931  942  943  949
 E           950  951  954  959  961  967  973  976  978  979  981  984  986  989
 E           991  994  995  997 1000 1002 1003 1004 1008 1009 1011 1013 1017 1019
 E          1023 1026 1028 1029 1030 1031 1037 1038 1039 1040 1041 1047 1048 1051
 E          1052 1056 1060 1061 1062 1063 1070 1074 1075 1081 1085 1086 1096 1099
 E          1100 1101 1107 1108 1116 1117 1118 1123 1125 1129 1132 1135 1139 1140
 E          1142 1148 1151 1153 1157 1158 1167 1171 1175 1179 1185 1187 1188 1204
 E          1207 1211 1219 1221 1222 1223 1225 1234 1236 1238 1240 1242 1243 1249
 E          1251 1254 1255 1258 1260 1262 1266 1272 1273 1281 1283 1284 1286 1291
 E          1292 1293 1298 1301 1306 1307 1312 1314 1315 1321 1324 1325 1326 1328
 E          1330 1331 1338 1346 1349 1356 1357 1361 1364 1370 1374 1375 1376 1382
 E          1384 1387 1389 1394 1395 1399 1401 1406 1411 1414 1415 1416 1420 1423
 E          1429 1434 1437 1439 1442 1445 1448 1458 1460 1464 1468 1470 1473 1475
 E          1476 1482 1484 1487 1488 1489 1492 1495 1504 1505 1510 1511 1512 1514
 E          1516 1520 1521 1524 1525 1528 1543 1544 1545 1550 1556 1558 1559 1560
 E          1563 1564 1569 1571 1572 1588 1589 1590 1592 1597 1601 1602 1604 1606
 E          1607 1608 1611 1612 1615 1616 1617 1620 1622 1623 1628 1631 1633 1639
 E          1641 1642 1643 1644 1646 1648 1652 1653 1657 1658 1659 1661 1664 1666
 E          1673 1674 1677 1680 1683 1692 1693 1695 1697 1708 1712 1713 1714 1717
 E          1720 1726 1727 1732 1739 1740 1741 1745 1746 1748 1753 1757 1759 1762
 E          1763 1765 1766 1771 1775 1779 1780 1783 1787 1790 1791 1796 1799 1803
 E          1808 1810 1811 1814 1815 1818 1819 1822 1825 1827 1828 1834 1841 1851
 E          1856 1857 1860 1863 1864 1866 1869 1871 1873 1874 1875 1876 1879 1880
 E          1881 1887 1890 1893 1897 1898 1900 1903 1904 1906 1916 1919 1920 1924
 E          1931 1936 1938 1939 1948 1949 1952 1953 1954 1955 1957 1960 1961 1962
 E          1966 1967 1973 1977 1980 1985 1988 1990 1995] and [ 6 10 11 13 17]
 E       assert 1 == 2
 E        +  where 1 = round(1.0)
 E        +    where 1.0 = float(1)
 E        +  and   2 = round(2.0)
 E        +    where 2.0 = float(2.0)

 a          = array([   0,    3,    4,    5,    8,   13,   14,   15,   23,   28,   34,
         36,   40,   51,   57,   58,   64,   ... 1954, 1955, 1957,
       1960, 1961, 1962, 1966, 1967, 1973, 1977, 1980, 1985, 1988, 1990,
       1995], dtype=uint16)
 a_length   = 783
 b          = array([ 6, 10, 11, 13, 17], dtype=uint16)
 b_length   = 8
 capability = 'sve_f16'
 dtype      = 'uint16'
 expected   = 1
 first_length_bound = 1000
 result     = 2.0
 second_length_bound = 10

 scripts/test.py:1059: AssertionError
 __________________ test_intersect[sve_f16-10-1000-uint16-4-5] __________________

 dtype = 'uint16', first_length_bound = 1000, second_length_bound = 10
 capability = 'sve_f16'

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("dtype", ["uint16", "uint32"])
    @pytest.mark.parametrize("first_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("second_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_intersect(dtype, first_length_bound, second_length_bound, capability):
        """Compares the simd.intersect() function with numpy.intersect1d."""
    
        if is_running_under_qemu() and (platform.machine() == "aarch64" or platform.machine() == "arm64"):
            pytest.skip("In QEMU `aarch64` emulation on `x86_64` the `intersect` function is not reliable")
    
        np.random.seed()
    
        a_length = np.random.randint(1, first_length_bound)
        b_length = np.random.randint(1, second_length_bound)
        a = np.random.randint(first_length_bound * 2, size=a_length, dtype=dtype)
        b = np.random.randint(second_length_bound * 2, size=b_length, dtype=dtype)
    
        # Remove duplicates, converting into sorted arrays
        a = np.unique(a)
        b = np.unique(b)
    
        keep_one_capability(capability)
        expected = baseline_intersect(a, b)
        result = simd.intersect(a, b)
    
 >       assert round(float(expected)) == round(float(result)), f"Missing {np.intersect1d(a, b)} from {a} and {b}"
 E       AssertionError: Missing [] from [   0    7   11   18   35   40   46   66   76  119  123  143  148  154
 E           163  181  194  199  200  203  214  216  217  224  225  232  243  257
 E           274  304  372  376  417  420  431  444  452  459  480  481  491  498
 E           503  504  505  511  512  539  550  579  613  615  623  631  644  649
 E           652  653  669  671  679  682  690  693  746  753  763  785  791  796
 E           809  830  851  858  863  867  875  887  888  892  897  909  916  929
 E           930  935  936  947  951  973  984 1005 1006 1014 1016 1043 1054 1062
 E          1069 1095 1097 1100 1102 1116 1121 1130 1135 1141 1157 1171 1220 1221
 E          1225 1231 1237 1250 1262 1278 1297 1302 1316 1325 1336 1353 1362 1369
 E          1397 1402 1415 1425 1455 1469 1473 1533 1547 1550 1587 1627 1645 1652
 E          1656 1657 1658 1665 1684 1686 1698 1702 1715 1716 1719 1729 1734 1741
 E          1749 1751 1771 1811 1825 1837 1857 1858 1864 1940 1947 1951 1966 1978
 E          1986 1992] and [19]
 E       assert 0 == 1
 E        +  where 0 = round(0.0)
 E        +    where 0.0 = float(0)
 E        +  and   1 = round(1.0)
 E        +    where 1.0 = float(1.0)

 a          = array([   0,    7,   11,   18,   35,   40,   46,   66,   76,  119,  123,
        143,  148,  154,  163,  181,  194,  1...  1749, 1751, 1771, 1811, 1825, 1837, 1857, 1858, 1864, 1940, 1947,
       1951, 1966, 1978, 1986, 1992], dtype=uint16)
 a_length   = 177
 b          = array([19], dtype=uint16)
 b_length   = 1
 capability = 'sve_f16'
 dtype      = 'uint16'
 expected   = 0
 first_length_bound = 1000
 result     = 1.0
 second_length_bound = 10

 scripts/test.py:1059: AssertionError
 __________________ test_intersect[sve_f16-10-1000-uint32-2-5] __________________

 dtype = 'uint32', first_length_bound = 1000, second_length_bound = 10
 capability = 'sve_f16'

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("dtype", ["uint16", "uint32"])
    @pytest.mark.parametrize("first_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("second_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_intersect(dtype, first_length_bound, second_length_bound, capability):
        """Compares the simd.intersect() function with numpy.intersect1d."""
    
        if is_running_under_qemu() and (platform.machine() == "aarch64" or platform.machine() == "arm64"):
            pytest.skip("In QEMU `aarch64` emulation on `x86_64` the `intersect` function is not reliable")
    
        np.random.seed()
    
        a_length = np.random.randint(1, first_length_bound)
        b_length = np.random.randint(1, second_length_bound)
        a = np.random.randint(first_length_bound * 2, size=a_length, dtype=dtype)
        b = np.random.randint(second_length_bound * 2, size=b_length, dtype=dtype)
    
        # Remove duplicates, converting into sorted arrays
        a = np.unique(a)
        b = np.unique(b)
    
        keep_one_capability(capability)
        expected = baseline_intersect(a, b)
        result = simd.intersect(a, b)
    
 >       assert round(float(expected)) == round(float(result)), f"Missing {np.intersect1d(a, b)} from {a} and {b}"
 E       AssertionError: Missing [15] from [   0    1    2    3    6    7    9   10   12   15   18   20   23   24
 E            26   28   32   38   40   41   45   46   47   48   49   52   53   55
 E            56   58   61   63   66   67   69   73   74   75   76   77   79   80
 E            84   86   91   92   93   99  101  106  109  110  113  116  119  120
 E           125  129  132  133  134  140  142  144  146  150  153  159  160  161
 E           166  167  169  171  176  181  182  185  186  187  188  189  190  194
 E           195  196  199  201  202  203  205  208  213  214  215  219  220  223
 E           225  227  232  234  236  237  251  253  257  258  259  265  268  269
 E           271  272  281  282  283  290  295  297  299  300  301  302  305  312
 E           313  315  322  323  327  328  329  332  333  336  337  343  344  346
 E           348  351  353  354  355  356  357  361  363  365  367  369  370  373
 E           374  377  379  381  384  385  389  390  394  398  399  401  405  408
 E           409  411  416  418  421  425  427  428  430  431  433  442  443  446
 E           451  452  460  461  462  464  465  466  467  469  473  476  478  479
 E           483  484  487  489  493  495  498  500  501  504  507  508  511  514
 E           518  520  523  526  527  531  532  533  534  535  536  537  539  542
 E           543  545  548  549  552  553  555  557  559  564  565  566  567  569
 E           571  572  573  575  576  577  579  587  598  601  607  611  619  622
 E           623  628  632  637  638  641  645  646  647  648  651  657  663  667
 E           669  672  673  674  678  680  681  683  686  689  690  693  698  699
 E           700  702  712  717  719  720  721  722  731  734  738  739  741  744
 E           745  751  754  759  761  765  769  773  777  778  782  790  793  796
 E           799  800  806  808  811  813  815  816  818  821  823  827  828  830
 E           833  836  839  841  843  846  849  852  859  863  868  872  873  876
 E           877  880  887  888  889  892  895  897  901  904  906  912  914  926
 E           929  933  938  953  954  956  958  959  961  965  966  967  969  970
 E           978  979  980  988  989  993 1001 1003 1006 1009 1027 1030 1033 1039
 E          1041 1043 1045 1048 1049 1050 1051 1052 1054 1056 1057 1058 1060 1061
 E          1062 1063 1065 1066 1067 1068 1073 1076 1079 1086 1087 1088 1096 1098
 E          1103 1107 1108 1110 1115 1116 1122 1123 1126 1133 1135 1139 1140 1141
 E          1142 1144 1147 1148 1153 1161 1165 1173 1177 1180 1188 1189 1192 1193
 E          1194 1196 1197 1202 1203 1206 1208 1210 1214 1215 1217 1218 1219 1223
 E          1226 1229 1233 1241 1243 1245 1251 1254 1255 1257 1259 1261 1262 1264
 E          1270 1271 1272 1276 1278 1283 1289 1291 1292 1294 1295 1296 1298 1300
 E          1304 1306 1307 1308 1311 1312 1315 1317 1318 1320 1322 1323 1325 1327
 E          1328 1334 1336 1339 1341 1342 1343 1347 1348 1350 1354 1356 1362 1364
 E          1366 1367 1368 1369 1370 1376 1380 1381 1384 1386 1387 1389 1390 1391
 E          1395 1401 1402 1408 1412 1413 1414 1422 1423 1425 1427 1436 1437 1439
 E          1441 1444 1445 1450 1454 1459 1460 1463 1464 1465 1468 1469 1472 1475
 E          1484 1488 1490 1491 1494 1495 1496 1499 1502 1503 1505 1508 1511 1513
 E          1519 1521 1524 1529 1532 1538 1540 1541 1555 1561 1562 1563 1564 1565
 E          1568 1573 1574 1576 1578 1580 1583 1586 1591 1598 1605 1611 1613 1614
 E          1624 1628 1629 1631 1633 1634 1635 1636 1639 1643 1645 1653 1657 1658
 E          1659 1663 1664 1670 1671 1672 1676 1677 1680 1682 1684 1685 1689 1695
 E          1696 1698 1702 1706 1709 1710 1712 1716 1717 1719 1720 1721 1723 1731
 E          1738 1739 1740 1742 1748 1749 1753 1756 1757 1761 1763 1764 1774 1776
 E          1777 1779 1782 1784 1787 1790 1791 1792 1793 1794 1800 1802 1804 1810
 E          1811 1813 1814 1818 1822 1829 1830 1831 1832 1833 1838 1842 1845 1846
 E          1847 1849 1851 1854 1856 1857 1859 1861 1862 1864 1868 1869 1870 1872
 E          1873 1877 1881 1885 1886 1890 1893 1899 1900 1901 1904 1905 1906 1911
 E          1915 1917 1920 1921 1923 1924 1925 1927 1929 1932 1935 1938 1939 1942
 E          1944 1946 1948 1950 1954 1957 1959 1961 1964 1966 1968 1971 1982 1984
 E          1987 1995] and [13 15 17]
 E       assert 1 == 2
 E        +  where 1 = round(1.0)
 E        +    where 1.0 = float(1)
 E        +  and   2 = round(2.0)
 E        +    where 2.0 = float(2.0)

 a          = array([   0,    1,    2,    3,    6,    7,    9,   10,   12,   15,   18,
         20,   23,   24,   26,   28,   32,   ...
       1946, 1948, 1950, 1954, 1957, 1959, 1961, 1964, 1966, 1968, 1971,
       1982, 1984, 1987, 1995], dtype=uint32)
 a_length   = 911
 b          = array([13, 15, 17], dtype=uint32)
 b_length   = 4
 capability = 'sve_f16'
 dtype      = 'uint32'
 expected   = 1
 first_length_bound = 1000
 result     = 2.0
 second_length_bound = 10

 scripts/test.py:1059: AssertionError
 __________________ test_intersect[sve_f16-100-100-uint16-1-5] __________________

 dtype = 'uint16', first_length_bound = 100, second_length_bound = 100
 capability = 'sve_f16'

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("dtype", ["uint16", "uint32"])
    @pytest.mark.parametrize("first_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("second_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_intersect(dtype, first_length_bound, second_length_bound, capability):
        """Compares the simd.intersect() function with numpy.intersect1d."""
    
        if is_running_under_qemu() and (platform.machine() == "aarch64" or platform.machine() == "arm64"):
            pytest.skip("In QEMU `aarch64` emulation on `x86_64` the `intersect` function is not reliable")
    
        np.random.seed()
    
        a_length = np.random.randint(1, first_length_bound)
        b_length = np.random.randint(1, second_length_bound)
        a = np.random.randint(first_length_bound * 2, size=a_length, dtype=dtype)
        b = np.random.randint(second_length_bound * 2, size=b_length, dtype=dtype)
    
        # Remove duplicates, converting into sorted arrays
        a = np.unique(a)
        b = np.unique(b)
    
        keep_one_capability(capability)
        expected = baseline_intersect(a, b)
        result = simd.intersect(a, b)
    
 >       assert round(float(expected)) == round(float(result)), f"Missing {np.intersect1d(a, b)} from {a} and {b}"
 E       AssertionError: Missing [ 9 26 54] from [  0   9  13  16  20  26  27  36  38  51  54  65  67  76  78  84 101 104
 E          113 115 117 134 145 147 153 185 186 192 197] and [  9  10  24  26  33  54  55  61  81  91  94 110 126 133 146 163 177 189
 E          191]
 E       assert 3 == 4
 E        +  where 3 = round(3.0)
 E        +    where 3.0 = float(3)
 E        +  and   4 = round(4.0)
 E        +    where 4.0 = float(4.0)

 a          = array([  0,   9,  13,  16,  20,  26,  27,  36,  38,  51,  54,  65,  67,
        76,  78,  84, 101, 104, 113, 115, 117, 134, 145, 147, 153, 185,
       186, 192, 197], dtype=uint16)
 a_length   = 31
 b          = array([  9,  10,  24,  26,  33,  54,  55,  61,  81,  91,  94, 110, 126,
       133, 146, 163, 177, 189, 191], dtype=uint16)
 b_length   = 19
 capability = 'sve_f16'
 dtype      = 'uint16'
 expected   = 3
 first_length_bound = 100
 result     = 4.0
 second_length_bound = 100

 scripts/test.py:1059: AssertionError
 _________________ test_intersect[sve_f16-100-1000-uint32-2-5] __________________

 dtype = 'uint32', first_length_bound = 1000, second_length_bound = 100
 capability = 'sve_f16'

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("dtype", ["uint16", "uint32"])
    @pytest.mark.parametrize("first_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("second_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_intersect(dtype, first_length_bound, second_length_bound, capability):
        """Compares the simd.intersect() function with numpy.intersect1d."""
    
        if is_running_under_qemu() and (platform.machine() == "aarch64" or platform.machine() == "arm64"):
            pytest.skip("In QEMU `aarch64` emulation on `x86_64` the `intersect` function is not reliable")
    
        np.random.seed()
    
        a_length = np.random.randint(1, first_length_bound)
        b_length = np.random.randint(1, second_length_bound)
        a = np.random.randint(first_length_bound * 2, size=a_length, dtype=dtype)
        b = np.random.randint(second_length_bound * 2, size=b_length, dtype=dtype)
    
        # Remove duplicates, converting into sorted arrays
        a = np.unique(a)
        b = np.unique(b)
    
        keep_one_capability(capability)
        expected = baseline_intersect(a, b)
        result = simd.intersect(a, b)
    
 >       assert round(float(expected)) == round(float(result)), f"Missing {np.intersect1d(a, b)} from {a} and {b}"
 E       AssertionError: Missing [ 37 149 181] from [   0    2    5   10   14   18   28   31   37   45   48   56   58   62
 E            67   70   73   81   82   91   92   94  103  106  110  116  117  120
 E           122  124  125  127  132  136  139  141  143  145  147  149  152  155
 E           156  157  158  159  177  181  184  186  188  192  193  195  196  197
 E           205  207  208  209  211  212  214  224  226  228  232  239  240  243
 E           248  253  258  260  262  266  267  270  272  289  296  298  299  300
 E           302  308  310  313  315  316  318  321  322  325  329  330  338  339
 E           340  345  347  349  351  353  355  361  378  385  388  389  393  394
 E           396  410  411  412  413  414  417  426  433  438  442  454  458  467
 E           468  471  472  475  478  480  481  485  486  497  509  512  513  517
 E           520  525  530  533  535  536  544  545  549  551  558  560  564  566
 E           567  571  575  576  577  588  594  599  601  604  607  612  613  614
 E           620  621  625  627  629  630  632  638  643  645  648  650  651  653
 E           655  659  661  664  667  671  676  677  678  687  688  692  701  702
 E           703  705  706  707  715  719  725  727  733  734  736  738  741  742
 E           743  745  746  749  753  754  756  760  762  764  769  772  776  779
 E           783  787  789  791  797  805  810  812  815  817  819  821  824  829
 E           830  831  832  836  841  842  844  847  853  854  855  857  860  862
 E           865  868  873  874  883  884  887  888  890  891  894  897  905  910
 E           914  916  917  920  925  927  929  930  938  946  952  960  962  966
 E           972  976  982  983  988  990  993  995  999 1004 1007 1008 1014 1017
 E          1019 1026 1027 1028 1030 1032 1033 1034 1036 1037 1040 1043 1051 1055
 E          1059 1060 1068 1070 1071 1077 1079 1082 1083 1088 1089 1090 1091 1095
 E          1099 1107 1108 1113 1116 1118 1122 1123 1126 1127 1128 1131 1134 1137
 E          1138 1139 1142 1148 1153 1154 1158 1159 1162 1164 1166 1168 1179 1181
 E          1183 1188 1192 1196 1197 1198 1201 1205 1206 1213 1216 1223 1225 1228
 E          1233 1239 1240 1242 1245 1246 1249 1251 1252 1253 1259 1260 1266 1270
 E          1271 1272 1274 1283 1284 1291 1294 1297 1300 1301 1303 1304 1306 1308
 E          1310 1311 1315 1318 1324 1327 1332 1338 1339 1344 1346 1347 1348 1351
 E          1354 1357 1358 1361 1368 1382 1383 1384 1388 1390 1391 1393 1396 1397
 E          1404 1405 1408 1411 1422 1426 1429 1430 1434 1435 1436 1440 1445 1451
 E          1456 1460 1464 1468 1473 1481 1482 1484 1486 1491 1494 1497 1499 1501
 E          1503 1504 1507 1508 1509 1510 1511 1514 1519 1521 1522 1525 1527 1530
 E          1531 1536 1543 1544 1546 1549 1550 1552 1553 1556 1560 1565 1567 1569
 E          1570 1572 1573 1579 1586 1588 1589 1591 1595 1603 1608 1609 1610 1615
 E          1616 1618 1619 1622 1624 1625 1626 1633 1635 1636 1637 1644 1646 1649
 E          1656 1662 1664 1668 1674 1675 1680 1681 1683 1685 1686 1687 1691 1692
 E          1697 1698 1701 1705 1707 1708 1709 1711 1712 1713 1722 1724 1725 1726
 E          1731 1732 1745 1749 1752 1753 1755 1757 1758 1760 1761 1762 1764 1765
 E          1767 1769 1779 1783 1784 1785 1790 1793 1799 1801 1804 1805 1810 1811
 E          1814 1816 1819 1820 1824 1826 1831 1838 1840 1841 1842 1844 1847 1849
 E          1850 1853 1854 1857 1863 1864 1865 1873 1882 1883 1886 1887 1890 1893
 E          1895 1897 1903 1904 1909 1911 1921 1925 1929 1930 1931 1933 1934 1942
 E          1957 1960 1962 1964 1969 1971 1973 1974 1975 1978 1985 1987 1990 1993
 E          1994 1999] and [ 13  22  24  37  38  46  60 107 133 148 149 150 175 181]
 E       assert 3 == 4
 E        +  where 3 = round(3.0)
 E        +    where 3.0 = float(3)
 E        +  and   4 = round(4.0)
 E        +    where 4.0 = float(4.0)

 a          = array([   0,    2,    5,   10,   14,   18,   28,   31,   37,   45,   48,
         56,   58,   62,   67,   70,   73,   ... 1960, 1962,
       1964, 1969, 1971, 1973, 1974, 1975, 1978, 1985, 1987, 1990, 1993,
       1994, 1999], dtype=uint32)
 a_length   = 731
 b          = array([ 13,  22,  24,  37,  38,  46,  60, 107, 133, 148, 149, 150, 175,
       181], dtype=uint32)
 b_length   = 14
 capability = 'sve_f16'
 dtype      = 'uint32'
 expected   = 3
 first_length_bound = 1000
 result     = 4.0
 second_length_bound = 100

 scripts/test.py:1059: AssertionError
 __________________ test_intersect[sve_bf16-10-10-uint32-2-5] ___________________

 dtype = 'uint32', first_length_bound = 10, second_length_bound = 10
 capability = 'sve_bf16'

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("dtype", ["uint16", "uint32"])
    @pytest.mark.parametrize("first_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("second_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_intersect(dtype, first_length_bound, second_length_bound, capability):
        """Compares the simd.intersect() function with numpy.intersect1d."""
    
        if is_running_under_qemu() and (platform.machine() == "aarch64" or platform.machine() == "arm64"):
            pytest.skip("In QEMU `aarch64` emulation on `x86_64` the `intersect` function is not reliable")
    
        np.random.seed()
    
        a_length = np.random.randint(1, first_length_bound)
        b_length = np.random.randint(1, second_length_bound)
        a = np.random.randint(first_length_bound * 2, size=a_length, dtype=dtype)
        b = np.random.randint(second_length_bound * 2, size=b_length, dtype=dtype)
    
        # Remove duplicates, converting into sorted arrays
        a = np.unique(a)
        b = np.unique(b)
    
        keep_one_capability(capability)
        expected = baseline_intersect(a, b)
        result = simd.intersect(a, b)
    
 >       assert round(float(expected)) == round(float(result)), f"Missing {np.intersect1d(a, b)} from {a} and {b}"
 E       AssertionError: Missing [7 9] from [ 0  6  7  8  9 17] and [ 7  9 12 15 16 18]
 E       assert 2 == 3
 E        +  where 2 = round(2.0)
 E        +    where 2.0 = float(2)
 E        +  and   3 = round(3.0)
 E        +    where 3.0 = float(3.0)

 a          = array([ 0,  6,  7,  8,  9, 17], dtype=uint32)
 a_length   = 7
 b          = array([ 7,  9, 12, 15, 16, 18], dtype=uint32)
 b_length   = 8
 capability = 'sve_bf16'
 dtype      = 'uint32'
 expected   = 2
 first_length_bound = 10
 result     = 3.0
 second_length_bound = 10

 scripts/test.py:1059: AssertionError
 __________________ test_intersect[sve_bf16-10-10-uint32-4-5] ___________________

 dtype = 'uint32', first_length_bound = 10, second_length_bound = 10
 capability = 'sve_bf16'

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("dtype", ["uint16", "uint32"])
    @pytest.mark.parametrize("first_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("second_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_intersect(dtype, first_length_bound, second_length_bound, capability):
        """Compares the simd.intersect() function with numpy.intersect1d."""
    
        if is_running_under_qemu() and (platform.machine() == "aarch64" or platform.machine() == "arm64"):
            pytest.skip("In QEMU `aarch64` emulation on `x86_64` the `intersect` function is not reliable")
    
        np.random.seed()
    
        a_length = np.random.randint(1, first_length_bound)
        b_length = np.random.randint(1, second_length_bound)
        a = np.random.randint(first_length_bound * 2, size=a_length, dtype=dtype)
        b = np.random.randint(second_length_bound * 2, size=b_length, dtype=dtype)
    
        # Remove duplicates, converting into sorted arrays
        a = np.unique(a)
        b = np.unique(b)
    
        keep_one_capability(capability)
        expected = baseline_intersect(a, b)
        result = simd.intersect(a, b)
    
 >       assert round(float(expected)) == round(float(result)), f"Missing {np.intersect1d(a, b)} from {a} and {b}"
 E       AssertionError: Missing [] from [ 0  3  6 11 14 17 18] and [2]
 E       assert 0 == 1
 E        +  where 0 = round(0.0)
 E        +    where 0.0 = float(0)
 E        +  and   1 = round(1.0)
 E        +    where 1.0 = float(1.0)

 a          = array([ 0,  3,  6, 11, 14, 17, 18], dtype=uint32)
 a_length   = 8
 b          = array([2], dtype=uint32)
 b_length   = 1
 capability = 'sve_bf16'
 dtype      = 'uint32'
 expected   = 0
 first_length_bound = 10
 result     = 1.0
 second_length_bound = 10

 scripts/test.py:1059: AssertionError
 __________________ test_intersect[sve_bf16-10-100-uint16-1-5] __________________

 dtype = 'uint16', first_length_bound = 100, second_length_bound = 10
 capability = 'sve_bf16'

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("dtype", ["uint16", "uint32"])
    @pytest.mark.parametrize("first_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("second_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_intersect(dtype, first_length_bound, second_length_bound, capability):
        """Compares the simd.intersect() function with numpy.intersect1d."""
    
        if is_running_under_qemu() and (platform.machine() == "aarch64" or platform.machine() == "arm64"):
            pytest.skip("In QEMU `aarch64` emulation on `x86_64` the `intersect` function is not reliable")
    
        np.random.seed()
    
        a_length = np.random.randint(1, first_length_bound)
        b_length = np.random.randint(1, second_length_bound)
        a = np.random.randint(first_length_bound * 2, size=a_length, dtype=dtype)
        b = np.random.randint(second_length_bound * 2, size=b_length, dtype=dtype)
    
        # Remove duplicates, converting into sorted arrays
        a = np.unique(a)
        b = np.unique(b)
    
        keep_one_capability(capability)
        expected = baseline_intersect(a, b)
        result = simd.intersect(a, b)
    
 >       assert round(float(expected)) == round(float(result)), f"Missing {np.intersect1d(a, b)} from {a} and {b}"
 E       AssertionError: Missing [12] from [  0   3   6  12  13  14  30  33  36  38  39  44  45  48  50  52  55  69
 E           72  75  80  87  88  89  90  95 102 112 118 129 130 136 138 141 149 152
 E          156 165 168 173 185 186 190 196] and [ 1  4 10 11 12 16]
 E       assert 1 == 2
 E        +  where 1 = round(1.0)
 E        +    where 1.0 = float(1)
 E        +  and   2 = round(2.0)
 E        +    where 2.0 = float(2.0)

 a          = array([  0,   3,   6,  12,  13,  14,  30,  33,  36,  38,  39,  44,  45,
        48,  50,  52,  55,  69,  72,  75,  80,...
       102, 112, 118, 129, 130, 136, 138, 141, 149, 152, 156, 165, 168,
       173, 185, 186, 190, 196], dtype=uint16)
 a_length   = 48
 b          = array([ 1,  4, 10, 11, 12, 16], dtype=uint16)
 b_length   = 6
 capability = 'sve_bf16'
 dtype      = 'uint16'
 expected   = 1
 first_length_bound = 100
 result     = 2.0
 second_length_bound = 10

 scripts/test.py:1059: AssertionError
 __________________ test_intersect[sve_bf16-10-100-uint16-4-5] __________________

 dtype = 'uint16', first_length_bound = 100, second_length_bound = 10
 capability = 'sve_bf16'

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("dtype", ["uint16", "uint32"])
    @pytest.mark.parametrize("first_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("second_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_intersect(dtype, first_length_bound, second_length_bound, capability):
        """Compares the simd.intersect() function with numpy.intersect1d."""
    
        if is_running_under_qemu() and (platform.machine() == "aarch64" or platform.machine() == "arm64"):
            pytest.skip("In QEMU `aarch64` emulation on `x86_64` the `intersect` function is not reliable")
    
        np.random.seed()
    
        a_length = np.random.randint(1, first_length_bound)
        b_length = np.random.randint(1, second_length_bound)
        a = np.random.randint(first_length_bound * 2, size=a_length, dtype=dtype)
        b = np.random.randint(second_length_bound * 2, size=b_length, dtype=dtype)
    
        # Remove duplicates, converting into sorted arrays
        a = np.unique(a)
        b = np.unique(b)
    
        keep_one_capability(capability)
        expected = baseline_intersect(a, b)
        result = simd.intersect(a, b)
    
 >       assert round(float(expected)) == round(float(result)), f"Missing {np.intersect1d(a, b)} from {a} and {b}"
 E       AssertionError: Missing [ 1 10 19] from [  0   1  10  13  15  19  20  24  25  26  27  30  32  38  40  41  44  45
 E           47  50  52  55  56  62  63  65  67  68  72  77  78  81  84  88  91 100
 E          101 105 110 114 115 116 117 118 119 123 134 135 137 141 142 143 145 147
 E          151 153 155 161 167 168 170 172 176 177 178 179 180 187 189 191 194 196
 E          197 199] and [ 1  3  5  9 10 14 16 19]
 E       assert 3 == 4
 E        +  where 3 = round(3.0)
 E        +    where 3.0 = float(3)
 E        +  and   4 = round(4.0)
 E        +    where 4.0 = float(4.0)

 a          = array([  0,   1,  10,  13,  15,  19,  20,  24,  25,  26,  27,  30,  32,
        38,  40,  41,  44,  45,  47,  50,  52,...1, 153, 155, 161, 167, 168, 170, 172, 176, 177, 178,
       179, 180, 187, 189, 191, 194, 196, 197, 199], dtype=uint16)
 a_length   = 88
 b          = array([ 1,  3,  5,  9, 10, 14, 16, 19], dtype=uint16)
 b_length   = 8
 capability = 'sve_bf16'
 dtype      = 'uint16'
 expected   = 3
 first_length_bound = 100
 result     = 4.0
 second_length_bound = 10

 scripts/test.py:1059: AssertionError
 _________________ test_intersect[sve_bf16-10-1000-uint32-3-5] __________________

 dtype = 'uint32', first_length_bound = 1000, second_length_bound = 10
 capability = 'sve_bf16'

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("dtype", ["uint16", "uint32"])
    @pytest.mark.parametrize("first_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("second_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_intersect(dtype, first_length_bound, second_length_bound, capability):
        """Compares the simd.intersect() function with numpy.intersect1d."""
    
        if is_running_under_qemu() and (platform.machine() == "aarch64" or platform.machine() == "arm64"):
            pytest.skip("In QEMU `aarch64` emulation on `x86_64` the `intersect` function is not reliable")
    
        np.random.seed()
    
        a_length = np.random.randint(1, first_length_bound)
        b_length = np.random.randint(1, second_length_bound)
        a = np.random.randint(first_length_bound * 2, size=a_length, dtype=dtype)
        b = np.random.randint(second_length_bound * 2, size=b_length, dtype=dtype)
    
        # Remove duplicates, converting into sorted arrays
        a = np.unique(a)
        b = np.unique(b)
    
        keep_one_capability(capability)
        expected = baseline_intersect(a, b)
        result = simd.intersect(a, b)
    
 >       assert round(float(expected)) == round(float(result)), f"Missing {np.intersect1d(a, b)} from {a} and {b}"
 E       AssertionError: Missing [13 18] from [   0    1    4    9   11   13   14   18   27   28   29   30   31   33
 E            37   39   40   41   44   51   56   58   60   61   62   69   76   77
 E            79   81   83   85   87   89   91   99  100  102  104  105  106  109
 E           111  112  113  115  116  119  124  126  131  134  136  139  141  142
 E           143  144  145  152  158  160  161  166  172  175  180  183  184  185
 E           187  189  194  197  200  201  202  203  208  210  213  215  217  218
 E           221  223  232  234  236  237  241  244  247  250  252  255  256  258
 E           260  263  269  273  276  283  284  290  291  293  297  298  303  307
 E           309  312  314  316  317  323  327  329  332  340  345  348  349  350
 E           351  356  357  358  359  363  364  368  369  370  375  377  378  381
 E           383  385  386  391  392  393  394  397  399  401  404  408  410  411
 E           413  416  418  425  427  428  430  434  438  439  440  444  446  449
 E           450  451  452  454  455  456  460  462  463  471  474  475  478  479
 E           480  483  484  487  489  492  498  502  504  505  509  511  512  513
 E           514  516  519  521  522  525  526  528  529  532  534  540  554  556
 E           560  561  562  564  571  573  574  581  582  587  588  591  597  598
 E           601  607  608  615  616  623  625  626  627  628  629  631  637  640
 E           645  654  658  659  660  662  665  668  671  674  675  676  677  679
 E           681  684  686  688  689  691  693  694  695  696  702  704  707  709
 E           712  714  715  721  723  724  727  728  729  730  731  733  739  741
 E           743  749  753  756  758  761  763  765  767  768  771  775  776  777
 E           778  779  782  786  788  789  790  791  793  795  799  801  802  803
 E           804  805  808  809  811  813  817  820  834  838  843  845  851  854
 E           855  859  861  863  865  867  868  869  870  872  874  877  878  879
 E           880  881  884  885  888  892  893  894  898  902  903  904  905  906
 E           909  912  914  915  917  919  920  921  924  931  936  938  952  953
 E           954  956  957  960  962  963  964  969  972  973  974  979  980  984
 E           988  989  990  991  994  995  997  999 1002 1003 1004 1008 1012 1014
 E          1018 1019 1020 1021 1022 1023 1024 1028 1032 1036 1040 1041 1042 1044
 E          1046 1048 1052 1055 1056 1057 1062 1065 1067 1070 1071 1074 1080 1098
 E          1101 1107 1108 1109 1112 1113 1117 1121 1125 1126 1127 1129 1132 1133
 E          1134 1135 1138 1139 1140 1141 1143 1145 1147 1148 1149 1150 1154 1156
 E          1157 1158 1160 1166 1169 1170 1171 1172 1176 1177 1181 1182 1185 1187
 E          1192 1200 1206 1208 1211 1212 1214 1221 1225 1227 1234 1235 1240 1244
 E          1246 1247 1248 1249 1252 1254 1260 1262 1269 1271 1275 1277 1279 1280
 E          1283 1287 1289 1296 1298 1299 1304 1308 1310 1313 1314 1315 1321 1325
 E          1332 1333 1336 1337 1338 1340 1347 1350 1354 1355 1356 1358 1359 1361
 E          1362 1363 1364 1369 1372 1373 1375 1376 1378 1379 1381 1384 1386 1390
 E          1395 1397 1401 1402 1403 1415 1416 1423 1424 1427 1433 1435 1439 1448
 E          1457 1458 1459 1462 1466 1470 1477 1480 1482 1484 1486 1492 1493 1496
 E          1504 1506 1507 1508 1510 1512 1513 1514 1516 1517 1522 1524 1525 1526
 E          1531 1534 1536 1537 1540 1543 1544 1546 1549 1550 1553 1556 1558 1562
 E          1563 1567 1571 1572 1573 1574 1579 1580 1583 1588 1589 1591 1593 1594
 E          1596 1599 1605 1609 1614 1615 1623 1627 1630 1632 1637 1638 1641 1645
 E          1648 1651 1652 1654 1655 1657 1659 1662 1664 1667 1670 1673 1677 1680
 E          1681 1682 1690 1697 1700 1701 1708 1709 1710 1711 1712 1715 1716 1717
 E          1723 1728 1732 1733 1739 1742 1744 1746 1747 1748 1750 1751 1753 1755
 E          1756 1757 1759 1764 1765 1768 1769 1770 1771 1774 1775 1776 1777 1781
 E          1782 1784 1788 1792 1793 1796 1797 1798 1800 1804 1805 1808 1810 1813
 E          1814 1816 1818 1822 1824 1826 1830 1831 1832 1834 1835 1836 1837 1844
 E          1845 1846 1850 1852 1855 1860 1862 1863 1865 1867 1870 1871 1874 1876
 E          1879 1883 1884 1885 1887 1888 1889 1893 1894 1895 1900 1902 1903 1904
 E          1906 1912 1914 1916 1917 1927 1931 1932 1934 1937 1940 1941 1948 1949
 E          1951 1952 1954 1956 1957 1958 1959 1960 1962 1965 1971 1974 1980 1981
 E          1982 1984 1987 1994 1997] and [ 3  5  8 13 17 18]
 E       assert 2 == 3
 E        +  where 2 = round(2.0)
 E        +    where 2.0 = float(2)
 E        +  and   3 = round(3.0)
 E        +    where 3.0 = float(3.0)

 a          = array([   0,    1,    4,    9,   11,   13,   14,   18,   27,   28,   29,
         30,   31,   33,   37,   39,   40,   ... 1957, 1958,
       1959, 1960, 1962, 1965, 1971, 1974, 1980, 1981, 1982, 1984, 1987,
       1994, 1997], dtype=uint32)
 a_length   = 975
 b          = array([ 3,  5,  8, 13, 17, 18], dtype=uint32)
 b_length   = 6
 capability = 'sve_bf16'
 dtype      = 'uint32'
 expected   = 2
 first_length_bound = 1000
 result     = 3.0
 second_length_bound = 10

 scripts/test.py:1059: AssertionError
 _________________ test_intersect[sve_bf16-10-1000-uint32-5-5] __________________

 dtype = 'uint32', first_length_bound = 1000, second_length_bound = 10
 capability = 'sve_bf16'

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("dtype", ["uint16", "uint32"])
    @pytest.mark.parametrize("first_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("second_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_intersect(dtype, first_length_bound, second_length_bound, capability):
        """Compares the simd.intersect() function with numpy.intersect1d."""
    
        if is_running_under_qemu() and (platform.machine() == "aarch64" or platform.machine() == "arm64"):
            pytest.skip("In QEMU `aarch64` emulation on `x86_64` the `intersect` function is not reliable")
    
        np.random.seed()
    
        a_length = np.random.randint(1, first_length_bound)
        b_length = np.random.randint(1, second_length_bound)
        a = np.random.randint(first_length_bound * 2, size=a_length, dtype=dtype)
        b = np.random.randint(second_length_bound * 2, size=b_length, dtype=dtype)
    
        # Remove duplicates, converting into sorted arrays
        a = np.unique(a)
        b = np.unique(b)
    
        keep_one_capability(capability)
        expected = baseline_intersect(a, b)
        result = simd.intersect(a, b)
    
 >       assert round(float(expected)) == round(float(result)), f"Missing {np.intersect1d(a, b)} from {a} and {b}"
 E       AssertionError: Missing [] from [   0    2    3    4    6    9   10   11   12   16   17   19   21   22
 E            23   32   33   38   39   41   43   47   50   52   58   66   70   73
 E            75   78   80   84   86   96   98   99  109  113  117  119  121  132
 E           133  134  136  139  141  142  143  148  149  153  159  161  163  167
 E           168  170  180  186  199  200  201  202  203  206  211  219  223  225
 E           228  231  232  237  238  243  249  250  251  252  255  256  258  260
 E           264  272  274  276  280  286  288  289  293  296  298  301  302  305
 E           306  308  310  313  314  316  317  320  321  327  329  331  335  336
 E           341  347  348  349  350  351  354  358  369  373  375  378  384  388
 E           390  391  392  394  395  396  398  405  408  410  413  414  419  421
 E           422  425  428  430  434  435  436  438  440  442  443  445  450  451
 E           456  458  463  466  468  471  472  475  478  479  480  481  483  485
 E           487  490  491  492  497  502  504  508  511  512  515  516  524  527
 E           531  533  534  535  540  546  552  554  556  559  560  564  566  570
 E           573  574  582  585  586  587  589  590  593  595  596  599  602  603
 E           604  606  609  611  612  619  622  632  633  636  637  638  639  640
 E           641  642  651  652  655  658  659  660  666  667  668  669  670  671
 E           675  679  682  687  691  697  700  703  705  707  709  722  723  725
 E           727  728  729  730  734  738  739  745  747  751  752  756  759  777
 E           778  781  782  783  785  786  787  791  798  800  802  808  816  818
 E           821  822  824  826  827  830  831  833  834  835  836  844  845  853
 E           860  861  865  870  872  875  877  883  887  889  890  893  895  898
 E           899  904  906  909  912  913  915  916  919  920  924  926  930  932
 E           933  937  938  939  943  944  947  948  949  955  959  962  963  966
 E           970  973  974  977  978  980  985  991  992  997 1004 1005 1008 1011
 E          1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1025 1026 1027 1028
 E          1029 1030 1031 1037 1040 1042 1044 1050 1052 1053 1059 1066 1067 1068
 E          1070 1080 1088 1092 1094 1095 1098 1102 1104 1105 1107 1112 1115 1116
 E          1120 1130 1132 1133 1134 1135 1136 1139 1140 1143 1145 1147 1148 1149
 E          1150 1155 1157 1159 1162 1163 1165 1167 1168 1170 1171 1172 1176 1178
 E          1179 1180 1183 1189 1193 1194 1198 1199 1201 1204 1209 1212 1216 1217
 E          1224 1225 1226 1232 1233 1237 1240 1241 1242 1247 1250 1253 1255 1257
 E          1258 1260 1261 1264 1267 1268 1279 1280 1281 1282 1284 1289 1292 1293
 E          1305 1306 1317 1322 1329 1330 1333 1334 1335 1342 1344 1345 1346 1350
 E          1360 1361 1365 1366 1368 1370 1371 1373 1377 1381 1387 1388 1390 1391
 E          1392 1393 1396 1399 1400 1401 1403 1404 1405 1406 1413 1414 1418 1419
 E          1423 1425 1429 1434 1436 1437 1442 1444 1446 1447 1448 1450 1451 1453
 E          1455 1460 1462 1474 1476 1483 1485 1486 1487 1488 1490 1491 1493 1494
 E          1496 1497 1500 1505 1507 1508 1509 1513 1515 1517 1518 1520 1523 1524
 E          1525 1527 1528 1529 1532 1534 1537 1539 1540 1541 1542 1544 1549 1550
 E          1553 1554 1556 1557 1560 1564 1566 1567 1569 1571 1573 1575 1582 1585
 E          1586 1588 1589 1591 1595 1596 1598 1599 1600 1602 1605 1608 1610 1613
 E          1615 1616 1620 1623 1624 1625 1626 1628 1630 1631 1633 1634 1640 1641
 E          1642 1643 1644 1648 1649 1651 1653 1658 1666 1668 1669 1671 1674 1677
 E          1678 1680 1685 1687 1689 1690 1695 1696 1697 1699 1700 1706 1707 1709
 E          1710 1711 1712 1713 1714 1717 1718 1723 1724 1725 1729 1732 1733 1738
 E          1744 1747 1749 1750 1753 1755 1759 1761 1763 1765 1767 1768 1769 1770
 E          1771 1775 1779 1781 1782 1787 1788 1790 1791 1795 1796 1797 1801 1803
 E          1804 1809 1810 1812 1815 1816 1818 1819 1821 1826 1830 1831 1836 1837
 E          1838 1839 1843 1844 1846 1853 1857 1858 1861 1864 1867 1873 1876 1877
 E          1879 1880 1882 1895 1898 1900 1901 1903 1904 1906 1909 1911 1912 1913
 E          1917 1918 1921 1922 1923 1925 1926 1930 1932 1934 1935 1938 1939 1940
 E          1942 1945 1950 1951 1956 1957 1958 1960 1963 1968 1976 1986 1989 1991
 E          1993 1995 1996 1998] and [15]
 E       assert 0 == 1
 E        +  where 0 = round(0.0)
 E        +    where 0.0 = float(0)
 E        +  and   1 = round(1.0)
 E        +    where 1.0 = float(1.0)

 a          = array([   0,    2,    3,    4,    6,    9,   10,   11,   12,   16,   17,
         19,   21,   22,   23,   32,   33,   ..., 1950, 1951, 1956, 1957, 1958, 1960, 1963,
       1968, 1976, 1986, 1989, 1991, 1993, 1995, 1996, 1998], dtype=uint32)
 a_length   = 937
 b          = array([15], dtype=uint32)
 b_length   = 1
 capability = 'sve_bf16'
 dtype      = 'uint32'
 expected   = 0
 first_length_bound = 1000
 result     = 1.0
 second_length_bound = 10

 scripts/test.py:1059: AssertionError
 __________________ test_intersect[sve_bf16-100-10-uint16-5-5] __________________

 dtype = 'uint16', first_length_bound = 10, second_length_bound = 100
 capability = 'sve_bf16'

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("dtype", ["uint16", "uint32"])
    @pytest.mark.parametrize("first_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("second_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_intersect(dtype, first_length_bound, second_length_bound, capability):
        """Compares the simd.intersect() function with numpy.intersect1d."""
    
        if is_running_under_qemu() and (platform.machine() == "aarch64" or platform.machine() == "arm64"):
            pytest.skip("In QEMU `aarch64` emulation on `x86_64` the `intersect` function is not reliable")
    
        np.random.seed()
    
        a_length = np.random.randint(1, first_length_bound)
        b_length = np.random.randint(1, second_length_bound)
        a = np.random.randint(first_length_bound * 2, size=a_length, dtype=dtype)
        b = np.random.randint(second_length_bound * 2, size=b_length, dtype=dtype)
    
        # Remove duplicates, converting into sorted arrays
        a = np.unique(a)
        b = np.unique(b)
    
        keep_one_capability(capability)
        expected = baseline_intersect(a, b)
        result = simd.intersect(a, b)
    
 >       assert round(float(expected)) == round(float(result)), f"Missing {np.intersect1d(a, b)} from {a} and {b}"
 E       AssertionError: Missing [15] from [ 0  5  8  9 15 16 19] and [  2  15  27  41  44  51  57  81  90  97 103 112 113 120 125 136 145 153
 E          169 177 180 181 186 197]
 E       assert 1 == 2
 E        +  where 1 = round(1.0)
 E        +    where 1.0 = float(1)
 E        +  and   2 = round(2.0)
 E        +    where 2.0 = float(2.0)

 a          = array([ 0,  5,  8,  9, 15, 16, 19], dtype=uint16)
 a_length   = 7
 b          = array([  2,  15,  27,  41,  44,  51,  57,  81,  90,  97, 103, 112, 113,
       120, 125, 136, 145, 153, 169, 177, 180, 181, 186, 197],
      dtype=uint16)
 b_length   = 26
 capability = 'sve_bf16'
 dtype      = 'uint16'
 expected   = 1
 first_length_bound = 10
 result     = 2.0
 second_length_bound = 100

 scripts/test.py:1059: AssertionError
 ___________________ test_intersect[sve_i8-10-10-uint16-5-5] ____________________

 dtype = 'uint16', first_length_bound = 10, second_length_bound = 10
 capability = 'sve_i8'

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("dtype", ["uint16", "uint32"])
    @pytest.mark.parametrize("first_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("second_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_intersect(dtype, first_length_bound, second_length_bound, capability):
        """Compares the simd.intersect() function with numpy.intersect1d."""
    
        if is_running_under_qemu() and (platform.machine() == "aarch64" or platform.machine() == "arm64"):
            pytest.skip("In QEMU `aarch64` emulation on `x86_64` the `intersect` function is not reliable")
    
        np.random.seed()
    
        a_length = np.random.randint(1, first_length_bound)
        b_length = np.random.randint(1, second_length_bound)
        a = np.random.randint(first_length_bound * 2, size=a_length, dtype=dtype)
        b = np.random.randint(second_length_bound * 2, size=b_length, dtype=dtype)
    
        # Remove duplicates, converting into sorted arrays
        a = np.unique(a)
        b = np.unique(b)
    
        keep_one_capability(capability)
        expected = baseline_intersect(a, b)
        result = simd.intersect(a, b)
    
 >       assert round(float(expected)) == round(float(result)), f"Missing {np.intersect1d(a, b)} from {a} and {b}"
 E       AssertionError: Missing [ 7 12] from [ 0  2  6  7 10 12 15] and [ 5  7  8 12 17]
 E       assert 2 == 3
 E        +  where 2 = round(2.0)
 E        +    where 2.0 = float(2)
 E        +  and   3 = round(3.0)
 E        +    where 3.0 = float(3.0)

 a          = array([ 0,  2,  6,  7, 10, 12, 15], dtype=uint16)
 a_length   = 8
 b          = array([ 5,  7,  8, 12, 17], dtype=uint16)
 b_length   = 5
 capability = 'sve_i8'
 dtype      = 'uint16'
 expected   = 2
 first_length_bound = 10
 result     = 3.0
 second_length_bound = 10

 scripts/test.py:1059: AssertionError
 ___________________ test_intersect[sve_i8-10-10-uint32-4-5] ____________________

 dtype = 'uint32', first_length_bound = 10, second_length_bound = 10
 capability = 'sve_i8'

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("dtype", ["uint16", "uint32"])
    @pytest.mark.parametrize("first_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("second_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_intersect(dtype, first_length_bound, second_length_bound, capability):
        """Compares the simd.intersect() function with numpy.intersect1d."""
    
        if is_running_under_qemu() and (platform.machine() == "aarch64" or platform.machine() == "arm64"):
            pytest.skip("In QEMU `aarch64` emulation on `x86_64` the `intersect` function is not reliable")
    
        np.random.seed()
    
        a_length = np.random.randint(1, first_length_bound)
        b_length = np.random.randint(1, second_length_bound)
        a = np.random.randint(first_length_bound * 2, size=a_length, dtype=dtype)
        b = np.random.randint(second_length_bound * 2, size=b_length, dtype=dtype)
    
        # Remove duplicates, converting into sorted arrays
        a = np.unique(a)
        b = np.unique(b)
    
        keep_one_capability(capability)
        expected = baseline_intersect(a, b)
        result = simd.intersect(a, b)
    
 >       assert round(float(expected)) == round(float(result)), f"Missing {np.intersect1d(a, b)} from {a} and {b}"
 E       AssertionError: Missing [] from [ 0  1  6  7  9 14] and [13 16 17]
 E       assert 0 == 1
 E        +  where 0 = round(0.0)
 E        +    where 0.0 = float(0)
 E        +  and   1 = round(1.0)
 E        +    where 1.0 = float(1.0)

 a          = array([ 0,  1,  6,  7,  9, 14], dtype=uint32)
 a_length   = 6
 b          = array([13, 16, 17], dtype=uint32)
 b_length   = 3
 capability = 'sve_i8'
 dtype      = 'uint32'
 expected   = 0
 first_length_bound = 10
 result     = 1.0
 second_length_bound = 10

 scripts/test.py:1059: AssertionError
 ___________________ test_intersect[sve_i8-10-100-uint16-3-5] ___________________

 dtype = 'uint16', first_length_bound = 100, second_length_bound = 10
 capability = 'sve_i8'

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("dtype", ["uint16", "uint32"])
    @pytest.mark.parametrize("first_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("second_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_intersect(dtype, first_length_bound, second_length_bound, capability):
        """Compares the simd.intersect() function with numpy.intersect1d."""
    
        if is_running_under_qemu() and (platform.machine() == "aarch64" or platform.machine() == "arm64"):
            pytest.skip("In QEMU `aarch64` emulation on `x86_64` the `intersect` function is not reliable")
    
        np.random.seed()
    
        a_length = np.random.randint(1, first_length_bound)
        b_length = np.random.randint(1, second_length_bound)
        a = np.random.randint(first_length_bound * 2, size=a_length, dtype=dtype)
        b = np.random.randint(second_length_bound * 2, size=b_length, dtype=dtype)
    
        # Remove duplicates, converting into sorted arrays
        a = np.unique(a)
        b = np.unique(b)
    
        keep_one_capability(capability)
        expected = baseline_intersect(a, b)
        result = simd.intersect(a, b)
    
 >       assert round(float(expected)) == round(float(result)), f"Missing {np.intersect1d(a, b)} from {a} and {b}"
 E       AssertionError: Missing [] from [  0   2  10  19  20  38  45  48  54  58  59  62  79  90  92  97 119 127
 E          135 145 147 148 150 154 162 166 178 192 196] and [ 1  6  8 14 15 16]
 E       assert 0 == 1
 E        +  where 0 = round(0.0)
 E        +    where 0.0 = float(0)
 E        +  and   1 = round(1.0)
 E        +    where 1.0 = float(1.0)

 a          = array([  0,   2,  10,  19,  20,  38,  45,  48,  54,  58,  59,  62,  79,
        90,  92,  97, 119, 127, 135, 145, 147, 148, 150, 154, 162, 166,
       178, 192, 196], dtype=uint16)
 a_length   = 32
 b          = array([ 1,  6,  8, 14, 15, 16], dtype=uint16)
 b_length   = 8
 capability = 'sve_i8'
 dtype      = 'uint16'
 expected   = 0
 first_length_bound = 100
 result     = 1.0
 second_length_bound = 10

 scripts/test.py:1059: AssertionError
 ___________________ test_intersect[sve_i8-10-100-uint32-1-5] ___________________

 dtype = 'uint32', first_length_bound = 100, second_length_bound = 10
 capability = 'sve_i8'

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("dtype", ["uint16", "uint32"])
    @pytest.mark.parametrize("first_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("second_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_intersect(dtype, first_length_bound, second_length_bound, capability):
        """Compares the simd.intersect() function with numpy.intersect1d."""
    
        if is_running_under_qemu() and (platform.machine() == "aarch64" or platform.machine() == "arm64"):
            pytest.skip("In QEMU `aarch64` emulation on `x86_64` the `intersect` function is not reliable")
    
        np.random.seed()
    
        a_length = np.random.randint(1, first_length_bound)
        b_length = np.random.randint(1, second_length_bound)
        a = np.random.randint(first_length_bound * 2, size=a_length, dtype=dtype)
        b = np.random.randint(second_length_bound * 2, size=b_length, dtype=dtype)
    
        # Remove duplicates, converting into sorted arrays
        a = np.unique(a)
        b = np.unique(b)
    
        keep_one_capability(capability)
        expected = baseline_intersect(a, b)
        result = simd.intersect(a, b)
    
 >       assert round(float(expected)) == round(float(result)), f"Missing {np.intersect1d(a, b)} from {a} and {b}"
 E       AssertionError: Missing [ 9 11] from [  0   9  11  13  19  29  31  33  50  53  61  66  77  98 109 122 140 153
 E          156 160 164 181 199] and [ 1  3  5  9 10 11]
 E       assert 2 == 3
 E        +  where 2 = round(2.0)
 E        +    where 2.0 = float(2)
 E        +  and   3 = round(3.0)
 E        +    where 3.0 = float(3.0)

 a          = array([  0,   9,  11,  13,  19,  29,  31,  33,  50,  53,  61,  66,  77,
        98, 109, 122, 140, 153, 156, 160, 164, 181, 199], dtype=uint32)
 a_length   = 25
 b          = array([ 1,  3,  5,  9, 10, 11], dtype=uint32)
 b_length   = 9
 capability = 'sve_i8'
 dtype      = 'uint32'
 expected   = 2
 first_length_bound = 100
 result     = 3.0
 second_length_bound = 10

 scripts/test.py:1059: AssertionError
 ___________________ test_intersect[sve_i8-10-100-uint32-3-5] ___________________

 dtype = 'uint32', first_length_bound = 100, second_length_bound = 10
 capability = 'sve_i8'

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("dtype", ["uint16", "uint32"])
    @pytest.mark.parametrize("first_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("second_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_intersect(dtype, first_length_bound, second_length_bound, capability):
        """Compares the simd.intersect() function with numpy.intersect1d."""
    
        if is_running_under_qemu() and (platform.machine() == "aarch64" or platform.machine() == "arm64"):
            pytest.skip("In QEMU `aarch64` emulation on `x86_64` the `intersect` function is not reliable")
    
        np.random.seed()
    
        a_length = np.random.randint(1, first_length_bound)
        b_length = np.random.randint(1, second_length_bound)
        a = np.random.randint(first_length_bound * 2, size=a_length, dtype=dtype)
        b = np.random.randint(second_length_bound * 2, size=b_length, dtype=dtype)
    
        # Remove duplicates, converting into sorted arrays
        a = np.unique(a)
        b = np.unique(b)
    
        keep_one_capability(capability)
        expected = baseline_intersect(a, b)
        result = simd.intersect(a, b)
    
 >       assert round(float(expected)) == round(float(result)), f"Missing {np.intersect1d(a, b)} from {a} and {b}"
 E       AssertionError: Missing [] from [  0   1   6   8  10  11  19  30  33  37  40  46  50  57  61  62  63  64
 E           71  75  77  91 104 110 113 133 139 140 142 153 154 155 162 165 166 167
 E          172 174 177 179 181 187] and [5]
 E       assert 0 == 1
 E        +  where 0 = round(0.0)
 E        +    where 0.0 = float(0)
 E        +  and   1 = round(1.0)
 E        +    where 1.0 = float(1.0)

 a          = array([  0,   1,   6,   8,  10,  11,  19,  30,  33,  37,  40,  46,  50,
        57,  61,  62,  63,  64,  71,  75,  77,... 113, 133,
       139, 140, 142, 153, 154, 155, 162, 165, 166, 167, 172, 174, 177,
       179, 181, 187], dtype=uint32)
 a_length   = 45
 b          = array([5], dtype=uint32)
 b_length   = 1
 capability = 'sve_i8'
 dtype      = 'uint32'
 expected   = 0
 first_length_bound = 100
 result     = 1.0
 second_length_bound = 10

 scripts/test.py:1059: AssertionError
 __________________ test_intersect[sve_i8-10-1000-uint16-1-5] ___________________

 dtype = 'uint16', first_length_bound = 1000, second_length_bound = 10
 capability = 'sve_i8'

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("dtype", ["uint16", "uint32"])
    @pytest.mark.parametrize("first_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("second_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_intersect(dtype, first_length_bound, second_length_bound, capability):
        """Compares the simd.intersect() function with numpy.intersect1d."""
    
        if is_running_under_qemu() and (platform.machine() == "aarch64" or platform.machine() == "arm64"):
            pytest.skip("In QEMU `aarch64` emulation on `x86_64` the `intersect` function is not reliable")
    
        np.random.seed()
    
        a_length = np.random.randint(1, first_length_bound)
        b_length = np.random.randint(1, second_length_bound)
        a = np.random.randint(first_length_bound * 2, size=a_length, dtype=dtype)
        b = np.random.randint(second_length_bound * 2, size=b_length, dtype=dtype)
    
        # Remove duplicates, converting into sorted arrays
        a = np.unique(a)
        b = np.unique(b)
    
        keep_one_capability(capability)
        expected = baseline_intersect(a, b)
        result = simd.intersect(a, b)
    
 >       assert round(float(expected)) == round(float(result)), f"Missing {np.intersect1d(a, b)} from {a} and {b}"
 E       AssertionError: Missing [] from [   0   16   30   35   39   56   72   83   94  102  119  122  143  144
 E           170  172  174  176  183  187  189  193  202  227  229  230  256  265
 E           275  293  298  300  310  330  331  340  342  348  353  354  355  372
 E           393  397  416  419  425  426  432  447  450  451  454  455  467  483
 E           493  494  498  499  517  523  526  538  543  556  563  571  575  588
 E           600  608  612  613  620  622  623  627  631  634  641  654  664  681
 E           683  688  699  706  716  738  743  744  748  756  768  769  771  778
 E           791  801  817  824  825  826  853  860  869  872  874  877  881  897
 E           899  916  922  928  939  947  959  967  990  991 1018 1024 1043 1055
 E          1060 1061 1065 1066 1068 1086 1093 1113 1114 1116 1126 1139 1142 1143
 E          1145 1149 1153 1157 1163 1173 1176 1196 1208 1209 1213 1238 1240 1243
 E          1251 1255 1261 1265 1274 1283 1285 1290 1325 1326 1328 1332 1334 1353
 E          1354 1357 1364 1376 1386 1387 1414 1425 1427 1430 1431 1438 1441 1445
 E          1457 1480 1484 1493 1502 1510 1512 1515 1516 1518 1520 1525 1530 1536
 E          1538 1551 1557 1563 1566 1574 1582 1585 1592 1608 1609 1618 1631 1635
 E          1658 1683 1684 1689 1694 1700 1701 1710 1717 1718 1723 1728 1729 1730
 E          1731 1735 1740 1746 1753 1756 1758 1766 1775 1776 1789 1798 1809 1810
 E          1819 1825 1841 1850 1860 1861 1872 1889 1890 1930 1938 1956 1961 1963
 E          1967 1973 1981] and [ 4 17 18]
 E       assert 0 == 1
 E        +  where 0 = round(0.0)
 E        +    where 0.0 = float(0)
 E        +  and   1 = round(1.0)
 E        +    where 1.0 = float(1.0)

 a          = array([   0,   16,   30,   35,   39,   56,   72,   83,   94,  102,  119,
        122,  143,  144,  170,  172,  174,  1... 1841, 1850,
       1860, 1861, 1872, 1889, 1890, 1930, 1938, 1956, 1961, 1963, 1967,
       1973, 1981], dtype=uint16)
 a_length   = 279
 b          = array([ 4, 17, 18], dtype=uint16)
 b_length   = 3
 capability = 'sve_i8'
 dtype      = 'uint16'
 expected   = 0
 first_length_bound = 1000
 result     = 1.0
 second_length_bound = 10

 scripts/test.py:1059: AssertionError
 __________________ test_intersect[sve_i8-10-1000-uint32-5-5] ___________________

 dtype = 'uint32', first_length_bound = 1000, second_length_bound = 10
 capability = 'sve_i8'

    @pytest.mark.skipif(not numpy_available, reason="NumPy is not installed")
    @pytest.mark.repeat(5)
    @pytest.mark.parametrize("dtype", ["uint16", "uint32"])
    @pytest.mark.parametrize("first_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("second_length_bound", [10, 100, 1000])
    @pytest.mark.parametrize("capability", possible_capabilities)
    def test_intersect(dtype, first_length_bound, second_length_bound, capability):
        """Compares the simd.intersect() function with numpy.intersect1d."""
    
        if is_running_under_qemu() and (platform.machine() == "aarch64" or platform.machine() == "arm64"):
            pytest.skip("In QEMU `aarch64` emulation on `x86_64` the `intersect` function is not reliable")
    
        np.random.seed()
    
        a_length = np.random.randint(1, first_length_bound)
        b_length = np.random.randint(1, second_length_bound)
        a = np.random.randint(first_length_bound * 2, size=a_length, dtype=dtype)
        b = np.random.randint(second_length_bound * 2, size=b_length, dtype=dtype)
    
        # Remove duplicates, converting into sorted arrays
        a = np.unique(a)
        b = np.unique(b)
    
        keep_one_capability(capability)
        expected = baseline_intersect(a, b)
        result = simd.intersect(a, b)
    
 >       assert round(float(expected)) == round(float(result)), f"Missing {np.intersect1d(a, b)} from {a} and {b}"
 E       AssertionError: Missing [] from [   0    7    8   13   16   17   18   19   20   34   35   37   39   43
 E            50   51   54   57   58   63   66   67   68   69   75   76   78   86
 E            87   89   90   93   94   96   97  101  102  103  104  105  110  114
 E           115  117  118  119  122  124  130  137  141  144  146  150  151  153
 E           154  157  161  165  167  172  173  176  179  185  188  192  196  198
 E           199  200  201  202  203  204  206  215  216  218  224  226  229  233
 E           234  237  240  244  245  246  253  256  257  263  270  275  276  277
 E           279  281  284  285  286  288  292  296  297  298  305  306  308  311
 E           312  314  317  319  320  322  331  336  337  340  342  345  346  347
 E           350  353  359  360  364  368  372  373  381  385  387  390  391  393
 E           398  399  403  405  410  415  417  420  422  425  427  428  430  435
 E           442  444  446  448  449  454  455  456  459  463  464  465  471  478
 E           479  484  485  491  493  496  498  502  511  515  516  517  519  520
 E           523  526  532  537  538  539  541  549  552  554  556  557  558  565
 E           569  575  576  577  583  588  589  597  602  604  615  617  620  621
 E           623  624  625  629  630  631  632  634  638  640  644  648  649  650
 E           651  658  662  666  667  669  672  677  678  679  681  684  686  688
 E           692  698  702  703  705  707  714  717  725  726  728  732  735  741
 E           744  745  747  755  759  760  761  764  765  767  768  772  773  776
 E           778  781  785  794  795  796  797  801  802  806  810  813  814  818
 E           819  821  824  825  827  829  833  835  838  840  842  847  852  853
 E           854  859  860  862  863  864  865  866  868  872  878  879  883  884
 E           887  888  890  895  897  900  903  907  909  917  919  920  925  932
 E           938  942  945  949  951  954  955  956  957  959  961  962  966  967
 E           968  972  974  978  981  984  992  994 1006 1012 1014 1015 1016 1019
 E          1023 1024 1027 1030 1039 1043 1045 1049 1050 1052 1054 1063 1065 1070
 E          1071 1074 1075 1079 1080 1083 1084 1085 1086 1087 1089 1095 1096 1100
 E          1105 1106 1108 1111 1113 1117 1119 1122 1125 1127 1128 1129 1130 1132
 E          1133 1142 1143 1145 1151 1161 1163 1169 1170 1174 1176 1178 1183 1186
 E          1188 1192 1193 1196 1199 1201 1205 1206 1209 1212 1214 1217 1220 1225
 E          1227 1230 1232 1235 1239 1243 1247 1248 1249 1251 1252 1254 1256 1257
 E          1259 1260 1261 1268 1270 1271 1272 1273 1274 1279 1280 1284 1288 1290
 E          1293 1299 1301 1304 1307 1309 1315 1318 1327 1328 1331 1336 1339 1343
 E          1346 1350 1351 1352 1353 1354 1359 1362 1369 1370 1372 1374 1384 1394
 E          1398 1402 1403 1404 1407 1410 1411 1412 1415 1419 1421 1422 1423 1424
 E          1429 1431 1433 1438 1442 1445 1446 1448 1460 1467 1469 1472 1473 1476
 E          1478 1479 1481 1482 1483 1487 1497 1502 1503 1504 1505 1508 1513 1515
 E          1521 1526 1536 1537 1542 1543 1545 1548 1552 1553 1556 1560 1567 1573
 E          1575 1577 1580 1581 1585 1586 1588 1589 1593 1594 1597 1598 1600 1601
 E          1604 1605 1607 1610 1613 1617 1623 1625 1629 1632 1635 1636 1637 1640
 E          1648 1649 1652 1653 1655 1656 1657 1661 1664 1670 1674 1675 1678 1682
 E          1684 1685 1686 1687 1690 1694 1695 1696 1697 1698 1699 1700 1701 1704
 E          1705 1708 1710 1713 1719 1720 1721 1722 1723 1725 1726 1727 1730 1734
 E          1735 1736 1739 1740 1744 1749 1751 1752 1753 1755 1756 1758 1759 1760
 E          1764 1766 1769 1771 1780 1783 1789 1790 1794 1795 1797 1800 1804 1814
 E          1816 1820 1824 1825 1828 1829 1835 1837 1841 1842 1843 1845 1846 1848
 E          1850 1851 1853 1855 1858 1859 1861 1865 1868 1870 1871 1876 1883 1889
 E          1892 1897 1903 1904 1908 1909 1920 1925 1926 1928 1929 1931 1932 1937
 E          1939 1944 1945 1951 1952 1954 1955 1958 1960 1963 1964 1968 1969 1970
 E          1975 1979 1980 1981 1983 1986 1988 1993 1994 1995 1997 1999] and [6]
 E       assert 0 == 1
 E        +  where 0 = round(0.0)
 E        +    where 0.0 = float(0)
 E        +  and   1 = round(1.0)
 E        +    where 1.0 = float(1.0)

 a          = array([   0,    7,    8,   13,   16,   17,   18,   19,   20,   34,   35,
         37,   39,   43,   50,   51,   54,   ...  1964, 1968, 1969, 1970, 1975, 1979, 1980, 1981, 1983, 1986, 1988,
       1993, 1994, 1995, 1997, 1999], dtype=uint32)
 a_length   = 838
 b          = array([6], dtype=uint32)
 b_length   = 1
 capability = 'sve_i8'
 dtype      = 'uint32'
 expected   = 0
 first_length_bound = 1000
 result     = 1.0
 second_length_bound = 10

 scripts/test.py:1059: AssertionError
 =========================== short test summary info ============================
 SKIPPED [240] scripts/test.py:853: SciPy is not installed
 SKIPPED [240] scripts/test.py:884: SciPy is not installed
 SKIPPED [1440] scripts/test.py:938: Lacks overflow protection: https://github.com/ashvardanian/SimSIMD/issues/206
 SKIPPED [320] scripts/test.py:972: Lacks overflow protection: https://github.com/ashvardanian/SimSIMD/issues/206
 SKIPPED [72] scripts/test.py:1181: SciPy is not installed
 SKIPPED [288] scripts/test.py:1266: SciPy is not installed
 SKIPPED [36] scripts/test.py:1317: SciPy is not installed
 SKIPPED [480] scripts/test.py:1392: SciPy is not installed
 FAILED scripts/test.py::test_curved[neon-mahalanobis-dtypes1-11-5-5] - RuntimeWarning: invalid value encountered in sqrt
 FAILED scripts/test.py::test_curved[neon-mahalanobis-dtypes1-97-2-5] - RuntimeWarning: invalid value encountered in sqrt
 FAILED scripts/test.py::test_curved[neon_f16-mahalanobis-dtypes1-11-2-5] - RuntimeWarning: invalid value encountered in sqrt
 FAILED scripts/test.py::test_curved[neon_f16-mahalanobis-dtypes1-11-3-5] - RuntimeWarning: invalid value encountered in sqrt
 FAILED scripts/test.py::test_curved[neon_f16-mahalanobis-dtypes1-97-1-5] - RuntimeWarning: invalid value encountered in sqrt
 FAILED scripts/test.py::test_curved[neon_f16-mahalanobis-dtypes1-97-2-5] - RuntimeWarning: invalid value encountered in sqrt
 FAILED scripts/test.py::test_curved[neon_f16-mahalanobis-dtypes1-97-5-5] - RuntimeWarning: invalid value encountered in sqrt
 FAILED scripts/test.py::test_curved[neon_bf16-mahalanobis-dtypes1-11-1-5] - RuntimeWarning: invalid value encountered in sqrt
 FAILED scripts/test.py::test_curved[neon_bf16-mahalanobis-dtypes1-11-5-5] - RuntimeWarning: invalid value encountered in sqrt
 FAILED scripts/test.py::test_curved[neon_bf16-mahalanobis-dtypes1-97-1-5] - RuntimeWarning: invalid value encountered in sqrt
 FAILED scripts/test.py::test_curved[neon_i8-mahalanobis-dtypes1-11-1-5] - RuntimeWarning: invalid value encountered in sqrt
 FAILED scripts/test.py::test_curved[neon_i8-mahalanobis-dtypes1-11-4-5] - RuntimeWarning: invalid value encountered in sqrt
 FAILED scripts/test.py::test_curved[neon_i8-mahalanobis-dtypes1-11-5-5] - RuntimeWarning: invalid value encountered in sqrt
 FAILED scripts/test.py::test_curved[neon_i8-mahalanobis-dtypes1-97-3-5] - RuntimeWarning: invalid value encountered in sqrt
 FAILED scripts/test.py::test_curved[sve-mahalanobis-dtypes1-11-1-5] - RuntimeWarning: invalid value encountered in sqrt
 FAILED scripts/test.py::test_curved[sve-mahalanobis-dtypes1-97-1-5] - RuntimeWarning: invalid value encountered in sqrt
 FAILED scripts/test.py::test_curved[sve-mahalanobis-dtypes1-97-2-5] - RuntimeWarning: invalid value encountered in sqrt
 FAILED scripts/test.py::test_curved[sve-mahalanobis-dtypes1-97-3-5] - RuntimeWarning: invalid value encountered in sqrt
 FAILED scripts/test.py::test_curved[sve-mahalanobis-dtypes1-97-4-5] - RuntimeWarning: invalid value encountered in sqrt
 FAILED scripts/test.py::test_curved[sve_f16-mahalanobis-dtypes1-11-2-5] - RuntimeWarning: invalid value encountered in sqrt
 FAILED scripts/test.py::test_curved[sve_f16-mahalanobis-dtypes1-11-3-5] - RuntimeWarning: invalid value encountered in sqrt
 FAILED scripts/test.py::test_curved[sve_f16-mahalanobis-dtypes1-97-2-5] - RuntimeWarning: invalid value encountered in sqrt
 FAILED scripts/test.py::test_curved[sve_f16-mahalanobis-dtypes1-97-3-5] - RuntimeWarning: invalid value encountered in sqrt
 FAILED scripts/test.py::test_curved[sve_bf16-mahalanobis-dtypes1-11-2-5] - RuntimeWarning: invalid value encountered in sqrt
 FAILED scripts/test.py::test_curved[sve_bf16-mahalanobis-dtypes1-97-1-5] - RuntimeWarning: invalid value encountered in sqrt
 FAILED scripts/test.py::test_curved[sve_bf16-mahalanobis-dtypes1-97-5-5] - RuntimeWarning: invalid value encountered in sqrt
 FAILED scripts/test.py::test_curved[sve_i8-mahalanobis-dtypes1-11-1-5] - RuntimeWarning: invalid value encountered in sqrt
 FAILED scripts/test.py::test_curved[sve_i8-mahalanobis-dtypes1-11-3-5] - RuntimeWarning: invalid value encountered in sqrt
 FAILED scripts/test.py::test_curved[sve_i8-mahalanobis-dtypes1-11-5-5] - RuntimeWarning: invalid value encountered in sqrt
 FAILED scripts/test.py::test_curved[sve_i8-mahalanobis-dtypes1-97-1-5] - RuntimeWarning: invalid value encountered in sqrt
 FAILED scripts/test.py::test_curved[sve_i8-mahalanobis-dtypes1-97-3-5] - RuntimeWarning: invalid value encountered in sqrt
 FAILED scripts/test.py::test_curved[sve_i8-mahalanobis-dtypes1-97-4-5] - RuntimeWarning: invalid value encountered in sqrt
 FAILED scripts/test.py::test_curved[sve_i8-mahalanobis-dtypes1-97-5-5] - RuntimeWarning: invalid value encountered in sqrt
 FAILED scripts/test.py::test_curved_bf16[neon-mahalanobis-11-2-5] - RuntimeWarning: invalid value encountered in sqrt
 FAILED scripts/test.py::test_curved_bf16[neon-mahalanobis-11-4-5] - RuntimeWarning: invalid value encountered in sqrt
 FAILED scripts/test.py::test_curved_bf16[neon-mahalanobis-11-5-5] - RuntimeWarning: invalid value encountered in sqrt
 FAILED scripts/test.py::test_curved_bf16[neon-mahalanobis-16-2-5] - RuntimeWarning: invalid value encountered in sqrt
 FAILED scripts/test.py::test_curved_bf16[neon-mahalanobis-16-3-5] - RuntimeWarning: invalid value encountered in sqrt
 FAILED scripts/test.py::test_curved_bf16[neon-mahalanobis-16-4-5] - RuntimeWarning: invalid value encountered in sqrt
 FAILED scripts/test.py::test_curved_bf16[neon-mahalanobis-33-2-5] - RuntimeWarning: invalid value encountered in sqrt
 FAILED scripts/test.py::test_curved_bf16[neon_f16-mahalanobis-11-3-5] - RuntimeWarning: invalid value encountered in sqrt
 FAILED scripts/test.py::test_curved_bf16[neon_f16-mahalanobis-11-5-5] - RuntimeWarning: invalid value encountered in sqrt
 FAILED scripts/test.py::test_curved_bf16[neon_f16-mahalanobis-16-2-5] - RuntimeWarning: invalid value encountered in sqrt
 FAILED scripts/test.py::test_curved_bf16[neon_f16-mahalanobis-16-4-5] - RuntimeWarning: invalid value encountered in sqrt
 FAILED scripts/test.py::test_curved_bf16[neon_f16-mahalanobis-16-5-5] - RuntimeWarning: invalid value encountered in sqrt
 FAILED scripts/test.py::test_curved_bf16[neon_f16-mahalanobis-33-3-5] - RuntimeWarning: invalid value encountered in sqrt
 FAILED scripts/test.py::test_curved_bf16[neon_f16-mahalanobis-33-4-5] - RuntimeWarning: invalid value encountered in sqrt
 FAILED scripts/test.py::test_curved_bf16[neon_f16-mahalanobis-33-5-5] - RuntimeWarning: invalid value encountered in sqrt
 FAILED scripts/test.py::test_curved_bf16[neon_bf16-mahalanobis-11-2-5] - RuntimeWarning: invalid value encountered in sqrt
 FAILED scripts/test.py::test_curved_bf16[neon_bf16-mahalanobis-11-3-5] - RuntimeWarning: invalid value encountered in sqrt
 FAILED scripts/test.py::test_curved_bf16[neon_bf16-mahalanobis-11-4-5] - RuntimeWarning: invalid value encountered in sqrt
 FAILED scripts/test.py::test_curved_bf16[neon_bf16-mahalanobis-16-4-5] - RuntimeWarning: invalid value encountered in sqrt
 FAILED scripts/test.py::test_curved_bf16[neon_bf16-mahalanobis-16-5-5] - RuntimeWarning: invalid value encountered in sqrt
 FAILED scripts/test.py::test_curved_bf16[neon_bf16-mahalanobis-33-1-5] - RuntimeWarning: invalid value encountered in sqrt
 FAILED scripts/test.py::test_curved_bf16[neon_bf16-mahalanobis-33-5-5] - RuntimeWarning: invalid value encountered in sqrt
 FAILED scripts/test.py::test_curved_bf16[neon_i8-mahalanobis-11-1-5] - RuntimeWarning: invalid value encountered in sqrt
 FAILED scripts/test.py::test_curved_bf16[neon_i8-mahalanobis-11-4-5] - RuntimeWarning: invalid value encountered in sqrt
 FAILED scripts/test.py::test_curved_bf16[neon_i8-mahalanobis-11-5-5] - RuntimeWarning: invalid value encountered in sqrt
 FAILED scripts/test.py::test_curved_bf16[neon_i8-mahalanobis-16-1-5] - RuntimeWarning: invalid value encountered in sqrt
 FAILED scripts/test.py::test_curved_bf16[neon_i8-mahalanobis-33-2-5] - RuntimeWarning: invalid value encountered in sqrt
 FAILED scripts/test.py::test_curved_bf16[sve-mahalanobis-11-1-5] - RuntimeWarning: invalid value encountered in sqrt
 FAILED scripts/test.py::test_curved_bf16[sve-mahalanobis-11-3-5] - RuntimeWarning: invalid value encountered in sqrt
 FAILED scripts/test.py::test_curved_bf16[sve-mahalanobis-11-4-5] - RuntimeWarning: invalid value encountered in sqrt
 FAILED scripts/test.py::test_curved_bf16[sve-mahalanobis-16-2-5] - RuntimeWarning: invalid value encountered in sqrt
 FAILED scripts/test.py::test_curved_bf16[sve-mahalanobis-16-4-5] - RuntimeWarning: invalid value encountered in sqrt
 FAILED scripts/test.py::test_curved_bf16[sve-mahalanobis-33-1-5] - RuntimeWarning: invalid value encountered in sqrt
 FAILED scripts/test.py::test_curved_bf16[sve-mahalanobis-33-2-5] - RuntimeWarning: invalid value encountered in sqrt
 FAILED scripts/test.py::test_curved_bf16[sve-mahalanobis-33-3-5] - RuntimeWarning: invalid value encountered in sqrt
 FAILED scripts/test.py::test_curved_bf16[sve_f16-mahalanobis-11-1-5] - RuntimeWarning: invalid value encountered in sqrt
 FAILED scripts/test.py::test_curved_bf16[sve_f16-mahalanobis-11-2-5] - RuntimeWarning: invalid value encountered in sqrt
 FAILED scripts/test.py::test_curved_bf16[sve_f16-mahalanobis-11-4-5] - RuntimeWarning: invalid value encountered in sqrt
 FAILED scripts/test.py::test_curved_bf16[sve_f16-mahalanobis-16-1-5] - RuntimeWarning: invalid value encountered in sqrt
 FAILED scripts/test.py::test_curved_bf16[sve_f16-mahalanobis-16-4-5] - RuntimeWarning: invalid value encountered in sqrt
 FAILED scripts/test.py::test_curved_bf16[sve_f16-mahalanobis-16-5-5] - RuntimeWarning: invalid value encountered in sqrt
 FAILED scripts/test.py::test_curved_bf16[sve_f16-mahalanobis-33-1-5] - RuntimeWarning: invalid value encountered in sqrt
 FAILED scripts/test.py::test_curved_bf16[sve_f16-mahalanobis-33-4-5] - RuntimeWarning: invalid value encountered in sqrt
 FAILED scripts/test.py::test_curved_bf16[sve_bf16-mahalanobis-11-1-5] - RuntimeWarning: invalid value encountered in sqrt
 FAILED scripts/test.py::test_curved_bf16[sve_bf16-mahalanobis-11-2-5] - RuntimeWarning: invalid value encountered in sqrt
 FAILED scripts/test.py::test_curved_bf16[sve_bf16-mahalanobis-33-1-5] - RuntimeWarning: invalid value encountered in sqrt
 FAILED scripts/test.py::test_curved_bf16[sve_bf16-mahalanobis-33-5-5] - RuntimeWarning: invalid value encountered in sqrt
 FAILED scripts/test.py::test_curved_bf16[sve_i8-mahalanobis-11-1-5] - RuntimeWarning: invalid value encountered in sqrt
 FAILED scripts/test.py::test_curved_bf16[sve_i8-mahalanobis-11-4-5] - RuntimeWarning: invalid value encountered in sqrt
 FAILED scripts/test.py::test_curved_bf16[sve_i8-mahalanobis-16-2-5] - RuntimeWarning: invalid value encountered in sqrt
 FAILED scripts/test.py::test_curved_bf16[sve_i8-mahalanobis-16-5-5] - RuntimeWarning: invalid value encountered in sqrt
 FAILED scripts/test.py::test_curved_bf16[sve_i8-mahalanobis-33-1-5] - RuntimeWarning: invalid value encountered in sqrt
 FAILED scripts/test.py::test_curved_bf16[sve_i8-mahalanobis-33-3-5] - RuntimeWarning: invalid value encountered in sqrt
 FAILED scripts/test.py::test_curved_bf16[sve_i8-mahalanobis-33-4-5] - RuntimeWarning: invalid value encountered in sqrt
 FAILED scripts/test.py::test_curved_bf16[sve_i8-mahalanobis-33-5-5] - RuntimeWarning: invalid value encountered in sqrt
 FAILED scripts/test.py::test_intersect[neon-10-10-uint16-3-5] - AssertionError: Missing [2] from [ 0  2 12 17 19] and [ 1  2  5 16]
 FAILED scripts/test.py::test_intersect[neon-10-10-uint16-4-5] - AssertionError: Missing [3 4] from [ 0  3  4  7 10 12 13 16] and [ 2  3  4 ...
 FAILED scripts/test.py::test_intersect[neon-10-10-uint32-4-5] - AssertionError: Missing [] from [ 0  2  9 10 12 14] and [ 1  4 11]
 FAILED scripts/test.py::test_intersect[neon-10-100-uint16-5-5] - AssertionError: Missing [12 16 18] from [  0   6  12  16  18  19  21  23  2...
 FAILED scripts/test.py::test_intersect[neon-10-1000-uint16-3-5] - AssertionError: Missing [] from [   0    2    6    7    8   13   14   19   ...
 FAILED scripts/test.py::test_intersect[neon_f16-10-10-uint16-3-5] - AssertionError: Missing [15 18] from [ 0  2 15 17 18] and [ 4  7 15 18]
 FAILED scripts/test.py::test_intersect[neon_f16-10-100-uint32-5-5] - AssertionError: Missing [6] from [  0   6   9  11  12  13  16  19  26  28  ...
 FAILED scripts/test.py::test_intersect[neon_f16-10-1000-uint16-1-5] - AssertionError: Missing [1] from [   0    1   12   20   25   27   28   29  ...
 FAILED scripts/test.py::test_intersect[neon_f16-100-10-uint16-2-5] - AssertionError: Missing [] from [ 0  7 16 19] and [ 29  53  79 114 144 164]
 FAILED scripts/test.py::test_intersect[neon_f16-100-100-uint16-2-5] - AssertionError: Missing [137 195] from [  0   4  12  13  15  16  20  21  24...
 FAILED scripts/test.py::test_intersect[neon_f16-100-1000-uint16-5-5] - AssertionError: Missing [ 19  23  54 150 163] from [   0    1    3    4    ...
 FAILED scripts/test.py::test_intersect[neon_f16-100-1000-uint32-4-5] - AssertionError: Missing [114 148 186 195] from [   0    4    7   11   18   ...
 FAILED scripts/test.py::test_intersect[neon_bf16-10-100-uint16-3-5] - AssertionError: Missing [] from [  0   8   9  11  13  18  21  23  24  25  2...
 FAILED scripts/test.py::test_intersect[neon_bf16-10-100-uint32-2-5] - AssertionError: Missing [ 5 11] from [  0   3   5   7  11  13  19  20  22  ...
 FAILED scripts/test.py::test_intersect[neon_bf16-10-100-uint32-4-5] - AssertionError: Missing [ 3 17] from [  0   3   6   8   9  10  13  17  24  ...
 FAILED scripts/test.py::test_intersect[neon_bf16-10-1000-uint16-1-5] - AssertionError: Missing [8] from [   0    8    9   11   12   16   18   32  ...
 FAILED scripts/test.py::test_intersect[neon_bf16-10-1000-uint16-5-5] - AssertionError: Missing [] from [   0    2   16   19   23   34   35   36   ...
 FAILED scripts/test.py::test_intersect[neon_bf16-100-10-uint16-5-5] - AssertionError: Missing [ 1 16] from [ 0  1 12 15 16] and [  1   9  16  18 ...
 FAILED scripts/test.py::test_intersect[neon_i8-10-10-uint16-1-5] - AssertionError: Missing [] from [ 0  1  3  5 10 15 16] and [17]
 FAILED scripts/test.py::test_intersect[neon_i8-10-10-uint16-3-5] - AssertionError: Missing [] from [ 0  3  9 10 11 18] and [ 1  2  7 15 19]
 FAILED scripts/test.py::test_intersect[neon_i8-10-10-uint32-4-5] - AssertionError: Missing [ 6 18] from [ 0  5  6 12 13 14 15 18] and [ 6 17 18]
 FAILED scripts/test.py::test_intersect[neon_i8-10-10-uint32-5-5] - AssertionError: Missing [2] from [0 2 6] and [2 9]
 FAILED scripts/test.py::test_intersect[neon_i8-10-100-uint16-3-5] - AssertionError: Missing [] from [  0   7  10  12  13  15  17  20  22  28  3...
 FAILED scripts/test.py::test_intersect[neon_i8-10-1000-uint16-1-5] - AssertionError: Missing [] from [   0    2    7   13   15   16   17   19   ...
 FAILED scripts/test.py::test_intersect[neon_i8-10-1000-uint32-4-5] - AssertionError: Missing [] from [   0    1    7    8   12   16   24   25   ...
 FAILED scripts/test.py::test_intersect[neon_i8-100-1000-uint16-2-5] - AssertionError: Missing [ 67  72 120 186] from [   0    1    2    3    6   ...
 FAILED scripts/test.py::test_intersect[sve-10-10-uint16-2-5] - AssertionError: Missing [] from [ 0  2 12 17 19] and [8]
 FAILED scripts/test.py::test_intersect[sve-10-10-uint16-4-5] - AssertionError: Missing [11] from [ 0  2 11] and [ 1  5  9 11 13 16 19]
 FAILED scripts/test.py::test_intersect[sve-10-10-uint16-5-5] - AssertionError: Missing [] from [ 0 11 19] and [ 3  4  5  9 12 15 18]
 FAILED scripts/test.py::test_intersect[sve-10-10-uint32-1-5] - AssertionError: Missing [3 6] from [ 0  2  3  6 10 12 14 15] and [ 3  4  5 ...
 FAILED scripts/test.py::test_intersect[sve-10-100-uint16-4-5] - AssertionError: Missing [10] from [  0   1   3   5  10  11  12  13  14  15 ...
 FAILED scripts/test.py::test_intersect[sve-10-100-uint32-1-5] - AssertionError: Missing [2] from [  0   2   5   7   8  11  15  17  18  19  ...
 FAILED scripts/test.py::test_intersect[sve-10-100-uint32-4-5] - AssertionError: Missing [10 13] from [  0   4  10  13  24  34  35  39  40  ...
 FAILED scripts/test.py::test_intersect[sve-10-1000-uint16-2-5] - AssertionError: Missing [] from [   0    2    7   29   32   36   40   42   ...
 FAILED scripts/test.py::test_intersect[sve-100-100-uint16-5-5] - AssertionError: Missing [] from [  0   2   9  24  39  41  44  53  55  58  6...
 FAILED scripts/test.py::test_intersect[sve-1000-10-uint32-5-5] - AssertionError: Missing [] from [ 0  2 11 18] and [ 471  503  813 1110 1435...
 FAILED scripts/test.py::test_intersect[sve_f16-10-10-uint32-3-5] - AssertionError: Missing [] from [ 0  7 17] and [2]
 FAILED scripts/test.py::test_intersect[sve_f16-10-10-uint32-5-5] - AssertionError: Missing [ 9 15] from [ 0  5  6  7  9 10 15] and [ 3  9 13 1...
 FAILED scripts/test.py::test_intersect[sve_f16-10-100-uint16-2-5] - AssertionError: Missing [] from [  0   1   6   8  27  32  37  40  46  50  5...
 FAILED scripts/test.py::test_intersect[sve_f16-10-100-uint32-2-5] - AssertionError: Missing [2 5 7] from [  0   1   2   5   7  10  14  17  22  ...
 FAILED scripts/test.py::test_intersect[sve_f16-10-1000-uint16-1-5] - AssertionError: Missing [ 5  7 14 16] from [   0    3    5    6    7   12  ...
 FAILED scripts/test.py::test_intersect[sve_f16-10-1000-uint16-2-5] - AssertionError: Missing [13] from [   0    3    4    5    8   13   14   15 ...
 FAILED scripts/test.py::test_intersect[sve_f16-10-1000-uint16-4-5] - AssertionError: Missing [] from [   0    7   11   18   35   40   46   66   ...
 FAILED scripts/test.py::test_intersect[sve_f16-10-1000-uint32-2-5] - AssertionError: Missing [15] from [   0    1    2    3    6    7    9   10 ...
 FAILED scripts/test.py::test_intersect[sve_f16-100-100-uint16-1-5] - AssertionError: Missing [ 9 26 54] from [  0   9  13  16  20  26  27  36  3...
 FAILED scripts/test.py::test_intersect[sve_f16-100-1000-uint32-2-5] - AssertionError: Missing [ 37 149 181] from [   0    2    5   10   14   18  ...
 FAILED scripts/test.py::test_intersect[sve_bf16-10-10-uint32-2-5] - AssertionError: Missing [7 9] from [ 0  6  7  8  9 17] and [ 7  9 12 15 16 18]
 FAILED scripts/test.py::test_intersect[sve_bf16-10-10-uint32-4-5] - AssertionError: Missing [] from [ 0  3  6 11 14 17 18] and [2]
 FAILED scripts/test.py::test_intersect[sve_bf16-10-100-uint16-1-5] - AssertionError: Missing [12] from [  0   3   6  12  13  14  30  33  36  38 ...
 FAILED scripts/test.py::test_intersect[sve_bf16-10-100-uint16-4-5] - AssertionError: Missing [ 1 10 19] from [  0   1  10  13  15  19  20  24  2...
 FAILED scripts/test.py::test_intersect[sve_bf16-10-1000-uint32-3-5] - AssertionError: Missing [13 18] from [   0    1    4    9   11   13   14   ...
 FAILED scripts/test.py::test_intersect[sve_bf16-10-1000-uint32-5-5] - AssertionError: Missing [] from [   0    2    3    4    6    9   10   11   ...
 FAILED scripts/test.py::test_intersect[sve_bf16-100-10-uint16-5-5] - AssertionError: Missing [15] from [ 0  5  8  9 15 16 19] and [  2  15  27  ...
 FAILED scripts/test.py::test_intersect[sve_i8-10-10-uint16-5-5] - AssertionError: Missing [ 7 12] from [ 0  2  6  7 10 12 15] and [ 5  7  8 1...
 FAILED scripts/test.py::test_intersect[sve_i8-10-10-uint32-4-5] - AssertionError: Missing [] from [ 0  1  6  7  9 14] and [13 16 17]
 FAILED scripts/test.py::test_intersect[sve_i8-10-100-uint16-3-5] - AssertionError: Missing [] from [  0   2  10  19  20  38  45  48  54  58  5...
 FAILED scripts/test.py::test_intersect[sve_i8-10-100-uint32-1-5] - AssertionError: Missing [ 9 11] from [  0   9  11  13  19  29  31  33  50  ...
 FAILED scripts/test.py::test_intersect[sve_i8-10-100-uint32-3-5] - AssertionError: Missing [] from [  0   1   6   8  10  11  19  30  33  37  4...
 FAILED scripts/test.py::test_intersect[sve_i8-10-1000-uint16-1-5] - AssertionError: Missing [] from [   0   16   30   35   39   56   72   83   ...
 FAILED scripts/test.py::test_intersect[sve_i8-10-1000-uint32-5-5] - AssertionError: Missing [] from [   0    7    8   13   16   17   18   19   ...
 ========== 148 failed, 6126 passed, 3116 skipped in 182.09s (0:03:02) ==========