From 94e053ac10599235bc9920f3a19e373078a6364a Mon Sep 17 00:00:00 2001
From: Martin Kroeker
Date: Sat, 11 Apr 2026 19:27:31 +0200
Subject: [PATCH] Work around miscompilation of the AVX512 ?GEMM kernels by Windows LLVM

---
Review notes (this section is skipped by git am and is not part of the
commit message):

When $(C_COMPILER)$(OS_WINDOWS) expands to CLANG1, SGEMM/STRMM and
DGEMM/DTRMM are pointed at the .S kernels and CGEMM/ZGEMM at the Haswell
C kernels; every other configuration keeps the SkylakeX C kernels that
were selected before this change.

NOTE(review): line breaks and blank context lines were restored from the
hunk line counts; spacing inside each line is kept as received. If the
context does not match the target file, try "git apply --ignore-whitespace"
or "patch -l" and confirm the result against the upstream commit.

 kernel/x86_64/KERNEL.SKYLAKEX | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/kernel/x86_64/KERNEL.SKYLAKEX b/kernel/x86_64/KERNEL.SKYLAKEX
index 7e946ef2ea..32ea75b902 100644
--- a/kernel/x86_64/KERNEL.SKYLAKEX
+++ b/kernel/x86_64/KERNEL.SKYLAKEX
@@ -1,7 +1,12 @@
 include $(KERNELDIR)/KERNEL.HASWELL
 
+ifeq ($(C_COMPILER)$(OS_WINDOWS), CLANG1)
+SGEMMKERNEL = sgemm_kernel_16x4_skylakex.S
+STRMMKERNEL = sgemm_kernel_16x4_skylakex.S
+else
 SGEMMKERNEL = sgemm_kernel_16x4_skylakex_3.c
 STRMMKERNEL = sgemm_kernel_16x4_skylakex_2.c
+endif
 SGEMMINCOPY = ../generic/gemm_ncopy_16.c
 SGEMMITCOPY = sgemm_tcopy_16_skylakex.c
 SGEMMONCOPY = sgemm_ncopy_4_skylakex.c
@@ -20,8 +25,13 @@ SGEMM_SMALL_K_B0_TN = sgemm_small_kernel_tn_skylakex.c
 SGEMM_SMALL_K_TT = sgemm_small_kernel_tt_skylakex.c
 SGEMM_SMALL_K_B0_TT = sgemm_small_kernel_tt_skylakex.c
 
+ifeq ($(C_COMPILER)$(OS_WINDOWS), CLANG1)
+DGEMMKERNEL = dgemm_kernel_16x2_skylakex.S
+DTRMMKERNEL = dgemm_kernel_16x2_skylakex.S
+else
 DGEMMKERNEL = dgemm_kernel_16x2_skylakex.c
 DTRMMKERNEL = dgemm_kernel_16x2_skylakex.c
+endif
 DGEMMINCOPY = ../generic/gemm_ncopy_16.c
 DGEMMITCOPY = dgemm_tcopy_16_skylakex.c
 DGEMMONCOPY = ../generic/gemm_ncopy_2.c
@@ -41,8 +51,13 @@ DGEMM_SMALL_K_B0_TT = dgemm_small_kernel_tt_skylakex.c
 SGEMM_BETA = sgemm_beta_skylakex.c
 DGEMM_BETA = dgemm_beta_skylakex.c
 
+ifeq ($(C_COMPILER)$(OS_WINDOWS), CLANG1)
+CGEMMKERNEL = cgemm_kernel_8x2_haswell.c
+ZGEMMKERNEL = zgemm_kernel_4x2_haswell.c
+else
 CGEMMKERNEL = cgemm_kernel_8x2_skylakex.c
 ZGEMMKERNEL = zgemm_kernel_4x2_skylakex.c
+endif
 
 CASUMKERNEL = casum.c
 ZASUMKERNEL = zasum.c