diff --git a/.container_build_image b/.container_build_image new file mode 100644 index 0000000000000..af38467697a9e --- /dev/null +++ b/.container_build_image @@ -0,0 +1 @@ +rocky-9-kernel-builder diff --git a/.github/workflows/kernel-build-and-test-multiarch-trigger.yml b/.github/workflows/kernel-build-and-test-multiarch-trigger.yml new file mode 100644 index 0000000000000..31a4b41665512 --- /dev/null +++ b/.github/workflows/kernel-build-and-test-multiarch-trigger.yml @@ -0,0 +1,11 @@ +name: Trigger Automated kernel build and test (multi-arch) + +on: + push: + branches: + - '*_rlc-9/**' + +jobs: + kernelCI: + uses: ctrliq/kernel-src-tree/.github/workflows/kernel-build-and-test-multiarch-trigger.yml@main + secrets: inherit diff --git a/.github/workflows/validate-kernel-commits.yml b/.github/workflows/validate-kernel-commits.yml new file mode 100644 index 0000000000000..c74434336e251 --- /dev/null +++ b/.github/workflows/validate-kernel-commits.yml @@ -0,0 +1,10 @@ +name: Validate Kernel Commits + +on: + pull_request: + types: [opened, synchronize, reopened] + +jobs: + check: + uses: ctrliq/kernel-src-tree/.github/workflows/validate-kernel-commits.yml@main + secrets: inherit diff --git a/Documentation/arm64/silicon-errata.rst b/Documentation/arm64/silicon-errata.rst index 29c8e3936f93b..67939c6fed9af 100644 --- a/Documentation/arm64/silicon-errata.rst +++ b/Documentation/arm64/silicon-errata.rst @@ -128,11 +128,22 @@ stable kernels. +----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A76 | #3324349 | ARM64_ERRATUM_3194386 | +----------------+-----------------+-----------------+-----------------------------+ +| ARM | Cortex-A76 | #4193800 | ARM64_ERRATUM_4118414 | ++----------------+-----------------+-----------------+-----------------------------+ +| ARM | Cortex-A76AE | #4193801 | ARM64_ERRATUM_4118414 | ++----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A77 | #1491015 | N/A | +----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A77 | #1508412 | ARM64_ERRATUM_1508412 | +----------------+-----------------+-----------------+-----------------------------+ -<<<<<<< HEAD:Documentation/arm64/silicon-errata.rst +| ARM | Cortex-A77 | #4193798 | ARM64_ERRATUM_4118414 | ++----------------+-----------------+-----------------+-----------------------------+ +| ARM | Cortex-A78 | #4193791 | ARM64_ERRATUM_4118414 | ++----------------+-----------------+-----------------+-----------------------------+ +| ARM | Cortex-A78AE | #4193793 | ARM64_ERRATUM_4118414 | ++----------------+-----------------+-----------------+-----------------------------+ +| ARM | Cortex-A78C | #4193794 | ARM64_ERRATUM_4118414 | ++----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A510 | #2051678 | ARM64_ERRATUM_2051678 | +----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A510 | #2077057 | ARM64_ERRATUM_2077057 | @@ -140,13 +151,12 @@ stable kernels. | ARM | Cortex-A510 | #2441009 | ARM64_ERRATUM_2441009 | +----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A510 | #2658417 | ARM64_ERRATUM_2658417 | -======= ++----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A77 | #3324348 | ARM64_ERRATUM_3194386 | +----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A78 | #3324344 | ARM64_ERRATUM_3194386 | +----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A78C | #3324346,3324347| ARM64_ERRATUM_3194386 | ->>>>>>> adeec61a4723 (arm64: errata: Expand speculative SSBS workaround (again)):Documentation/arch/arm64/silicon-errata.rst +----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A710 | #2119858 | ARM64_ERRATUM_2119858 | +----------------+-----------------+-----------------+-----------------------------+ @@ -156,6 +166,8 @@ stable kernels. +----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A710 | #3324338 | ARM64_ERRATUM_3194386 | +----------------+-----------------+-----------------+-----------------------------+ +| ARM | Cortex-A710 | #4193788 | ARM64_ERRATUM_4118414 | ++----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A715 | #2645198 | ARM64_ERRATUM_2645198 | +----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A715 | #3456084 | ARM64_ERRATUM_3194386 | @@ -170,18 +182,30 @@ stable kernels. +----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-X1C | #3324346 | ARM64_ERRATUM_3194386 | +----------------+-----------------+-----------------+-----------------------------+ +| ARM | Cortex-X1 | #4193791 | ARM64_ERRATUM_4118414 | ++----------------+-----------------+-----------------+-----------------------------+ +| ARM | Cortex-X1C | #4193792 | ARM64_ERRATUM_4118414 | ++----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-X2 | #2119858 | ARM64_ERRATUM_2119858 | +----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-X2 | #2224489 | ARM64_ERRATUM_2224489 | +----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-X2 | #3324338 | ARM64_ERRATUM_3194386 | +----------------+-----------------+-----------------+-----------------------------+ +| ARM | Cortex-X2 | #4193788 | ARM64_ERRATUM_4118414 | ++----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-X3 | #3324335 | ARM64_ERRATUM_3194386 | +----------------+-----------------+-----------------+-----------------------------+ +| ARM | Cortex-X3 | #4193786 | ARM64_ERRATUM_4118414 | ++----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-X4 | #3194386 | ARM64_ERRATUM_3194386 | +----------------+-----------------+-----------------+-----------------------------+ +| ARM | Cortex-X4 | #4118414 | ARM64_ERRATUM_4118414 | ++----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-X925 | #3324334 | ARM64_ERRATUM_3194386 | +----------------+-----------------+-----------------+-----------------------------+ +| ARM | Cortex-X925 | #4193781 | ARM64_ERRATUM_4118414 | ++----------------+-----------------+-----------------+-----------------------------+ | ARM | Neoverse-N1 | #1188873,1418040| ARM64_ERRATUM_1418040 | +----------------+-----------------+-----------------+-----------------------------+ | ARM | Neoverse-N1 | #1349291 | N/A | @@ -192,6 +216,8 @@ stable kernels. +----------------+-----------------+-----------------+-----------------------------+ | ARM | Neoverse-N1 | #3324349 | ARM64_ERRATUM_3194386 | +----------------+-----------------+-----------------+-----------------------------+ +| ARM | Neoverse-N1 | #4193800 | ARM64_ERRATUM_4118414 | ++----------------+-----------------+-----------------+-----------------------------+ | ARM | Neoverse-N2 | #2139208 | ARM64_ERRATUM_2139208 | +----------------+-----------------+-----------------+-----------------------------+ | ARM | Neoverse-N2 | #2067961 | ARM64_ERRATUM_2067961 | @@ -206,13 +232,31 @@ stable kernels. +----------------+-----------------+-----------------+-----------------------------+ | ARM | Neoverse-V1 | #3324341 | ARM64_ERRATUM_3194386 | +----------------+-----------------+-----------------+-----------------------------+ +| ARM | Neoverse-N2 | #4193789 | ARM64_ERRATUM_4118414 | ++----------------+-----------------+-----------------+-----------------------------+ | ARM | Neoverse-V2 | #3324336 | ARM64_ERRATUM_3194386 | +----------------+-----------------+-----------------+-----------------------------+ +| ARM | Neoverse-V2 | #4193787 | ARM64_ERRATUM_4118414 | ++----------------+-----------------+-----------------+-----------------------------+ | ARM | Neoverse-V3 | #3312417 | ARM64_ERRATUM_3194386 | +----------------+-----------------+-----------------+-----------------------------+ | ARM | MMU-500 | #841119,826419 | ARM_SMMU_MMU_500_CPRE_ERRATA| | | | #562869,1047329 | | +----------------+-----------------+-----------------+-----------------------------+ +| ARM | Neoverse-V3 | #4193784 | ARM64_ERRATUM_4118414 | ++----------------+-----------------+-----------------+-----------------------------+ +| ARM | Neoverse-V3AE | #4193784 | ARM64_ERRATUM_4118414 | ++----------------+-----------------+-----------------+-----------------------------+ +| ARM | Neoverse-V1 | #1619801 | N/A | ++----------------+-----------------+-----------------+-----------------------------+ +| ARM | Neoverse-V1 | #4193790 | ARM64_ERRATUM_4118414 | ++----------------+-----------------+-----------------+-----------------------------+ +| ARM | C1-Premium | #4193780 | ARM64_ERRATUM_4118414 | ++----------------+-----------------+-----------------+-----------------------------+ +| ARM | C1-Ultra | #4193780 | ARM64_ERRATUM_4118414 | ++----------------+-----------------+-----------------+-----------------------------+ +| ARM | MMU-500 | #841119,826419 | N/A | ++----------------+-----------------+-----------------+-----------------------------+ | ARM | MMU-600 | #1076982,1209401| N/A | +----------------+-----------------+-----------------+-----------------------------+ | ARM | MMU-700 | #2268618,2812531| N/A | @@ -252,6 +296,8 @@ stable kernels. +----------------+-----------------+-----------------+-----------------------------+ | NVIDIA | Carmel Core | N/A | NVIDIA_CARMEL_CNP_ERRATUM | +----------------+-----------------+-----------------+-----------------------------+ +| NVIDIA | Olympus core | T410-OLY-1029 | ARM64_ERRATUM_4118414 | ++----------------+-----------------+-----------------+-----------------------------+ | NVIDIA | T241 GICv3/4.x | T241-FABRIC-4 | N/A | +----------------+-----------------+-----------------+-----------------------------+ +----------------+-----------------+-----------------+-----------------------------+ @@ -308,3 +354,5 @@ stable kernels. +----------------+-----------------+-----------------+-----------------------------+ | Microsoft | Azure Cobalt 100| #3324339 | ARM64_ERRATUM_3194386 | +----------------+-----------------+-----------------+-----------------------------+ +| Microsoft | Azure Cobalt 100| #4193789 | ARM64_ERRATUM_4118414 | ++----------------+-----------------+-----------------+-----------------------------+ diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 60c6bfae4f996..fba3c2dee4544 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1119,6 +1119,44 @@ config ARM64_ERRATUM_3194386 If unsure, say Y. +config ARM64_ERRATUM_4118414 + bool "Various: Completion of affected memory accesses might not be guaranteed by completion of a TLBI" + default y + select ARM64_WORKAROUND_REPEAT_TLBI + help + This option adds a workaround for the following errata: + + * ARM C1-Premium erratum 4193780 + * ARM C1-Ultra erratum 4193780 + * ARM Cortex-A76 erratum 4193800 + * ARM Cortex-A76AE erratum 4193801 + * ARM Cortex-A77 erratum 4193798 + * ARM Cortex-A78 erratum 4193791 + * ARM Cortex-A78AE erratum 4193793 + * ARM Cortex-A78C erratum 4193794 + * ARM Cortex-A710 erratum 4193788 + * ARM Cortex-X1 erratum 4193791 + * ARM Cortex-X1C erratum 4193792 + * ARM Cortex-X2 erratum 4193788 + * ARM Cortex-X3 erratum 4193786 + * ARM Cortex-X4 erratum 4118414 + * ARM Cortex-X925 erratum 4193781 + * ARM Neoverse-N1 erratum 4193800 + * ARM Neoverse-N2 erratum 4193789 + * ARM Neoverse-V1 erratum 4193790 + * ARM Neoverse-V2 erratum 4193787 + * ARM Neoverse-V3 erratum 4193784 + * ARM Neoverse-V3AE erratum 4193784 + * Microsoft Azure Cobalt 100 4193789 + * NVIDIA Olympus erratum T410-OLY-1029 + + On affected cores, some memory accesses might not be completed by + broadcast TLB invalidation. + + This issue is also known as CVE-2025-10263. + + If unsure, say Y. + config CAVIUM_ERRATUM_22375 bool "Cavium erratum 22375, 24313" default y diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 751b78cd31349..34440a3cb5a6f 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -93,11 +93,15 @@ #define ARM_CPU_PART_NEOVERSE_V2 0xD4F #define ARM_CPU_PART_CORTEX_A720 0xD81 #define ARM_CPU_PART_CORTEX_X4 0xD82 +#define ARM_CPU_PART_NEOVERSE_V3AE 0xD83 #define ARM_CPU_PART_NEOVERSE_V3 0xD84 #define ARM_CPU_PART_CORTEX_X925 0xD85 #define ARM_CPU_PART_CORTEX_A725 0xD87 #define ARM_CPU_PART_CORTEX_A720AE 0xD89 +#define ARM_CPU_PART_C1_ULTRA 0xD8C #define ARM_CPU_PART_NEOVERSE_N3 0xD8E +#define ARM_CPU_PART_C1_PRO 0xD8B +#define ARM_CPU_PART_C1_PREMIUM 0xD90 #define APM_CPU_PART_XGENE 0x000 #define APM_CPU_VAR_POTENZA 0x00 @@ -128,6 +132,7 @@ #define NVIDIA_CPU_PART_DENVER 0x003 #define NVIDIA_CPU_PART_CARMEL 0x004 +#define NVIDIA_CPU_PART_OLYMPUS 0x010 #define FUJITSU_CPU_PART_A64FX 0x001 @@ -180,11 +185,15 @@ #define MIDR_NEOVERSE_V2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_V2) #define MIDR_CORTEX_A720 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A720) #define MIDR_CORTEX_X4 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X4) +#define MIDR_NEOVERSE_V3AE MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_V3AE) #define MIDR_NEOVERSE_V3 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_V3) #define MIDR_CORTEX_X925 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X925) #define MIDR_CORTEX_A725 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A725) #define MIDR_CORTEX_A720AE MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A720AE) +#define MIDR_C1_ULTRA MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_C1_ULTRA) #define MIDR_NEOVERSE_N3 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N3) +#define MIDR_C1_PRO MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_C1_PRO) +#define MIDR_C1_PREMIUM MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_C1_PREMIUM) #define MIDR_THUNDERX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX) #define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX) #define MIDR_THUNDERX_83XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_83XX) @@ -207,6 +216,7 @@ #define MIDR_QCOM_KRYO_4XX_SILVER MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_4XX_SILVER) #define MIDR_NVIDIA_DENVER MIDR_CPU_MODEL(ARM_CPU_IMP_NVIDIA, NVIDIA_CPU_PART_DENVER) #define MIDR_NVIDIA_CARMEL MIDR_CPU_MODEL(ARM_CPU_IMP_NVIDIA, NVIDIA_CPU_PART_CARMEL) +#define MIDR_NVIDIA_OLYMPUS MIDR_CPU_MODEL(ARM_CPU_IMP_NVIDIA, NVIDIA_CPU_PART_OLYMPUS) #define MIDR_FUJITSU_A64FX MIDR_CPU_MODEL(ARM_CPU_IMP_FUJITSU, FUJITSU_CPU_PART_A64FX) #define MIDR_HISI_TSV110 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_TSV110) #define MIDR_HISI_HIP09 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_HIP09) diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index e08910c404502..5c8887162d219 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -225,7 +225,37 @@ static const struct arm64_cpu_capabilities arm64_repeat_tlbi_list[] = { ERRATA_MIDR_RANGE(MIDR_CORTEX_A510, 0, 0, 1, 1), }, #endif - {}, +#ifdef CONFIG_ARM64_ERRATUM_4118414 + { + ERRATA_MIDR_RANGE_LIST(((const struct midr_range[]) { + MIDR_ALL_VERSIONS(MIDR_C1_PREMIUM), + MIDR_ALL_VERSIONS(MIDR_C1_ULTRA), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A76), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A76AE), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A77), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A78), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A78AE), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A78C), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A710), + MIDR_ALL_VERSIONS(MIDR_CORTEX_X1), + MIDR_ALL_VERSIONS(MIDR_CORTEX_X1C), + MIDR_ALL_VERSIONS(MIDR_CORTEX_X2), + MIDR_ALL_VERSIONS(MIDR_CORTEX_X3), + MIDR_ALL_VERSIONS(MIDR_CORTEX_X4), + MIDR_ALL_VERSIONS(MIDR_CORTEX_X925), + MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N1), + MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2), + MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V1), + MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V2), + MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V3), + MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V3AE), + MIDR_ALL_VERSIONS(MIDR_NVIDIA_OLYMPUS), + MIDR_ALL_VERSIONS(MIDR_MICROSOFT_AZURE_COBALT_100), + {} + })), + }, +#endif + {} }; #endif @@ -560,7 +590,7 @@ const struct arm64_cpu_capabilities arm64_errata[] = { #endif #ifdef CONFIG_ARM64_WORKAROUND_REPEAT_TLBI { - .desc = "Qualcomm erratum 1009, or ARM erratum 1286807, 2441009", + .desc = "Broken broadcast TLBI completion", .capability = ARM64_WORKAROUND_REPEAT_TLBI, .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, .matches = cpucap_multi_entry_cap_matches, diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c index b9ffd7e3f2a1b..c8bbb611f30b8 100644 --- a/arch/arm64/kvm/nested.c +++ b/arch/arm64/kvm/nested.c @@ -61,22 +61,27 @@ int kvm_vcpu_init_nested(struct kvm_vcpu *vcpu) * again, and there is no reason to affect the whole VM for this. */ num_mmus = atomic_read(&kvm->online_vcpus) * S2_MMU_PER_VCPU; - tmp = kvrealloc(kvm->arch.nested_mmus, - size_mul(sizeof(*kvm->arch.nested_mmus), kvm->arch.nested_mmus_size), - size_mul(sizeof(*kvm->arch.nested_mmus), num_mmus), - GFP_KERNEL_ACCOUNT | __GFP_ZERO); - if (!tmp) - return -ENOMEM; + if (num_mmus > kvm->arch.nested_mmus_size) { + tmp = kvcalloc(num_mmus, sizeof(*tmp), GFP_KERNEL_ACCOUNT); + if (!tmp) + return -ENOMEM; - swap(kvm->arch.nested_mmus, tmp); + write_lock(&kvm->mmu_lock); - /* - * If we went through a realocation, adjust the MMU back-pointers in - * the previously initialised kvm_pgtable structures. - */ - if (kvm->arch.nested_mmus != tmp) - for (int i = 0; i < kvm->arch.nested_mmus_size; i++) - kvm->arch.nested_mmus[i].pgt->mmu = &kvm->arch.nested_mmus[i]; + if (kvm->arch.nested_mmus_size) { + memcpy(tmp, kvm->arch.nested_mmus, + size_mul(sizeof(*tmp), kvm->arch.nested_mmus_size)); + + for (int i = 0; i < kvm->arch.nested_mmus_size; i++) + tmp[i].pgt->mmu = &tmp[i]; + } + + swap(kvm->arch.nested_mmus, tmp); + + write_unlock(&kvm->mmu_lock); + + kvfree(tmp); + } for (int i = kvm->arch.nested_mmus_size; !ret && i < num_mmus; i++) ret = init_nested_s2_mmu(kvm, &kvm->arch.nested_mmus[i]); diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c index 9debb2f184223..9998f4ce4c875 100644 --- a/arch/arm64/kvm/vgic/vgic-its.c +++ b/arch/arm64/kvm/vgic/vgic-its.c @@ -627,8 +627,10 @@ static void vgic_its_invalidate_cache(struct vgic_its *its) unsigned long idx; xa_for_each(&its->translation_cache, idx, irq) { - xa_erase(&its->translation_cache, idx); - vgic_put_irq(kvm, irq); + /* Only the context that erases the entry drops its cache ref. */ + irq = xa_erase(&its->translation_cache, idx); + if (irq) + vgic_put_irq(kvm, irq); } } diff --git a/configs/kernel-x86_64-debug-rhel.config b/configs/kernel-x86_64-debug-rhel.config index d99b95c1e81fe..d29b98c9dd1bf 100644 --- a/configs/kernel-x86_64-debug-rhel.config +++ b/configs/kernel-x86_64-debug-rhel.config @@ -7353,3 +7353,14 @@ CONFIG_ZSWAP=y # CONFIG_ZSWAP_ZPOOL_DEFAULT_Z3FOLD is not set CONFIG_ZSWAP_ZPOOL_DEFAULT_ZBUD=y # CONFIG_ZSWAP_ZPOOL_DEFAULT_ZSMALLOC is not set + +CONFIG_X509_CERTIFICATE_PARSER=y +CONFIG_PKCS7_MESSAGE_PARSER=y +CONFIG_FIPS_SIGNATURE_SELFTEST=y +CONFIG_FIPS_SIGNATURE_SELFTEST_RSA=y +CONFIG_FIPS_SIGNATURE_SELFTEST_ECDSA=y +CONFIG_CRYPTO_DRBG=y +CONFIG_CRYPTO_FIPS=y +CONFIG_CRYPTO_FIPS_CUSTOM_VERSION=y +CONFIG_CRYPTO_FIPS_VERSION="rocky9.20250725" +CONFIG_CRYPTO_FIPS_NAME="Rocky Linux 9 Kernel Cryptographic API" diff --git a/configs/kernel-x86_64-rhel.config b/configs/kernel-x86_64-rhel.config index 3fa1e61f63802..38c9519b4bf6c 100644 --- a/configs/kernel-x86_64-rhel.config +++ b/configs/kernel-x86_64-rhel.config @@ -7330,3 +7330,14 @@ CONFIG_ZSWAP=y # CONFIG_ZSWAP_ZPOOL_DEFAULT_Z3FOLD is not set CONFIG_ZSWAP_ZPOOL_DEFAULT_ZBUD=y # CONFIG_ZSWAP_ZPOOL_DEFAULT_ZSMALLOC is not set + +CONFIG_X509_CERTIFICATE_PARSER=y +CONFIG_PKCS7_MESSAGE_PARSER=y +CONFIG_FIPS_SIGNATURE_SELFTEST=y +CONFIG_FIPS_SIGNATURE_SELFTEST_RSA=y +CONFIG_FIPS_SIGNATURE_SELFTEST_ECDSA=y +CONFIG_CRYPTO_DRBG=y +CONFIG_CRYPTO_FIPS=y +CONFIG_CRYPTO_FIPS_CUSTOM_VERSION=y +CONFIG_CRYPTO_FIPS_VERSION="rocky9.20250725" +CONFIG_CRYPTO_FIPS_NAME="Rocky Linux 9 Kernel Cryptographic API" diff --git a/crypto/Kconfig b/crypto/Kconfig index 79056212e933f..a2b6c43b05a27 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -23,12 +23,12 @@ if CRYPTO comment "Crypto core or helper" config CRYPTO_FIPS - bool "FIPS 200 compliance" - depends on (CRYPTO_ANSI_CPRNG || CRYPTO_DRBG) && !CRYPTO_MANAGER_DISABLE_TESTS + bool "FIPS compliance" + depends on CRYPTO_DRBG=y && !CRYPTO_MANAGER_DISABLE_TESTS depends on (MODULE_SIG || !MODULES) help This option enables the fips boot option which is - required if you want the system to operate in a FIPS 200 + required if you want the system to operate in a FIPS certification. You should say no unless you know what this is. @@ -2037,6 +2037,7 @@ config CRYPTO_ANSI_CPRNG tristate "Pseudo Random Number Generation for Cryptographic modules" select CRYPTO_AES select CRYPTO_RNG + select CRYPTO_SHA3 help This option enables the generic pseudo random number generator for cryptographic modules. Uses the Algorithm specified in diff --git a/crypto/aead.c b/crypto/aead.c index 16991095270d2..c4ece86c45bc4 100644 --- a/crypto/aead.c +++ b/crypto/aead.c @@ -35,8 +35,7 @@ static int setkey_unaligned(struct crypto_aead *tfm, const u8 *key, alignbuffer = (u8 *)ALIGN((unsigned long)buffer, alignmask + 1); memcpy(alignbuffer, key, keylen); ret = crypto_aead_alg(tfm)->setkey(tfm, alignbuffer, keylen); - memset(alignbuffer, 0, keylen); - kfree(buffer); + kfree_sensitive(buffer); return ret; } diff --git a/crypto/cipher.c b/crypto/cipher.c index b47141ed4a9f3..395f0c2fbb9ff 100644 --- a/crypto/cipher.c +++ b/crypto/cipher.c @@ -34,8 +34,7 @@ static int setkey_unaligned(struct crypto_cipher *tfm, const u8 *key, alignbuffer = (u8 *)ALIGN((unsigned long)buffer, alignmask + 1); memcpy(alignbuffer, key, keylen); ret = cia->cia_setkey(crypto_cipher_tfm(tfm), alignbuffer, keylen); - memset(alignbuffer, 0, keylen); - kfree(buffer); + kfree_sensitive(buffer); return ret; } diff --git a/crypto/drbg.c b/crypto/drbg.c index accf425de57f7..d14cc09b5d399 100644 --- a/crypto/drbg.c +++ b/crypto/drbg.c @@ -1283,6 +1283,12 @@ static inline int drbg_alloc_state(struct drbg_state *drbg) if (ret < 0) goto err; + /* + * Align to at least a cache line for better performance. This also + * prevents false sharing of cache lines between different instances. + */ + ret = max(ret, L1_CACHE_BYTES - 1); + drbg->Vbuf = kmalloc(drbg_statelen(drbg) + ret, GFP_KERNEL); if (!drbg->Vbuf) { ret = -ENOMEM; diff --git a/crypto/ecdh.c b/crypto/ecdh.c index fe8966511e9d7..85c64f1a40df2 100644 --- a/crypto/ecdh.c +++ b/crypto/ecdh.c @@ -10,6 +10,7 @@ #include #include #include +#include #include "ecc.h" struct ecdh_ctx { @@ -33,6 +34,8 @@ static int ecdh_set_secret(struct crypto_kpp *tfm, const void *buf, params.key_size > sizeof(u64) * ctx->ndigits) return -EINVAL; + memset(ctx->private_key, 0, sizeof(ctx->private_key)); + if (!params.key || !params.key_size) return ecc_gen_privkey(ctx->curve_id, ctx->ndigits, ctx->private_key); @@ -94,6 +97,36 @@ static int ecdh_compute_value(struct kpp_request *req) ctx->private_key, public_key); buf = public_key; nbytes = public_key_sz; + + /* + * SP800-56Arev3, 5.6.2.1.4: ("Owner Assurance of + * Pair-wise Consistency"): recompute the public key + * and check if the results match. + */ + if (fips_enabled) { + u64 *public_key_pct; + + if (ret < 0) + goto free_all; + + public_key_pct = kmalloc(public_key_sz, GFP_KERNEL); + if (!public_key_pct) { + ret = -ENOMEM; + goto free_all; + } + + ret = ecc_make_pub_key(ctx->curve_id, ctx->ndigits, + ctx->private_key, + public_key_pct); + if (ret < 0) { + kfree(public_key_pct); + goto free_all; + } + + if (memcmp(public_key, public_key_pct, public_key_sz)) + panic("ECDH PCT failed in FIPS mode"); + kfree(public_key_pct); + } } if (ret < 0) diff --git a/crypto/essiv.c b/crypto/essiv.c index 8bcc5bdcb2a95..ec81bdea25631 100644 --- a/crypto/essiv.c +++ b/crypto/essiv.c @@ -114,13 +114,16 @@ static int essiv_aead_setkey(struct crypto_aead *tfm, const u8 *key, crypto_shash_update(desc, keys.enckey, keys.enckeylen) ?: crypto_shash_finup(desc, keys.authkey, keys.authkeylen, salt); if (err) - return err; + goto out; crypto_cipher_clear_flags(tctx->essiv_cipher, CRYPTO_TFM_REQ_MASK); crypto_cipher_set_flags(tctx->essiv_cipher, crypto_aead_get_flags(tfm) & CRYPTO_TFM_REQ_MASK); - return crypto_cipher_setkey(tctx->essiv_cipher, salt, - crypto_shash_digestsize(tctx->hash)); + err = crypto_cipher_setkey(tctx->essiv_cipher, salt, + crypto_shash_digestsize(tctx->hash)); +out: + memzero_explicit(&keys, sizeof(keys)); + return err; } static int essiv_aead_setauthsize(struct crypto_aead *tfm, diff --git a/crypto/jitterentropy-kcapi.c b/crypto/jitterentropy-kcapi.c index b9edfaa51b273..4b50cbc8a2faf 100644 --- a/crypto/jitterentropy-kcapi.c +++ b/crypto/jitterentropy-kcapi.c @@ -2,7 +2,7 @@ * Non-physical true random number generator based on timing jitter -- * Linux Kernel Crypto API specific code * - * Copyright Stephan Mueller , 2015 + * Copyright Stephan Mueller , 2015 - 2023 * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -37,6 +37,8 @@ * DAMAGE. */ +#include +#include #include #include #include @@ -46,6 +48,8 @@ #include "jitterentropy.h" +#define JENT_CONDITIONING_HASH "sha3-256-generic" + /*************************************************************************** * Helper function ***************************************************************************/ @@ -60,11 +64,6 @@ void jent_zfree(void *ptr) kfree_sensitive(ptr); } -void jent_memcpy(void *dest, const void *src, unsigned int n) -{ - memcpy(dest, src, n); -} - /* * Obtain a high-resolution time stamp value. The time stamp is used to measure * the execution time of a given code path and its variations. Hence, the time @@ -91,6 +90,91 @@ void jent_get_nstime(__u64 *out) *out = tmp; } +int jent_hash_time(void *hash_state, __u64 time, u8 *addtl, + unsigned int addtl_len, __u64 hash_loop_cnt, + unsigned int stuck) +{ + struct shash_desc *hash_state_desc = (struct shash_desc *)hash_state; + SHASH_DESC_ON_STACK(desc, hash_state_desc->tfm); + u8 intermediary[SHA3_256_DIGEST_SIZE]; + __u64 j = 0; + int ret; + + desc->tfm = hash_state_desc->tfm; + + if (sizeof(intermediary) != crypto_shash_digestsize(desc->tfm)) { + pr_warn_ratelimited("Unexpected digest size\n"); + return -EINVAL; + } + + /* + * This loop fills a buffer which is injected into the entropy pool. + * The main reason for this loop is to execute something over which we + * can perform a timing measurement. The injection of the resulting + * data into the pool is performed to ensure the result is used and + * the compiler cannot optimize the loop away in case the result is not + * used at all. Yet that data is considered "additional information" + * considering the terminology from SP800-90A without any entropy. + * + * Note, it does not matter which or how much data you inject, we are + * interested in one Keccack1600 compression operation performed with + * the crypto_shash_final. + */ + for (j = 0; j < hash_loop_cnt; j++) { + ret = crypto_shash_init(desc) ?: + crypto_shash_update(desc, intermediary, + sizeof(intermediary)) ?: + crypto_shash_finup(desc, addtl, addtl_len, intermediary); + if (ret) + goto err; + } + + /* + * Inject the data from the previous loop into the pool. This data is + * not considered to contain any entropy, but it stirs the pool a bit. + */ + ret = crypto_shash_update(desc, intermediary, sizeof(intermediary)); + if (ret) + goto err; + + /* + * Insert the time stamp into the hash context representing the pool. + * + * If the time stamp is stuck, do not finally insert the value into the + * entropy pool. Although this operation should not do any harm even + * when the time stamp has no entropy, SP800-90B requires that any + * conditioning operation to have an identical amount of input data + * according to section 3.1.5. + */ + if (!stuck) { + ret = crypto_shash_update(hash_state_desc, (u8 *)&time, + sizeof(__u64)); + } + +err: + shash_desc_zero(desc); + memzero_explicit(intermediary, sizeof(intermediary)); + + return ret; +} + +int jent_read_random_block(void *hash_state, char *dst, unsigned int dst_len) +{ + struct shash_desc *hash_state_desc = (struct shash_desc *)hash_state; + u8 jent_block[SHA3_256_DIGEST_SIZE]; + /* Obtain data from entropy pool and re-initialize it */ + int ret = crypto_shash_final(hash_state_desc, jent_block) ?: + crypto_shash_init(hash_state_desc) ?: + crypto_shash_update(hash_state_desc, jent_block, + sizeof(jent_block)); + + if (!ret && dst_len) + memcpy(dst, jent_block, dst_len); + + memzero_explicit(jent_block, sizeof(jent_block)); + return ret; +} + /*************************************************************************** * Kernel crypto API interface ***************************************************************************/ @@ -98,32 +182,82 @@ void jent_get_nstime(__u64 *out) struct jitterentropy { spinlock_t jent_lock; struct rand_data *entropy_collector; + struct crypto_shash *tfm; + struct shash_desc *sdesc; }; -static int jent_kcapi_init(struct crypto_tfm *tfm) +static void jent_kcapi_cleanup(struct crypto_tfm *tfm) { struct jitterentropy *rng = crypto_tfm_ctx(tfm); - int ret = 0; - rng->entropy_collector = jent_entropy_collector_alloc(1, 0); - if (!rng->entropy_collector) - ret = -ENOMEM; + spin_lock(&rng->jent_lock); - spin_lock_init(&rng->jent_lock); - return ret; -} + if (rng->sdesc) { + shash_desc_zero(rng->sdesc); + kfree(rng->sdesc); + } + rng->sdesc = NULL; -static void jent_kcapi_cleanup(struct crypto_tfm *tfm) -{ - struct jitterentropy *rng = crypto_tfm_ctx(tfm); + if (rng->tfm) + crypto_free_shash(rng->tfm); + rng->tfm = NULL; - spin_lock(&rng->jent_lock); if (rng->entropy_collector) jent_entropy_collector_free(rng->entropy_collector); rng->entropy_collector = NULL; spin_unlock(&rng->jent_lock); } +static int jent_kcapi_init(struct crypto_tfm *tfm) +{ + struct jitterentropy *rng = crypto_tfm_ctx(tfm); + struct crypto_shash *hash; + struct shash_desc *sdesc; + int size, ret = 0; + + spin_lock_init(&rng->jent_lock); + + /* + * Use SHA3-256 as conditioner. We allocate only the generic + * implementation as we are not interested in high-performance. The + * execution time of the SHA3 operation is measured and adds to the + * Jitter RNG's unpredictable behavior. If we have a slower hash + * implementation, the execution timing variations are larger. When + * using a fast implementation, we would need to call it more often + * as its variations are lower. + */ + hash = crypto_alloc_shash(JENT_CONDITIONING_HASH, 0, 0); + if (IS_ERR(hash)) { + pr_err("Cannot allocate conditioning digest\n"); + return PTR_ERR(hash); + } + rng->tfm = hash; + + size = sizeof(struct shash_desc) + crypto_shash_descsize(hash); + sdesc = kmalloc(size, GFP_KERNEL); + if (!sdesc) { + ret = -ENOMEM; + goto err; + } + + sdesc->tfm = hash; + crypto_shash_init(sdesc); + rng->sdesc = sdesc; + + rng->entropy_collector = jent_entropy_collector_alloc(1, 0, sdesc); + if (!rng->entropy_collector) { + ret = -ENOMEM; + goto err; + } + + spin_lock_init(&rng->jent_lock); + return 0; + +err: + jent_kcapi_cleanup(tfm); + return ret; +} + static int jent_kcapi_random(struct crypto_rng *tfm, const u8 *src, unsigned int slen, u8 *rdata, unsigned int dlen) @@ -180,15 +314,24 @@ static struct rng_alg jent_alg = { .cra_module = THIS_MODULE, .cra_init = jent_kcapi_init, .cra_exit = jent_kcapi_cleanup, - } }; static int __init jent_mod_init(void) { + SHASH_DESC_ON_STACK(desc, tfm); + struct crypto_shash *tfm; int ret = 0; - ret = jent_entropy_init(); + tfm = crypto_alloc_shash(JENT_CONDITIONING_HASH, 0, 0); + if (IS_ERR(tfm)) + return PTR_ERR(tfm); + + desc->tfm = tfm; + crypto_shash_init(desc); + ret = jent_entropy_init(desc); + shash_desc_zero(desc); + crypto_free_shash(tfm); if (ret) { /* Handle permanent health test error */ if (fips_enabled) diff --git a/crypto/jitterentropy.c b/crypto/jitterentropy.c index 227cedfa4f0ae..5b224d3d7442e 100644 --- a/crypto/jitterentropy.c +++ b/crypto/jitterentropy.c @@ -2,7 +2,7 @@ * Non-physical true random number generator based on timing jitter -- * Jitter RNG standalone code. * - * Copyright Stephan Mueller , 2015 - 2020 + * Copyright Stephan Mueller , 2015 - 2023 * * Design * ====== @@ -57,21 +57,22 @@ typedef unsigned long long __u64; typedef long long __s64; typedef unsigned int __u32; +typedef unsigned char u8; #define NULL ((void *) 0) /* The entropy pool */ struct rand_data { + /* SHA3-256 is used as conditioner */ +#define DATA_SIZE_BITS 256 /* all data values that are vital to maintain the security * of the RNG are marked as SENSITIVE. A user must not * access that information while the RNG executes its loops to * calculate the next random value. */ - __u64 data; /* SENSITIVE Actual random number */ - __u64 old_data; /* SENSITIVE Previous random number */ - __u64 prev_time; /* SENSITIVE Previous time stamp */ -#define DATA_SIZE_BITS ((sizeof(__u64)) * 8) - __u64 last_delta; /* SENSITIVE stuck test */ - __s64 last_delta2; /* SENSITIVE stuck test */ - unsigned int osr; /* Oversample rate */ + void *hash_state; /* SENSITIVE hash state entropy pool */ + __u64 prev_time; /* SENSITIVE Previous time stamp */ + __u64 last_delta; /* SENSITIVE stuck test */ + __s64 last_delta2; /* SENSITIVE stuck test */ + unsigned int osr; /* Oversample rate */ #define JENT_MEMORY_BLOCKS 64 #define JENT_MEMORY_BLOCKSIZE 32 #define JENT_MEMORY_ACCESSLOOPS 128 @@ -301,15 +302,13 @@ static int jent_permanent_health_failure(struct rand_data *ec) * an entropy collection. * * Input: - * @ec entropy collector struct -- may be NULL * @bits is the number of low bits of the timer to consider * @min is the number of bits we shift the timer value to the right at * the end to make sure we have a guaranteed minimum value * * @return Newly calculated loop counter */ -static __u64 jent_loop_shuffle(struct rand_data *ec, - unsigned int bits, unsigned int min) +static __u64 jent_loop_shuffle(unsigned int bits, unsigned int min) { __u64 time = 0; __u64 shuffle = 0; @@ -317,12 +316,7 @@ static __u64 jent_loop_shuffle(struct rand_data *ec, unsigned int mask = (1<data; + /* * We fold the time value as much as possible to ensure that as many * bits of the time stamp are included as possible. @@ -344,81 +338,32 @@ static __u64 jent_loop_shuffle(struct rand_data *ec, * execution time jitter * * This function injects the individual bits of the time value into the - * entropy pool using an LFSR. + * entropy pool using a hash. * - * The code is deliberately inefficient with respect to the bit shifting - * and shall stay that way. This function is the root cause why the code - * shall be compiled without optimization. This function not only acts as - * folding operation, but this function's execution is used to measure - * the CPU execution time jitter. Any change to the loop in this function - * implies that careful retesting must be done. - * - * @ec [in] entropy collector struct - * @time [in] time stamp to be injected - * @loop_cnt [in] if a value not equal to 0 is set, use the given value as - * number of loops to perform the folding - * @stuck [in] Is the time stamp identified as stuck? + * ec [in] entropy collector + * time [in] time stamp to be injected + * stuck [in] Is the time stamp identified as stuck? * * Output: - * updated ec->data - * - * @return Number of loops the folding operation is performed + * updated hash context in the entropy collector or error code */ -static void jent_lfsr_time(struct rand_data *ec, __u64 time, __u64 loop_cnt, - int stuck) +static int jent_condition_data(struct rand_data *ec, __u64 time, int stuck) { - unsigned int i; - __u64 j = 0; - __u64 new = 0; -#define MAX_FOLD_LOOP_BIT 4 -#define MIN_FOLD_LOOP_BIT 0 - __u64 fold_loop_cnt = - jent_loop_shuffle(ec, MAX_FOLD_LOOP_BIT, MIN_FOLD_LOOP_BIT); - - /* - * testing purposes -- allow test app to set the counter, not - * needed during runtime - */ - if (loop_cnt) - fold_loop_cnt = loop_cnt; - for (j = 0; j < fold_loop_cnt; j++) { - new = ec->data; - for (i = 1; (DATA_SIZE_BITS) >= i; i++) { - __u64 tmp = time << (DATA_SIZE_BITS - i); - - tmp = tmp >> (DATA_SIZE_BITS - 1); - - /* - * Fibonacci LSFR with polynomial of - * x^64 + x^61 + x^56 + x^31 + x^28 + x^23 + 1 which is - * primitive according to - * http://poincare.matf.bg.ac.rs/~ezivkovm/publications/primpol1.pdf - * (the shift values are the polynomial values minus one - * due to counting bits from 0 to 63). As the current - * position is always the LSB, the polynomial only needs - * to shift data in from the left without wrap. - */ - tmp ^= ((new >> 63) & 1); - tmp ^= ((new >> 60) & 1); - tmp ^= ((new >> 55) & 1); - tmp ^= ((new >> 30) & 1); - tmp ^= ((new >> 27) & 1); - tmp ^= ((new >> 22) & 1); - new <<= 1; - new ^= tmp; - } - } - - /* - * If the time stamp is stuck, do not finally insert the value into - * the entropy pool. Although this operation should not do any harm - * even when the time stamp has no entropy, SP800-90B requires that - * any conditioning operation (SP800-90B considers the LFSR to be a - * conditioning operation) to have an identical amount of input - * data according to section 3.1.5. - */ - if (!stuck) - ec->data = new; +#define SHA3_HASH_LOOP (1<<3) + struct { + int rct_count; + unsigned int apt_observations; + unsigned int apt_count; + unsigned int apt_base; + } addtl = { + ec->rct_count, + ec->apt_observations, + ec->apt_count, + ec->apt_base + }; + + return jent_hash_time(ec->hash_state, time, (u8 *)&addtl, sizeof(addtl), + SHA3_HASH_LOOP, stuck); } /* @@ -452,7 +397,7 @@ static void jent_memaccess(struct rand_data *ec, __u64 loop_cnt) #define MAX_ACC_LOOP_BIT 7 #define MIN_ACC_LOOP_BIT 0 __u64 acc_loop_cnt = - jent_loop_shuffle(ec, MAX_ACC_LOOP_BIT, MIN_ACC_LOOP_BIT); + jent_loop_shuffle(MAX_ACC_LOOP_BIT, MIN_ACC_LOOP_BIT); if (NULL == ec || NULL == ec->mem) return; @@ -520,14 +465,15 @@ static int jent_measure_jitter(struct rand_data *ec) stuck = jent_stuck(ec, current_delta); /* Now call the next noise sources which also injects the data */ - jent_lfsr_time(ec, current_delta, 0, stuck); + if (jent_condition_data(ec, current_delta, stuck)) + stuck = 1; return stuck; } /* * Generator of one 64 bit random number - * Function fills rand_data->data + * Function fills rand_data->hash_state * * @ec [in] Reference to entropy collector */ @@ -574,7 +520,7 @@ static void jent_gen_entropy(struct rand_data *ec) * @return 0 when request is fulfilled or an error * * The following error codes can occur: - * -1 entropy_collector is NULL + * -1 entropy_collector is NULL or the generation failed * -2 Intermittent health failure * -3 Permanent health failure */ @@ -604,7 +550,7 @@ int jent_read_entropy(struct rand_data *ec, unsigned char *data, * Perform startup health tests and return permanent * error if it fails. */ - if (jent_entropy_init()) + if (jent_entropy_init(ec->hash_state)) return -3; return -2; @@ -614,7 +560,8 @@ int jent_read_entropy(struct rand_data *ec, unsigned char *data, tocopy = (DATA_SIZE_BITS / 8); else tocopy = len; - jent_memcpy(p, &ec->data, tocopy); + if (jent_read_random_block(ec->hash_state, p, tocopy)) + return -1; len -= tocopy; p += tocopy; @@ -628,7 +575,8 @@ int jent_read_entropy(struct rand_data *ec, unsigned char *data, ***************************************************************************/ struct rand_data *jent_entropy_collector_alloc(unsigned int osr, - unsigned int flags) + unsigned int flags, + void *hash_state) { struct rand_data *entropy_collector; @@ -655,6 +603,8 @@ struct rand_data *jent_entropy_collector_alloc(unsigned int osr, osr = 1; /* minimum sampling rate is 1 */ entropy_collector->osr = osr; + entropy_collector->hash_state = hash_state; + /* fill the data pad with non-zero values */ jent_gen_entropy(entropy_collector); @@ -668,7 +618,7 @@ void jent_entropy_collector_free(struct rand_data *entropy_collector) jent_zfree(entropy_collector); } -int jent_entropy_init(void) +int jent_entropy_init(void *hash_state) { int i; __u64 delta_sum = 0; @@ -681,6 +631,7 @@ int jent_entropy_init(void) /* Required for RCT */ ec.osr = 1; + ec.hash_state = hash_state; /* We could perform statistical tests here, but the problem is * that we only have a few loop counts to do testing. These @@ -718,7 +669,7 @@ int jent_entropy_init(void) /* Invoke core entropy collection logic */ jent_get_nstime(&time); ec.prev_time = time; - jent_lfsr_time(&ec, time, 0, 0); + jent_condition_data(&ec, time, 0); jent_get_nstime(&time2); /* test whether timer works */ diff --git a/crypto/jitterentropy.h b/crypto/jitterentropy.h index 5cc583f6bc6b8..b3890ff26a023 100644 --- a/crypto/jitterentropy.h +++ b/crypto/jitterentropy.h @@ -2,14 +2,18 @@ extern void *jent_zalloc(unsigned int len); extern void jent_zfree(void *ptr); -extern void jent_memcpy(void *dest, const void *src, unsigned int n); extern void jent_get_nstime(__u64 *out); +extern int jent_hash_time(void *hash_state, __u64 time, u8 *addtl, + unsigned int addtl_len, __u64 hash_loop_cnt, + unsigned int stuck); +int jent_read_random_block(void *hash_state, char *dst, unsigned int dst_len); struct rand_data; -extern int jent_entropy_init(void); +extern int jent_entropy_init(void *hash_state); extern int jent_read_entropy(struct rand_data *ec, unsigned char *data, unsigned int len); extern struct rand_data *jent_entropy_collector_alloc(unsigned int osr, - unsigned int flags); + unsigned int flags, + void *hash_state); extern void jent_entropy_collector_free(struct rand_data *entropy_collector); diff --git a/crypto/rng.c b/crypto/rng.c index c650678106a7f..011a510492580 100644 --- a/crypto/rng.c +++ b/crypto/rng.c @@ -6,6 +6,9 @@ * * Copyright (c) 2008 Neil Horman * Copyright (c) 2015 Herbert Xu + * + * Copyright (C) 2025 Ctrl IQ, Inc. + * Author: Sultan Alsawaf */ #include @@ -14,10 +17,9 @@ #include #include #include -#include #include +#include #include -#include #include #include #include @@ -26,12 +28,39 @@ #include "internal.h" -static ____cacheline_aligned_in_smp DEFINE_MUTEX(crypto_reseed_rng_lock); -static struct crypto_rng *crypto_reseed_rng; -static ____cacheline_aligned_in_smp DEFINE_MUTEX(crypto_default_rng_lock); +static ____cacheline_aligned_in_smp DEFINE_RT_MUTEX(crypto_default_rng_lock); struct crypto_rng *crypto_default_rng; EXPORT_SYMBOL_GPL(crypto_default_rng); -static int crypto_default_rng_refcnt; +static unsigned int crypto_default_rng_refcnt; +static bool drbg_registered __ro_after_init; + +/* + * Per-CPU RNG instances are only used by crypto_devrandom_rng. The global RNG, + * crypto_default_rng, is only used directly by other drivers. + * + * Per-CPU instances of the DRBG are efficient because the DRBG itself supports + * an arbitrary number of instances and can be seeded on a per-CPU basis. + * + * Specifically, the DRBG is seeded by the CRNG and the Jitter RNG. The CRNG is + * globally accessible and is already per-CPU. And while the Jitter RNG _isn't_ + * per-CPU, creating a DRBG instance also creates a Jitter RNG instance; + * therefore, per-CPU DRBG instances implies per-CPU Jitter RNG instances. + */ +struct cpu_rng_inst { + local_lock_t lock; + struct rt_mutex mlock; + struct crypto_rng *rng; + void *page; +}; + +static DEFINE_PER_CPU_ALIGNED(struct cpu_rng_inst, pcpu_default_rng) = { + .lock = INIT_LOCAL_LOCK(pcpu_default_rng.lock), + .mlock = __RT_MUTEX_INITIALIZER(pcpu_default_rng.mlock) +}; +static DEFINE_PER_CPU_ALIGNED(struct cpu_rng_inst, pcpu_reseed_rng) = { + /* The reseed instances don't use the local lock */ + .mlock = __RT_MUTEX_INITIALIZER(pcpu_reseed_rng.mlock) +}; int crypto_rng_reset(struct crypto_rng *tfm, const u8 *seed, unsigned int slen) { @@ -145,11 +174,11 @@ int crypto_get_default_rng(void) { int err; - mutex_lock(&crypto_default_rng_lock); + rt_mutex_lock(&crypto_default_rng_lock); err = crypto_get_rng(&crypto_default_rng); if (!err) crypto_default_rng_refcnt++; - mutex_unlock(&crypto_default_rng_lock); + rt_mutex_unlock(&crypto_default_rng_lock); return err; } @@ -157,39 +186,25 @@ EXPORT_SYMBOL_GPL(crypto_get_default_rng); void crypto_put_default_rng(void) { - mutex_lock(&crypto_default_rng_lock); + rt_mutex_lock(&crypto_default_rng_lock); crypto_default_rng_refcnt--; - mutex_unlock(&crypto_default_rng_lock); + rt_mutex_unlock(&crypto_default_rng_lock); } EXPORT_SYMBOL_GPL(crypto_put_default_rng); #if defined(CONFIG_CRYPTO_RNG) || defined(CONFIG_CRYPTO_RNG_MODULE) -static int crypto_del_rng(struct crypto_rng **rngp, int *refcntp, - struct mutex *lock) +int crypto_del_default_rng(void) { - int err = -EBUSY; + bool busy; - mutex_lock(lock); - if (refcntp && *refcntp) - goto out; - - crypto_free_rng(*rngp); - *rngp = NULL; - - err = 0; - -out: - mutex_unlock(lock); - - return err; -} + rt_mutex_lock(&crypto_default_rng_lock); + if (!(busy = crypto_default_rng_refcnt)) { + crypto_free_rng(crypto_default_rng); + crypto_default_rng = NULL; + } + rt_mutex_unlock(&crypto_default_rng_lock); -int crypto_del_default_rng(void) -{ - return crypto_del_rng(&crypto_default_rng, &crypto_default_rng_refcnt, - &crypto_default_rng_lock) ?: - crypto_del_rng(&crypto_reseed_rng, NULL, - &crypto_reseed_rng_lock); + return busy ? -EBUSY : 0; } EXPORT_SYMBOL_GPL(crypto_del_default_rng); #endif @@ -201,6 +216,19 @@ int crypto_register_rng(struct rng_alg *alg) if (alg->seedsize > PAGE_SIZE / 8) return -EINVAL; + /* + * In FIPS mode, the DRBG must take precedence over all other "stdrng" + * algorithms. Therefore, forbid registration of a non-DRBG stdrng in + * FIPS mode. All of the DRBG's driver names are prefixed with "drbg_". + * This also stops new stdrng instances from getting registered after it + * is known that the DRBG is registered, so a new module can't come in + * and pretend to be the DRBG. And when CONFIG_CRYPTO_FIPS is enabled, + * the DRBG is built into the kernel directly; it can't be a module. + */ + if (fips_enabled && !strcmp(base->cra_name, "stdrng") && + (drbg_registered || strncmp(base->cra_driver_name, "drbg_", 5))) + return -EINVAL; + base->cra_type = &crypto_rng_type; base->cra_flags &= ~CRYPTO_ALG_TYPE_MASK; base->cra_flags |= CRYPTO_ALG_TYPE_RNG; @@ -225,6 +253,18 @@ int crypto_register_rngs(struct rng_alg *algs, int count) goto err; } + /* + * Track when the DRBG is registered in FIPS mode. The DRBG calls + * crypto_register_rngs() to register its stdrng instances, and since + * crypto_register_rng() only allows stdrng instances from the DRBG in + * FIPS mode, a successful stdrng registration means it was the DRBG. + * Just check the first alg in the array to see if it's called "stdrng", + * since all of the DRBG's algorithms are named "stdrng". Once + * drbg_registered is set to true, this if-statement is always false. + */ + if (fips_enabled && !strcmp(algs->base.cra_name, "stdrng")) + drbg_registered = true; + return 0; err: @@ -244,102 +284,375 @@ void crypto_unregister_rngs(struct rng_alg *algs, int count) } EXPORT_SYMBOL_GPL(crypto_unregister_rngs); -static ssize_t crypto_devrandom_read_iter(struct iov_iter *iter, bool reseed) +/* + * On non-PREEMPT_RT kernels, local locks disable preemption. When there's no + * rng allocated, one must be allocated by calling crypto_get_rng(), which can + * sleep. Therefore, crypto_get_rng() cannot be called under local_lock(), so if + * our CPU's RNG instance doesn't have an rng allocated, we drop the local lock + * and take a mutex lock instead. After the local lock is dropped, the current + * task can be freely migrated to another CPU, which means that calling + * local_lock() again might not result in the same instance getting locked as + * before. That's why this function exists: to loop on calling local_lock() and + * allocating an rng as needed with crypto_get_rng() until the current CPU's + * instance is found to have an rng allocated. If crypto_get_rng() ever fails, + * this function returns an error even if there are instances for other CPUs + * which _do_ have an rng allocated. + */ +static __always_inline struct cpu_rng_inst * +lock_default_rng(struct crypto_rng **rng) __acquires(&cri->lock) { - struct crypto_rng *rng; - u8 tmp[256]; - ssize_t ret; + struct cpu_rng_inst __percpu *pcri = &pcpu_default_rng; + struct cpu_rng_inst *cri; + int ret; + + while (1) { + local_lock(&pcri->lock); + cri = this_cpu_ptr(pcri); + /* + * cri->rng can only transition from NULL to non-NULL. This may + * occur on a different CPU, thus cri->rng must be read + * atomically to prevent data races; this elides mlock by + * pairing with the WRITE_ONCE() in the slow path below. + * + * And if cri->rng is non-NULL, then it is good to go. To avoid + * data races due to load speculation on torn cri->rng loads + * _after_ the NULL check, one of the following is required: + * 1. smp_acquire__after_ctrl_dep() in the if-statement + * 2. All cri->rng reads are performed with READ_ONCE() + * 3. cri->rng is never read again outside this function + * + * Option #3 yields the best performance, so this function + * provides the rng pointer as an output for the caller to use. + */ + *rng = READ_ONCE(cri->rng); + if (likely(*rng)) + return cri; + + /* + * Slow path: there's no rng currently allocated to this instance. + * Release the local lock and acquire this instance's mlock to + * perform the allocation. + * + * Note that this task may be migrated to a different CPU now! + */ + local_unlock(&cri->lock); + rt_mutex_lock(&cri->mlock); + if (!cri->rng) { + struct crypto_rng *new_rng = NULL; + + ret = crypto_get_rng(&new_rng); + if (ret) { + rt_mutex_unlock(&cri->mlock); + break; + } - if (unlikely(!iov_iter_count(iter))) - return 0; + /* + * Pairs with READ_ONCE() above, because we might not be + * on the same CPU anymore as when we first got `cri`. + */ + WRITE_ONCE(cri->rng, new_rng); + } + rt_mutex_unlock(&cri->mlock); + } - if (reseed) { - u32 flags = 0; + /* + * Even if this task got migrated to another CPU that _does_ have an rng + * allocated, just bail out if crypto_get_rng() ever fails in order to + * avoid looping forever. + */ + return ERR_PTR(ret); +} - /* If reseeding is requested, acquire a lock on - * crypto_reseed_rng so it is not swapped out until - * the initial random bytes are generated. - * - * The algorithm implementation is also protected with - * a separate mutex (drbg->drbg_mutex) around the - * reseed-and-generate operation. +static __always_inline struct cpu_rng_inst * +lock_reseed_rng(struct crypto_rng **rng) __acquires(&cri->mlock) +{ + struct cpu_rng_inst __percpu *pcri = &pcpu_reseed_rng; + struct cpu_rng_inst *cri; + int ret; + + /* + * Use whichever CPU this task is currently running on, knowing full + * well that the task can freely migrate to other CPUs. The reseed RNG + * requires holding a lock across the entire devrandom read, so that + * another task cannot extract entropy from the same seed. In other + * words, when reseeding is requested, reseeding must be done every time + * every time mlock is acquired. + */ + cri = raw_cpu_ptr(pcri); + rt_mutex_lock(&cri->mlock); + if (likely(cri->rng)) { + /* + * Since this rng instance wasn't just allocated, it needs to be + * explicitly reseeded. New rng instances are seeded on creation + * in crypto_get_rng() and thus don't need explicit reseeding. */ - mutex_lock(&crypto_reseed_rng_lock); + crypto_tfm_set_flags(crypto_rng_tfm(cri->rng), + CRYPTO_TFM_REQ_NEED_RESEED); + } else { + ret = crypto_get_rng(&cri->rng); + if (ret) { + rt_mutex_unlock(&cri->mlock); + return ERR_PTR(ret); + } + } - /* If crypto_default_rng is not set, it will be seeded - * at creation in __crypto_get_default_rng and thus no - * reseeding is needed. - */ - if (crypto_reseed_rng) - flags |= CRYPTO_TFM_REQ_NEED_RESEED; + *rng = cri->rng; + return cri; +} - ret = crypto_get_rng(&crypto_reseed_rng); - if (ret) { - mutex_unlock(&crypto_reseed_rng_lock); - return ret; +#define lock_local_rng(rng, reseed) \ + ({ (reseed) ? lock_reseed_rng(rng) : lock_default_rng(rng); }) + +#define unlock_local_rng(cri, reseed) \ +do { \ + if (reseed) \ + rt_mutex_unlock(&(cri)->mlock); \ + else \ + local_unlock(&(cri)->lock); \ +} while (0) + +static __always_inline void +clear_rng_page(struct cpu_rng_inst *cri, size_t count) +{ + /* For zeroing a whole page, clear_page() is faster than memset() */ + count < PAGE_SIZE ? memset(cri->page, 0, count) : clear_page(cri->page); +} + +static ssize_t crypto_devrandom_read_iter(struct iov_iter *iter, bool reseed) +{ + /* lock_local_rng() puts us in atomic context for !reseed on non-RT */ + const bool atomic = !reseed && !IS_ENABLED(CONFIG_PREEMPT_RT); + const bool user_no_reseed = !reseed && user_backed_iter(iter); + size_t ulen, page_dirty_len = 0; + struct cpu_rng_inst *cri; + struct crypto_rng *rng; + void __user *uaddr; + struct page *upage; + ssize_t ret = 0; + + if (unlikely(!iov_iter_count(iter))) + return 0; + + /* Set up the starting user destination address and length */ + if (user_no_reseed) { + if (iter_is_ubuf(iter)) { + uaddr = iter->ubuf + iter->iov_offset; + ulen = iov_iter_count(iter); + } else if (iter_is_iovec(iter)) { + uaddr = iter_iov_addr(iter); + ulen = iter_iov_len(iter); + } else { + /* + * ITER_UBUF and ITER_IOVEC are the only user-backed + * iters. Bug out if a new user-backed iter appears. + */ + BUG(); } + } - rng = crypto_reseed_rng; - crypto_tfm_set_flags(crypto_rng_tfm(rng), flags); - } else { - ret = crypto_get_default_rng(); - if (ret) - return ret; - rng = crypto_default_rng; +restart: + /* + * Pin the user page backing the current user destination address, + * potentially prefaulting to allocate a page for the destination. By + * prefaulting without the RNG lock held, the DRBG won't be blocked by + * time spent on page faults for this task, and thus the DRBG can still + * be used by other tasks. + */ + if (user_no_reseed && pin_user_pages_fast((unsigned long)uaddr, 1, + FOLL_WRITE, &upage) != 1) + goto exit; + + cri = lock_local_rng(&rng, reseed); + if (IS_ERR(cri)) { + if (!ret) + ret = PTR_ERR(cri); + goto unpin_upage; } - for (;;) { - size_t i, copied; + while (1) { + size_t copied, i = min(iov_iter_count(iter), PAGE_SIZE); + bool resched_without_lock = false; int err; - i = min_t(size_t, iov_iter_count(iter), sizeof(tmp)); - err = crypto_rng_get_bytes(rng, tmp, i); + /* + * Generate up to one page at a time, and align to a page + * boundary so we only need to pin one user page at a time. + */ + if (user_no_reseed) + i = min3(i, PAGE_SIZE - offset_in_page(uaddr), ulen); + + /* + * On non-PREEMPT_RT kernels, local locks disable preemption. + * The DRBG's generate() function has a mutex lock, which could + * mean that we'll schedule while atomic if the mutex lock + * sleeps. However, that will never happen if we ensure that + * there's never any contention on the DRBG's mutex lock while + * we're atomic! Our local lock ensures calls to the DRBG are + * always serialized, so there's no contention from here. And + * the DRBG only uses its mutex lock from one other path, when + * an instance of the DRBG is freshly allocated, which we only + * do from crypto_get_rng(). So the DRBG's mutex lock is + * guaranteed to not have contention when we call generate() and + * thus it'll never sleep here. And of course, nothing else in + * generate() ever sleeps. + */ + err = crypto_rng_get_bytes(rng, cri->page, i); if (err) { - ret = ret ?: err; + if (!ret) + ret = err; break; } - copied = copy_to_iter(tmp, i, iter); - ret += copied; + /* + * Record the number of bytes used in cri->page and either copy + * directly to the user address without faulting, or copy to the + * iter which is always backed by kernel memory when !reseed && + * !user_backed_iter(). When reseed == true, the iter may be + * backed by user memory, but we copy to it with the possibility + * of page faults anyway because we need to hold the lock across + * the entire call; this is why a mutex is used instead of a + * local lock for the reseed RNG, to permit sleeping without + * yielding the DRBG instance. + */ + page_dirty_len = max(i, page_dirty_len); + if (user_no_reseed) { + err = copy_to_user_nofault(uaddr, cri->page, i); + if (err >= 0) { + iov_iter_advance(iter, i - err); + ret += i - err; + } + if (err) + break; + } else { + /* + * We know that copying from cri->page is safe, so use + * _copy_to_iter() directly to skip check_copy_size(). + */ + copied = _copy_to_iter(cri->page, i, iter); + ret += copied; + if (copied != i) + break; + } - if (!iov_iter_count(iter) || copied != i) + /* + * Quit when either the requested number of bytes have been + * generated or there is a pending signal. + */ + if (!iov_iter_count(iter) || signal_pending(current)) break; - BUILD_BUG_ON(PAGE_SIZE % sizeof(tmp) != 0); - if (ret % PAGE_SIZE == 0) { - if (signal_pending(current)) - break; - cond_resched(); + /* Compute the next user destination address and length */ + if (user_no_reseed) { + ulen -= i; + if (likely(ulen)) { + uaddr += i; + } else { + /* + * This path is only reachable by ITER_IOVEC + * because ulen is initialized to the request + * size for ITER_UBUF, and therefore ITER_UBUF + * will always quit at the iov_iter_count() + * check above before ulen can become zero. + * + * iter->iov_offset is guaranteed to be zero + * here, so iter_iov_{addr|len}() isn't needed. + */ + uaddr = iter_iov(iter)->iov_base; + ulen = iter_iov(iter)->iov_len; + } + + unpin_user_page(upage); + } + + /* + * Reschedule right now if needed and we're not atomic. If we're + * atomic, then we must first drop the lock to reschedule. + */ + if (need_resched()) { + if (atomic) + resched_without_lock = true; + else + cond_resched(); + } + + /* + * Optimistically try to pin the next user page without + * faulting, so we don't need to clear cri->page and drop the + * lock on every iteration. If this fails, we fall back to + * pinning with the option to prefault. + */ + if (user_no_reseed && !resched_without_lock && + pin_user_pages_fast_only((unsigned long)uaddr, 1, + FOLL_WRITE, &upage) == 1) + continue; + + /* + * Restart if either rescheduling is needed (and requires + * dropping the lock since we're atomic) or the optimistic page + * pinning attempt failed. + * + * This always implies `reseed == false`, so unlock_local_rng() + * can just be passed `false` for reseed to eliminate a branch. + */ + if (resched_without_lock || user_no_reseed) { + /* + * Clear the buffer of our latest random bytes before + * unlocking and potentially migrating CPUs, in which + * case we wouldn't have the same `cri` anymore. + */ + clear_rng_page(cri, page_dirty_len); + unlock_local_rng(cri, false); + page_dirty_len = 0; + if (resched_without_lock) + cond_resched(); + goto restart; } } - if (reseed) - mutex_unlock(&crypto_reseed_rng_lock); - else - crypto_put_default_rng(); - memzero_explicit(tmp, sizeof(tmp)); + if (page_dirty_len) + clear_rng_page(cri, page_dirty_len); + unlock_local_rng(cri, reseed); +unpin_upage: + if (user_no_reseed) + unpin_user_page(upage); +exit: return ret ? ret : -EFAULT; } static const struct random_extrng crypto_devrandom_rng = { - .extrng_read_iter = crypto_devrandom_read_iter, - .owner = THIS_MODULE, + .extrng_read_iter = crypto_devrandom_read_iter }; -static int __init crypto_rng_init(void) +static void __init alloc_pcpu_inst(struct cpu_rng_inst __percpu *pcri) { - if (fips_enabled) - random_register_extrng(&crypto_devrandom_rng); - return 0; + int cpu; + + for_each_possible_cpu(cpu) { + struct cpu_rng_inst *cri = per_cpu_ptr(pcri, cpu); + + cri->page = (void *)__get_free_page(GFP_KERNEL | __GFP_NOFAIL); + local_lock_init(&cri->lock); + } } -static void __exit crypto_rng_exit(void) +static int __init crypto_rng_init(void) { - random_unregister_extrng(); + if (!fips_enabled) + return 0; + + /* + * Never fail to register the RNG override in FIPS mode because failure + * would result in the system quietly booting without the FIPS-mandated + * RNG installed. This would be catastrophic for FIPS compliance, hence + * the RNG override setup is *not* allowed to fail. + */ + alloc_pcpu_inst(&pcpu_default_rng); + alloc_pcpu_inst(&pcpu_reseed_rng); + random_register_extrng(&crypto_devrandom_rng); + return 0; } late_initcall(crypto_rng_init); -module_exit(crypto_rng_exit); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Random Number Generator"); diff --git a/drivers/char/random.c b/drivers/char/random.c index 317a0b15dc34c..5fe3118a3c278 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -51,7 +51,6 @@ #include #include #include -#include #include #include #include @@ -314,7 +313,7 @@ static void crng_fast_key_erasure(u8 key[CHACHA_KEY_SIZE], /* * Hook for external RNG. */ -static const struct random_extrng __rcu *extrng; +static const struct random_extrng *extrng __ro_after_init; /* * This function returns a ChaCha state that you may use for generating @@ -966,18 +965,12 @@ void __init add_bootloader_randomness(const void *buf, size_t len) credit_init_bits(len * 8); } -void random_register_extrng(const struct random_extrng *rng) +void __init random_register_extrng(const struct random_extrng *rng) { - rcu_assign_pointer(extrng, rng); + /* Don't allow the registered extrng to be overridden */ + BUG_ON(extrng); + extrng = rng; } -EXPORT_SYMBOL_GPL(random_register_extrng); - -void random_unregister_extrng(void) -{ - RCU_INIT_POINTER(extrng, NULL); - synchronize_rcu(); -} -EXPORT_SYMBOL_GPL(random_unregister_extrng); #if IS_ENABLED(CONFIG_VMGENID) static BLOCKING_NOTIFIER_HEAD(vmfork_chain); @@ -1386,7 +1379,6 @@ static void __cold try_to_generate_entropy(void) SYSCALL_DEFINE3(getrandom, char __user *, ubuf, size_t, len, unsigned int, flags) { - const struct random_extrng *rng; struct iov_iter iter; struct iovec iov; int ret; @@ -1404,19 +1396,11 @@ SYSCALL_DEFINE3(getrandom, char __user *, ubuf, size_t, len, unsigned int, flags if (len > INT_MAX) len = INT_MAX; - rcu_read_lock(); - rng = rcu_dereference(extrng); - if (rng && !try_module_get(rng->owner)) - rng = NULL; - rcu_read_unlock(); - - if (rng) { + if (extrng) { ret = import_single_range(READ, ubuf, len, &iov, &iter); if (unlikely(ret)) return ret; - ret = rng->extrng_read_iter(&iter, !!(flags & GRND_RANDOM)); - module_put(rng->owner); - return ret; + return extrng->extrng_read_iter(&iter, !!(flags & GRND_RANDOM)); } if (!crng_ready() && !(flags & GRND_INSECURE)) { @@ -1589,52 +1573,24 @@ static int random_fasync(int fd, struct file *filp, int on) static int random_open(struct inode *inode, struct file *filp) { - const struct random_extrng *rng; - - rcu_read_lock(); - rng = rcu_dereference(extrng); - if (rng && !try_module_get(rng->owner)) - rng = NULL; - rcu_read_unlock(); - - if (!rng) - return 0; - - filp->f_op = &extrng_random_fops; - filp->private_data = rng->owner; + if (extrng) + filp->f_op = &extrng_random_fops; return 0; } static int urandom_open(struct inode *inode, struct file *filp) { - const struct random_extrng *rng; + if (extrng) + filp->f_op = &extrng_urandom_fops; - rcu_read_lock(); - rng = rcu_dereference(extrng); - if (rng && !try_module_get(rng->owner)) - rng = NULL; - rcu_read_unlock(); - - if (!rng) - return 0; - - filp->f_op = &extrng_urandom_fops; - filp->private_data = rng->owner; - - return 0; -} - -static int extrng_release(struct inode *inode, struct file *filp) -{ - module_put(filp->private_data); return 0; } static ssize_t extrng_read_iter(struct kiocb *kiocb, struct iov_iter *iter) { - return rcu_dereference_raw(extrng)->extrng_read_iter(iter, false); + return extrng->extrng_read_iter(iter, false); } const struct file_operations random_fops = { @@ -1670,7 +1626,6 @@ static const struct file_operations extrng_random_fops = { .unlocked_ioctl = random_ioctl, .fasync = random_fasync, .llseek = noop_llseek, - .release = extrng_release, .splice_read = generic_file_splice_read, .splice_write = iter_file_splice_write, }; @@ -1682,7 +1637,6 @@ static const struct file_operations extrng_urandom_fops = { .unlocked_ioctl = random_ioctl, .fasync = random_fasync, .llseek = noop_llseek, - .release = extrng_release, .splice_read = generic_file_splice_read, .splice_write = iter_file_splice_write, }; diff --git a/drivers/net/ethernet/microsoft/Kconfig b/drivers/net/ethernet/microsoft/Kconfig index 901fbffbf718e..3f36ee6a8ecee 100644 --- a/drivers/net/ethernet/microsoft/Kconfig +++ b/drivers/net/ethernet/microsoft/Kconfig @@ -22,6 +22,7 @@ config MICROSOFT_MANA depends on PCI_HYPERV select AUXILIARY_BUS select PAGE_POOL + select NET_SHAPER help This driver supports Microsoft Azure Network Adapter (MANA). So far, the driver is only supported on X86_64. diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c index a64ef3e227bcb..05ac57760584b 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_en.c +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c @@ -837,6 +837,78 @@ static int mana_change_mtu(struct net_device *ndev, int new_mtu) return err; } +static int mana_shaper_set(struct net_shaper_binding *binding, + const struct net_shaper *shaper, + struct netlink_ext_ack *extack) +{ + struct mana_port_context *apc = netdev_priv(binding->netdev); + u32 old_speed, rate; + int err; + + if (shaper->handle.scope != NET_SHAPER_SCOPE_NETDEV) { + NL_SET_ERR_MSG_MOD(extack, "net shaper scope should be netdev"); + return -EINVAL; + } + + if (apc->handle.id && shaper->handle.id != apc->handle.id) { + NL_SET_ERR_MSG_MOD(extack, "Cannot create multiple shapers"); + return -EOPNOTSUPP; + } + + if (!shaper->bw_max || (shaper->bw_max % 100000000)) { + NL_SET_ERR_MSG_MOD(extack, "Please use multiples of 100Mbps for bandwidth"); + return -EINVAL; + } + + rate = div_u64(shaper->bw_max, 1000); /* Convert bps to Kbps */ + rate = div_u64(rate, 1000); /* Convert Kbps to Mbps */ + + /* Get current speed */ + err = mana_query_link_cfg(apc); + old_speed = (err) ? SPEED_UNKNOWN : apc->speed; + + if (!err) { + err = mana_set_bw_clamp(apc, rate, TRI_STATE_TRUE); + apc->speed = (err) ? old_speed : rate; + apc->handle = (err) ? apc->handle : shaper->handle; + } + + return err; +} + +static int mana_shaper_del(struct net_shaper_binding *binding, + const struct net_shaper_handle *handle, + struct netlink_ext_ack *extack) +{ + struct mana_port_context *apc = netdev_priv(binding->netdev); + int err; + + err = mana_set_bw_clamp(apc, 0, TRI_STATE_FALSE); + + if (!err) { + /* Reset mana port context parameters */ + apc->handle.id = 0; + apc->handle.scope = NET_SHAPER_SCOPE_UNSPEC; + apc->speed = 0; + } + + return err; +} + +static void mana_shaper_cap(struct net_shaper_binding *binding, + enum net_shaper_scope scope, + unsigned long *flags) +{ + *flags = BIT(NET_SHAPER_A_CAPS_SUPPORT_BW_MAX) | + BIT(NET_SHAPER_A_CAPS_SUPPORT_METRIC_BPS); +} + +static const struct net_shaper_ops mana_shaper_ops = { + .set = mana_shaper_set, + .delete = mana_shaper_del, + .capabilities = mana_shaper_cap, +}; + static const struct net_device_ops mana_devops = { .ndo_open = mana_open, .ndo_stop = mana_close, @@ -850,6 +922,7 @@ static const struct net_device_ops mana_devops = { .ndo_bpf = mana_bpf, .ndo_xdp_xmit = mana_xdp_xmit, .ndo_change_mtu = mana_change_mtu, + .net_shaper_ops = &mana_shaper_ops, }; static void mana_cleanup_port_context(struct mana_port_context *apc) @@ -1293,6 +1366,87 @@ static int mana_cfg_vport_steering(struct mana_port_context *apc, return err; } +int mana_query_link_cfg(struct mana_port_context *apc) +{ + struct net_device *ndev = apc->ndev; + struct mana_query_link_config_resp resp = {}; + struct mana_query_link_config_req req = {}; + int err; + + mana_gd_init_req_hdr(&req.hdr, MANA_QUERY_LINK_CONFIG, + sizeof(req), sizeof(resp)); + + req.vport = apc->port_handle; + req.hdr.resp.msg_version = GDMA_MESSAGE_V2; + + err = mana_send_request(apc->ac, &req, sizeof(req), &resp, + sizeof(resp)); + + if (err) { + netdev_err(ndev, "Failed to query link config: %d\n", err); + return err; + } + + err = mana_verify_resp_hdr(&resp.hdr, MANA_QUERY_LINK_CONFIG, + sizeof(resp)); + + if (err || resp.hdr.status) { + netdev_err(ndev, "Failed to query link config: %d, 0x%x\n", err, + resp.hdr.status); + if (!err) + err = -EOPNOTSUPP; + return err; + } + + if (resp.qos_unconfigured) { + err = -EINVAL; + return err; + } + apc->speed = resp.link_speed_mbps; + apc->max_speed = resp.qos_speed_mbps; + return 0; +} + +int mana_set_bw_clamp(struct mana_port_context *apc, u32 speed, + int enable_clamping) +{ + struct mana_set_bw_clamp_resp resp = {}; + struct mana_set_bw_clamp_req req = {}; + struct net_device *ndev = apc->ndev; + int err; + + mana_gd_init_req_hdr(&req.hdr, MANA_SET_BW_CLAMP, + sizeof(req), sizeof(resp)); + req.vport = apc->port_handle; + req.link_speed_mbps = speed; + req.enable_clamping = enable_clamping; + + err = mana_send_request(apc->ac, &req, sizeof(req), &resp, + sizeof(resp)); + + if (err) { + netdev_err(ndev, "Failed to set bandwidth clamp for speed %u, err = %d", + speed, err); + return err; + } + + err = mana_verify_resp_hdr(&resp.hdr, MANA_SET_BW_CLAMP, + sizeof(resp)); + + if (err || resp.hdr.status) { + netdev_err(ndev, "Failed to set bandwidth clamp: %d, 0x%x\n", err, + resp.hdr.status); + if (!err) + err = -EOPNOTSUPP; + return err; + } + + if (resp.qos_unconfigured) + netdev_info(ndev, "QoS is unconfigured\n"); + + return 0; +} + int mana_create_wq_obj(struct mana_port_context *apc, mana_handle_t vport, u32 wq_type, struct mana_obj_spec *wq_spec, @@ -3159,6 +3313,8 @@ static int mana_probe_port(struct mana_context *ac, int port_idx, netif_carrier_on(ndev); + debugfs_create_u32("current_speed", 0400, apc->mana_port_debugfs, &apc->speed); + return 0; free_indir: diff --git a/drivers/net/ethernet/microsoft/mana/mana_ethtool.c b/drivers/net/ethernet/microsoft/mana/mana_ethtool.c index fd6c6c272a155..0e2f4343ac67f 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_ethtool.c +++ b/drivers/net/ethernet/microsoft/mana/mana_ethtool.c @@ -504,6 +504,12 @@ static int mana_set_ringparam(struct net_device *ndev, static int mana_get_link_ksettings(struct net_device *ndev, struct ethtool_link_ksettings *cmd) { + struct mana_port_context *apc = netdev_priv(ndev); + int err; + + err = mana_query_link_cfg(apc); + cmd->base.speed = (err) ? SPEED_UNKNOWN : apc->max_speed; + cmd->base.duplex = DUPLEX_FULL; cmd->base.port = PORT_OTHER; diff --git a/include/linux/mm.h b/include/linux/mm.h index 74fe0873e3518..05e7a1a6afb1c 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2577,6 +2577,8 @@ int get_user_pages_fast(unsigned long start, int nr_pages, unsigned int gup_flags, struct page **pages); int pin_user_pages_fast(unsigned long start, int nr_pages, unsigned int gup_flags, struct page **pages); +int pin_user_pages_fast_only(unsigned long start, int nr_pages, + unsigned int gup_flags, struct page **pages); void folio_add_pin(struct folio *folio); int account_locked_vm(struct mm_struct *mm, unsigned long pages, bool inc); diff --git a/include/linux/random.h b/include/linux/random.h index d4cabe51e9434..9bde794ec8d93 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -9,12 +9,6 @@ #include -struct iov_iter; -struct random_extrng { - ssize_t (*extrng_read_iter)(struct iov_iter *, bool reseed); - struct module *owner; -}; - struct notifier_block; void add_device_randomness(const void *buf, size_t len); @@ -42,9 +36,6 @@ static inline int register_random_vmfork_notifier(struct notifier_block *nb) { r static inline int unregister_random_vmfork_notifier(struct notifier_block *nb) { return 0; } #endif -void random_register_extrng(const struct random_extrng *rng); -void random_unregister_extrng(void); - void get_random_bytes(void *buf, size_t len); u8 get_random_u8(void); u16 get_random_u16(void); @@ -173,6 +164,13 @@ int random_online_cpu(unsigned int cpu); #ifndef MODULE extern const struct file_operations random_fops, urandom_fops; + +struct iov_iter; +struct random_extrng { + ssize_t (*extrng_read_iter)(struct iov_iter *iter, bool reseed); +}; + +void __init random_register_extrng(const struct random_extrng *rng); #endif #endif /* _LINUX_RANDOM_H */ diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h index 28afd2a3d8f46..a1fb59e53b971 100644 --- a/include/net/mana/mana.h +++ b/include/net/mana/mana.h @@ -4,6 +4,9 @@ #ifndef _MANA_H #define _MANA_H +#include +#include RH_KABI_HIDE_INCLUDE() + #include "gdma.h" #include "hw_channel.h" @@ -552,6 +555,15 @@ struct mana_port_context { /* Debugfs */ struct dentry *mana_port_debugfs; + + /* Fields added via RH_KABI_EXTEND to preserve kABI for MANA symbols + * added to the kABI whitelist in RHEL 9.8. + * Upstream commits: 75cabb46935b ("net: mana: Add support for net_shaper_ops") + * and a6d5edf11e0c ("net: mana: Add speed support in mana_get_link_ksettings") + */ + RH_KABI_EXTEND(struct net_shaper_handle handle) + RH_KABI_EXTEND(u32 speed) + RH_KABI_EXTEND(u32 max_speed) }; netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev); @@ -577,6 +589,9 @@ struct bpf_prog *mana_xdp_get(struct mana_port_context *apc); void mana_chn_setxdp(struct mana_port_context *apc, struct bpf_prog *prog); int mana_bpf(struct net_device *ndev, struct netdev_bpf *bpf); int mana_query_gf_stats(struct mana_context *ac); +int mana_query_link_cfg(struct mana_port_context *apc); +int mana_set_bw_clamp(struct mana_port_context *apc, u32 speed, + int enable_clamping); void mana_query_phy_stats(struct mana_port_context *apc); int mana_pre_alloc_rxbufs(struct mana_port_context *apc, int mtu, int num_queues); void mana_pre_dealloc_rxbufs(struct mana_port_context *apc); @@ -605,6 +620,8 @@ enum mana_command_code { MANA_FENCE_RQ = 0x20006, MANA_CONFIG_VPORT_RX = 0x20007, MANA_QUERY_VPORT_CONFIG = 0x20008, + RH_KABI_EXTEND_ENUM(MANA_QUERY_LINK_CONFIG = 0x2000A) + RH_KABI_EXTEND_ENUM(MANA_SET_BW_CLAMP = 0x2000B) MANA_QUERY_PHY_STAT = 0x2000c, /* Privileged commands for the PF mode */ @@ -614,6 +631,35 @@ enum mana_command_code { MANA_DEREGISTER_HW_PORT = 0x28004, }; +/* Query Link Configuration*/ +struct mana_query_link_config_req { + struct gdma_req_hdr hdr; + mana_handle_t vport; +}; /* HW DATA */ + +struct mana_query_link_config_resp { + struct gdma_resp_hdr hdr; + u32 qos_speed_mbps; + u8 qos_unconfigured; + u8 reserved1[3]; + u32 link_speed_mbps; + u8 reserved2[4]; +}; /* HW DATA */ + +/* Set Bandwidth Clamp*/ +struct mana_set_bw_clamp_req { + struct gdma_req_hdr hdr; + mana_handle_t vport; + enum TRI_STATE enable_clamping; + u32 link_speed_mbps; +}; /* HW DATA */ + +struct mana_set_bw_clamp_resp { + struct gdma_resp_hdr hdr; + u8 qos_unconfigured; + u8 reserved[7]; +}; /* HW DATA */ + /* Query Device Configuration */ struct mana_query_device_cfg_req { struct gdma_req_hdr hdr; diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 81f69e5c41de9..e16c4a7c04d4f 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -126,8 +126,6 @@ E_(rxrpc_call_poke_timer_now, "Timer-now") #define rxrpc_skb_traces \ - EM(rxrpc_skb_eaten_by_unshare, "ETN unshare ") \ - EM(rxrpc_skb_eaten_by_unshare_nomem, "ETN unshar-nm") \ EM(rxrpc_skb_get_conn_secured, "GET conn-secd") \ EM(rxrpc_skb_get_conn_work, "GET conn-work") \ EM(rxrpc_skb_get_last_nack, "GET last-nack") \ @@ -139,6 +137,7 @@ EM(rxrpc_skb_new_error_report, "NEW error-rpt") \ EM(rxrpc_skb_new_jumbo_subpacket, "NEW jumbo-sub") \ EM(rxrpc_skb_new_unshared, "NEW unshared ") \ + EM(rxrpc_skb_put_call_rx, "PUT call-rx ") \ EM(rxrpc_skb_put_conn_secured, "PUT conn-secd") \ EM(rxrpc_skb_put_conn_work, "PUT conn-work") \ EM(rxrpc_skb_put_error_report, "PUT error-rep") \ @@ -152,6 +151,7 @@ EM(rxrpc_skb_see_recvmsg, "SEE recvmsg ") \ EM(rxrpc_skb_see_reject, "SEE reject ") \ EM(rxrpc_skb_see_rotate, "SEE rotate ") \ + EM(rxrpc_skb_see_unshare_nomem, "SEE unshar-nm") \ E_(rxrpc_skb_see_version, "SEE version ") #define rxrpc_local_traces \ diff --git a/lib/mpi/ec.c b/lib/mpi/ec.c index 40f5908e57a4f..e16dca1e23d52 100644 --- a/lib/mpi/ec.c +++ b/lib/mpi/ec.c @@ -584,6 +584,9 @@ void mpi_ec_init(struct mpi_ec_ctx *ctx, enum gcry_mpi_ec_models model, ctx->a = mpi_copy(a); ctx->b = mpi_copy(b); + ctx->d = NULL; + ctx->t.two_inv_p = NULL; + ctx->t.p_barrett = use_barrett > 0 ? mpi_barrett_init(ctx->p, 0) : NULL; mpi_ec_get_reset(ctx); diff --git a/mm/gup.c b/mm/gup.c index 4ef41429be155..c85a7b052f92b 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -3491,6 +3491,20 @@ int pin_user_pages_fast(unsigned long start, int nr_pages, } EXPORT_SYMBOL_GPL(pin_user_pages_fast); +/* + * This is the FOLL_PIN equivalent of get_user_pages_fast_only(). Behavior is + * the same, except that this one sets FOLL_PIN instead of FOLL_GET. + */ +int pin_user_pages_fast_only(unsigned long start, int nr_pages, + unsigned int gup_flags, struct page **pages) +{ + if (!is_valid_gup_args(pages, NULL, &gup_flags, + FOLL_PIN | FOLL_FAST_ONLY)) + return -EINVAL; + return internal_get_user_pages_fast(start, nr_pages, gup_flags, pages); +} +EXPORT_SYMBOL_GPL(pin_user_pages_fast_only); + /** * pin_user_pages_remote() - pin pages of a remote process * diff --git a/net/core/gro.c b/net/core/gro.c index a1b6faeb2e26a..de524b6133845 100644 --- a/net/core/gro.c +++ b/net/core/gro.c @@ -113,6 +113,9 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb) if (p->pp_recycle != skb->pp_recycle) return -ETOOMANYREFS; + if (skb_zcopy(p) || skb_zcopy(skb)) + return -ETOOMANYREFS; + /* pairs with WRITE_ONCE() in netif_set_gro(_ipv4)_max_size() */ gro_max_size = p->protocol == htons(ETH_P_IPV6) ? READ_ONCE(p->dev->gro_max_size) : diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 077e5df789b0a..ded1772b35ef5 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -1253,7 +1253,6 @@ int rxrpc_server_keyring(struct rxrpc_sock *, sockptr_t, int); void rxrpc_kernel_data_consumed(struct rxrpc_call *, struct sk_buff *); void rxrpc_new_skb(struct sk_buff *, enum rxrpc_skb_trace); void rxrpc_see_skb(struct sk_buff *, enum rxrpc_skb_trace); -void rxrpc_eaten_skb(struct sk_buff *, enum rxrpc_skb_trace); void rxrpc_get_skb(struct sk_buff *, enum rxrpc_skb_trace); void rxrpc_free_skb(struct sk_buff *, enum rxrpc_skb_trace); void rxrpc_purge_queue(struct sk_buff_head *); diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index 38c4c4743cf16..661e74e4c9111 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -342,8 +342,28 @@ bool rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb) if (skb && skb->mark == RXRPC_SKB_MARK_ERROR) goto out; - if (skb) - rxrpc_input_call_packet(call, skb); + if (skb) { + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + + if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA && + sp->hdr.securityIndex != 0 && + (skb_cloned(skb) || skb->data_len)) { + /* Unshare the packet so that it can be + * modified by in-place decryption. + */ + struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC); + + if (nskb) { + rxrpc_new_skb(nskb, rxrpc_skb_new_unshared); + rxrpc_input_call_packet(call, nskb); + rxrpc_free_skb(nskb, rxrpc_skb_put_call_rx); + } else { + rxrpc_see_skb(skb, rxrpc_skb_see_unshare_nomem); + } + } else { + rxrpc_input_call_packet(call, skb); + } + } /* If we see our async-event poke, check for timeout trippage. */ now = ktime_get_real(); diff --git a/net/rxrpc/io_thread.c b/net/rxrpc/io_thread.c index af78c90e4dab8..0b0c8bd454671 100644 --- a/net/rxrpc/io_thread.c +++ b/net/rxrpc/io_thread.c @@ -168,13 +168,12 @@ static bool rxrpc_extract_abort(struct sk_buff *skb) /* * Process packets received on the local endpoint */ -static bool rxrpc_input_packet(struct rxrpc_local *local, struct sk_buff **_skb) +static bool rxrpc_input_packet(struct rxrpc_local *local, struct sk_buff *skb) { struct rxrpc_connection *conn; struct sockaddr_rxrpc peer_srx; struct rxrpc_skb_priv *sp; struct rxrpc_peer *peer = NULL; - struct sk_buff *skb = *_skb; bool ret = false; skb_pull(skb, sizeof(struct udphdr)); @@ -220,25 +219,6 @@ static bool rxrpc_input_packet(struct rxrpc_local *local, struct sk_buff **_skb) return rxrpc_bad_message(skb, rxrpc_badmsg_zero_call); if (sp->hdr.seq == 0) return rxrpc_bad_message(skb, rxrpc_badmsg_zero_seq); - - /* Unshare the packet so that it can be modified for in-place - * decryption. - */ - if (sp->hdr.securityIndex != 0) { - skb = skb_unshare(skb, GFP_ATOMIC); - if (!skb) { - rxrpc_eaten_skb(*_skb, rxrpc_skb_eaten_by_unshare_nomem); - *_skb = NULL; - return just_discard; - } - - if (skb != *_skb) { - rxrpc_eaten_skb(*_skb, rxrpc_skb_eaten_by_unshare); - *_skb = skb; - rxrpc_new_skb(skb, rxrpc_skb_new_unshared); - sp = rxrpc_skb(skb); - } - } break; case RXRPC_PACKET_TYPE_CHALLENGE: @@ -476,7 +456,7 @@ int rxrpc_io_thread(void *data) switch (skb->mark) { case RXRPC_SKB_MARK_PACKET: skb->priority = 0; - if (!rxrpc_input_packet(local, &skb)) + if (!rxrpc_input_packet(local, skb)) rxrpc_reject_packet(local, skb); trace_rxrpc_rx_done(skb->mark, skb->priority); rxrpc_free_skb(skb, rxrpc_skb_put_input); diff --git a/net/rxrpc/skbuff.c b/net/rxrpc/skbuff.c index 3bcd6ee803960..e2169d1a14b5f 100644 --- a/net/rxrpc/skbuff.c +++ b/net/rxrpc/skbuff.c @@ -46,15 +46,6 @@ void rxrpc_get_skb(struct sk_buff *skb, enum rxrpc_skb_trace why) skb_get(skb); } -/* - * Note the dropping of a ref on a socket buffer by the core. - */ -void rxrpc_eaten_skb(struct sk_buff *skb, enum rxrpc_skb_trace why) -{ - int n = atomic_inc_return(&rxrpc_n_rx_skbs); - trace_rxrpc_skb(skb, 0, n, why); -} - /* * Note the destruction of a socket buffer. */ diff --git a/tools/hv/hv_kvp_daemon.c b/tools/hv/hv_kvp_daemon.c index 1e6fd6ca513bd..0e0c997134ec6 100644 --- a/tools/hv/hv_kvp_daemon.c +++ b/tools/hv/hv_kvp_daemon.c @@ -77,6 +77,7 @@ enum { }; static int in_hand_shake; +static int debug; static char *os_name = ""; static char *os_major = ""; @@ -172,6 +173,20 @@ static void kvp_update_file(int pool) kvp_release_lock(pool); } +static void kvp_dump_initial_pools(int pool) +{ + int i; + + syslog(LOG_DEBUG, "===Start dumping the contents of pool %d ===\n", + pool); + + for (i = 0; i < kvp_file_info[pool].num_records; i++) + syslog(LOG_DEBUG, "pool: %d, %d/%d key=%s val=%s\n", + pool, i + 1, kvp_file_info[pool].num_records, + kvp_file_info[pool].records[i].key, + kvp_file_info[pool].records[i].value); +} + static void kvp_update_mem_state(int pool) { FILE *filep; @@ -259,6 +274,8 @@ static int kvp_file_init(void) return 1; kvp_file_info[i].num_records = 0; kvp_update_mem_state(i); + if (debug) + kvp_dump_initial_pools(i); } return 0; @@ -286,6 +303,9 @@ static int kvp_key_delete(int pool, const __u8 *key, int key_size) * Found a match; just move the remaining * entries up. */ + if (debug) + syslog(LOG_DEBUG, "%s: deleting the KVP: pool=%d key=%s val=%s", + __func__, pool, record[i].key, record[i].value); if (i == (num_records - 1)) { kvp_file_info[pool].num_records--; kvp_update_file(pool); @@ -304,20 +324,36 @@ static int kvp_key_delete(int pool, const __u8 *key, int key_size) kvp_update_file(pool); return 0; } + + if (debug) + syslog(LOG_DEBUG, "%s: could not delete KVP: pool=%d key=%s. Record not found", + __func__, pool, key); + return 1; } static int kvp_key_add_or_modify(int pool, const __u8 *key, int key_size, const __u8 *value, int value_size) { - int i; - int num_records; struct kvp_record *record; + int num_records; int num_blocks; + int i; + + if (debug) + syslog(LOG_DEBUG, "%s: got a KVP: pool=%d key=%s val=%s", + __func__, pool, key, value); if ((key_size > HV_KVP_EXCHANGE_MAX_KEY_SIZE) || - (value_size > HV_KVP_EXCHANGE_MAX_VALUE_SIZE)) + (value_size > HV_KVP_EXCHANGE_MAX_VALUE_SIZE)) { + syslog(LOG_ERR, "%s: Too long key or value: key=%s, val=%s", + __func__, key, value); + + if (debug) + syslog(LOG_DEBUG, "%s: Too long key or value: pool=%d, key=%s, val=%s", + __func__, pool, key, value); return 1; + } /* * First update the in-memory state. @@ -337,6 +373,9 @@ static int kvp_key_add_or_modify(int pool, const __u8 *key, int key_size, */ memcpy(record[i].value, value, value_size); kvp_update_file(pool); + if (debug) + syslog(LOG_DEBUG, "%s: updated: pool=%d key=%s val=%s", + __func__, pool, key, value); return 0; } @@ -348,8 +387,10 @@ static int kvp_key_add_or_modify(int pool, const __u8 *key, int key_size, record = realloc(record, sizeof(struct kvp_record) * ENTRIES_PER_BLOCK * (num_blocks + 1)); - if (record == NULL) + if (!record) { + syslog(LOG_ERR, "%s: Memory alloc failure", __func__); return 1; + } kvp_file_info[pool].num_blocks++; } @@ -357,6 +398,11 @@ static int kvp_key_add_or_modify(int pool, const __u8 *key, int key_size, memcpy(record[i].key, key, key_size); kvp_file_info[pool].records = record; kvp_file_info[pool].num_records++; + + if (debug) + syslog(LOG_DEBUG, "%s: added: pool=%d key=%s val=%s", + __func__, pool, key, value); + kvp_update_file(pool); return 0; } @@ -1355,6 +1401,7 @@ void print_usage(char *argv[]) fprintf(stderr, "Usage: %s [options]\n" "Options are:\n" " -n, --no-daemon stay in foreground, don't daemonize\n" + " -d, --debug Enable debug logs(syslog debug by default)\n" " -h, --help print this help\n", argv[0]); } @@ -1376,10 +1423,11 @@ int main(int argc, char *argv[]) static struct option long_options[] = { {"help", no_argument, 0, 'h' }, {"no-daemon", no_argument, 0, 'n' }, + {"debug", no_argument, 0, 'd' }, {0, 0, 0, 0 } }; - while ((opt = getopt_long(argc, argv, "hn", long_options, + while ((opt = getopt_long(argc, argv, "hnd", long_options, &long_index)) != -1) { switch (opt) { case 'n': @@ -1388,6 +1436,9 @@ int main(int argc, char *argv[]) case 'h': print_usage(argv); exit(0); + case 'd': + debug = 1; + break; default: print_usage(argv); exit(EXIT_FAILURE); @@ -1410,6 +1461,9 @@ int main(int argc, char *argv[]) */ kvp_get_domain_name(full_domain_name, sizeof(full_domain_name)); + if (debug) + syslog(LOG_INFO, "Logging debug info in syslog(debug)"); + if (kvp_file_init()) { syslog(LOG_ERR, "Failed to initialize the pools"); exit(EXIT_FAILURE); diff --git a/tools/testing/selftests/mm/hmm-tests.c b/tools/testing/selftests/mm/hmm-tests.c index d2cfc9b494a0e..6f75c54564176 100644 --- a/tools/testing/selftests/mm/hmm-tests.c +++ b/tools/testing/selftests/mm/hmm-tests.c @@ -159,6 +159,10 @@ FIXTURE_TEARDOWN(hmm) { int ret = close(self->fd); + if (ret != 0) { + fprintf(stderr, "close returned (%d) fd is (%d)\n", ret, self->fd); + exit(1); + } ASSERT_EQ(ret, 0); self->fd = -1; }