From cca5588e78b1e6a96425b1afe0cba5789d0fa2d2 Mon Sep 17 00:00:00 2001 From: Kishon Vijay Abraham I Date: Fri, 13 Sep 2024 17:06:52 +0530 Subject: [PATCH 01/26] x86/apic: Add new driver for Secure AVIC The Secure AVIC feature provides SEV-SNP guests hardware acceleration for performance sensitive APIC accesses while securely managing the guest-owned APIC state through the use of a private APIC backing page. This helps prevent malicious hypervisor from generating unexpected interrupts for a vCPU or otherwise violate architectural assumptions around APIC behavior. Add a new x2APIC driver that will serve as the base of the Secure AVIC support. It is initially the same as the x2APIC phys driver, but will be modified as features of Secure AVIC are implemented. Signed-off-by: Kishon Vijay Abraham I Co-developed-by: Neeraj Upadhyay Signed-off-by: Neeraj Upadhyay --- arch/x86/Kconfig | 12 +++ arch/x86/boot/compressed/sev.c | 1 + arch/x86/coco/core.c | 3 + arch/x86/include/asm/msr-index.h | 4 +- arch/x86/kernel/apic/Makefile | 1 + arch/x86/kernel/apic/x2apic_savic.c | 112 ++++++++++++++++++++++++++++ include/linux/cc_platform.h | 8 ++ 7 files changed, 140 insertions(+), 1 deletion(-) create mode 100644 arch/x86/kernel/apic/x2apic_savic.c diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index c1a2f5d148aa6..89e5c1f4aab2d 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -473,6 +473,18 @@ config X86_X2APIC If you don't know what to do here, say N. +config AMD_SECURE_AVIC + bool "AMD Secure AVIC" + depends on X86_X2APIC && AMD_MEM_ENCRYPT + help + This enables AMD Secure AVIC support on guests that have this feature. + + AMD Secure AVIC provides hardware acceleration for performance sensitive + APIC accesses and support for managing guest owned APIC state for SEV-SNP + guests. + + If you don't know what to do here, say N. 
+ config X86_POSTED_MSI bool "Enable MSI and MSI-x delivery by posted interrupts" depends on X86_64 && IRQ_REMAP diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c index a93e363388669..ebff8ab518390 100644 --- a/arch/x86/boot/compressed/sev.c +++ b/arch/x86/boot/compressed/sev.c @@ -357,6 +357,7 @@ void do_boot_stage2_vc(struct pt_regs *regs, unsigned long exit_code) MSR_AMD64_SNP_VMSA_REG_PROT | \ MSR_AMD64_SNP_RESERVED_BIT13 | \ MSR_AMD64_SNP_RESERVED_BIT15 | \ + MSR_AMD64_SNP_SECURE_AVIC_ENABLED | \ MSR_AMD64_SNP_RESERVED_MASK) /* diff --git a/arch/x86/coco/core.c b/arch/x86/coco/core.c index 0f81f70aca822..4c3bc031e9a9d 100644 --- a/arch/x86/coco/core.c +++ b/arch/x86/coco/core.c @@ -100,6 +100,9 @@ static bool noinstr amd_cc_platform_has(enum cc_attr attr) case CC_ATTR_HOST_SEV_SNP: return cc_flags.host_sev_snp; + case CC_ATTR_SNP_SECURE_AVIC: + return sev_status & MSR_AMD64_SNP_SECURE_AVIC_ENABLED; + default: return false; } diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 2b6e3127ef4e2..5946b90eb5e9d 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -691,7 +691,9 @@ #define MSR_AMD64_SNP_VMSA_REG_PROT BIT_ULL(MSR_AMD64_SNP_VMSA_REG_PROT_BIT) #define MSR_AMD64_SNP_SMT_PROT_BIT 17 #define MSR_AMD64_SNP_SMT_PROT BIT_ULL(MSR_AMD64_SNP_SMT_PROT_BIT) -#define MSR_AMD64_SNP_RESV_BIT 18 +#define MSR_AMD64_SNP_SECURE_AVIC_BIT 18 +#define MSR_AMD64_SNP_SECURE_AVIC_ENABLED BIT_ULL(MSR_AMD64_SNP_SECURE_AVIC_BIT) +#define MSR_AMD64_SNP_RESV_BIT 19 #define MSR_AMD64_SNP_RESERVED_MASK GENMASK_ULL(63, MSR_AMD64_SNP_RESV_BIT) #define MSR_AMD64_VIRT_SPEC_CTRL 0xc001011f diff --git a/arch/x86/kernel/apic/Makefile b/arch/x86/kernel/apic/Makefile index 3bf0487cf3b73..12153993c12bf 100644 --- a/arch/x86/kernel/apic/Makefile +++ b/arch/x86/kernel/apic/Makefile @@ -18,6 +18,7 @@ ifeq ($(CONFIG_X86_64),y) # APIC probe will depend on the listing order here obj-$(CONFIG_X86_NUMACHIP) += apic_numachip.o obj-$(CONFIG_X86_UV) += x2apic_uv_x.o +obj-$(CONFIG_AMD_SECURE_AVIC) += x2apic_savic.o obj-$(CONFIG_X86_X2APIC) += x2apic_phys.o obj-$(CONFIG_X86_X2APIC) += x2apic_cluster.o obj-y += apic_flat_64.o diff --git a/arch/x86/kernel/apic/x2apic_savic.c b/arch/x86/kernel/apic/x2apic_savic.c new file mode 100644 index 0000000000000..97dac09a7f424 --- /dev/null +++ b/arch/x86/kernel/apic/x2apic_savic.c @@ -0,0 +1,112 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * AMD Secure AVIC Support (SEV-SNP Guests) + * + * Copyright (C) 2024 Advanced Micro Devices, Inc. 
+ * + * Author: Kishon Vijay Abraham I + */ + +#include +#include + +#include +#include + +#include "local.h" + +static int x2apic_savic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) +{ + return x2apic_enabled() && cc_platform_has(CC_ATTR_SNP_SECURE_AVIC); +} + +static void x2apic_savic_send_IPI(int cpu, int vector) +{ + u32 dest = per_cpu(x86_cpu_to_apicid, cpu); + + /* x2apic MSRs are special and need a special fence: */ + weak_wrmsr_fence(); + __x2apic_send_IPI_dest(dest, vector, APIC_DEST_PHYSICAL); +} + +static void +__send_IPI_mask(const struct cpumask *mask, int vector, int apic_dest) +{ + unsigned long query_cpu; + unsigned long this_cpu; + unsigned long flags; + + /* x2apic MSRs are special and need a special fence: */ + weak_wrmsr_fence(); + + local_irq_save(flags); + + this_cpu = smp_processor_id(); + for_each_cpu(query_cpu, mask) { + if (apic_dest == APIC_DEST_ALLBUT && this_cpu == query_cpu) + continue; + __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu), + vector, APIC_DEST_PHYSICAL); + } + local_irq_restore(flags); +} + +static void x2apic_savic_send_IPI_mask(const struct cpumask *mask, int vector) +{ + __send_IPI_mask(mask, vector, APIC_DEST_ALLINC); +} + +static void x2apic_savic_send_IPI_mask_allbutself(const struct cpumask *mask, int vector) +{ + __send_IPI_mask(mask, vector, APIC_DEST_ALLBUT); +} + +static int x2apic_savic_probe(void) +{ + if (!cc_platform_has(CC_ATTR_SNP_SECURE_AVIC)) + return 0; + + if (!x2apic_mode) { + pr_err("Secure AVIC enabled in non x2APIC mode\n"); + snp_abort(); + } + + pr_info("Secure AVIC Enabled\n"); + + return 1; +} + +static struct apic apic_x2apic_savic __ro_after_init = { + + .name = "secure avic x2apic", + .probe = x2apic_savic_probe, + .acpi_madt_oem_check = x2apic_savic_acpi_madt_oem_check, + + .dest_mode_logical = false, + + .disable_esr = 0, + + .cpu_present_to_apicid = default_cpu_present_to_apicid, + + .max_apic_id = UINT_MAX, + .x2apic_set_max_apicid = true, + .get_apic_id = x2apic_get_apic_id, + + .calc_dest_apicid = apic_default_calc_apicid, + + .send_IPI = x2apic_savic_send_IPI, + .send_IPI_mask = x2apic_savic_send_IPI_mask, + .send_IPI_mask_allbutself = x2apic_savic_send_IPI_mask_allbutself, + .send_IPI_allbutself = x2apic_send_IPI_allbutself, + .send_IPI_all = x2apic_send_IPI_all, + .send_IPI_self = x2apic_send_IPI_self, + .nmi_to_offline_cpu = true, + + .read = native_apic_msr_read, + .write = native_apic_msr_write, + .eoi = native_apic_msr_eoi, + .icr_read = native_x2apic_icr_read, + .icr_write = native_x2apic_icr_write, +}; + +apic_driver(apic_x2apic_savic); diff --git a/include/linux/cc_platform.h b/include/linux/cc_platform.h index caa4b44306346..8012086784502 100644 --- a/include/linux/cc_platform.h +++ b/include/linux/cc_platform.h @@ -88,6 +88,14 @@ enum cc_attr { * enabled to run SEV-SNP guests. */ CC_ATTR_HOST_SEV_SNP, + + /** + * @CC_ATTR_SNP_SECURE_AVIC: Secure AVIC mode is active. + * + * The host kernel is running with the necessary features enabled + * to run SEV-SNP guests with full Secure AVIC capabilities. + */ + CC_ATTR_SNP_SECURE_AVIC, }; #ifdef CONFIG_ARCH_HAS_CC_PLATFORM From 433366c87568bdf395423d4ad3653be0fbe56c06 Mon Sep 17 00:00:00 2001 From: Kishon Vijay Abraham I Date: Fri, 13 Sep 2024 17:06:53 +0530 Subject: [PATCH 02/26] x86/apic: Initialize Secure AVIC APIC backing page With Secure AVIC, the APIC backing page is owned and managed by guest. Allocate APIC backing page for all guest CPUs. In addition, add a setup() APIC callback. 
This callback is used by Secure AVIC driver to initialize APIC backing page area for each CPU. Allocate APIC backing page memory area in chunks of 2M, so that backing page memory is mapped using full huge pages. Without this, if there are private to shared page state conversions for any non-backing-page allocation which is part of the same huge page as the one containing a backing page, hypervisor splits the huge page into 4K pages. Splitting of APIC backing page area into individual 4K pages can result in performance impact, due to TLB pressure. Secure AVIC requires that vCPU's APIC backing page's NPT entry is always present while that vCPU is running. If APIC backing page's NPT entry is not present, a VMEXIT_BUSY is returned on VMRUN and the vCPU cannot be resumed after that point. To handle this, invoke sev_notify_savic_gpa() in Secure AVIC driver's setup() callback. This triggers SVM_VMGEXIT_SECURE_ AVIC_GPA exit for the hypervisor to note GPA of the vCPU's APIC backing page. Hypervisor uses this information to ensure that the APIC backing page is mapped in NPT before invoking VMRUN. Signed-off-by: Kishon Vijay Abraham I Co-developed-by: Neeraj Upadhyay Signed-off-by: Neeraj Upadhyay --- arch/x86/coco/sev/core.c | 22 +++++++++++++++++ arch/x86/include/asm/apic.h | 1 + arch/x86/include/asm/sev.h | 2 ++ arch/x86/include/uapi/asm/svm.h | 1 + arch/x86/kernel/apic/apic.c | 2 ++ arch/x86/kernel/apic/x2apic_savic.c | 38 +++++++++++++++++++++++++++++ 6 files changed, 66 insertions(+) diff --git a/arch/x86/coco/sev/core.c b/arch/x86/coco/sev/core.c index a0b73e6ed7475..1a7b322c1e959 100644 --- a/arch/x86/coco/sev/core.c +++ b/arch/x86/coco/sev/core.c @@ -1358,6 +1358,28 @@ static enum es_result vc_handle_msr(struct ghcb *ghcb, struct es_em_ctxt *ctxt) return ret; } +enum es_result sev_notify_savic_gpa(u64 gpa) +{ + struct ghcb_state state; + struct es_em_ctxt ctxt; + unsigned long flags; + struct ghcb *ghcb; + int ret = 0; + + local_irq_save(flags); + + ghcb = __sev_get_ghcb(&state); + + vc_ghcb_invalidate(ghcb); + + ret = sev_es_ghcb_hv_call(ghcb, &ctxt, SVM_VMGEXIT_SECURE_AVIC_GPA, gpa, 0); + + __sev_put_ghcb(&state); + + local_irq_restore(flags); + return ret; +} + static void snp_register_per_cpu_ghcb(void) { struct sev_es_runtime_data *data; diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 33f677e2db756..c877378c78415 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -305,6 +305,7 @@ struct apic { /* Probe, setup and smpboot functions */ int (*probe)(void); + void (*setup)(void); int (*acpi_madt_oem_check)(char *oem_id, char *oem_table_id); void (*init_apic_ldr)(void); diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h index 833954e5aadee..2b433d4332319 100644 --- a/arch/x86/include/asm/sev.h +++ b/arch/x86/include/asm/sev.h @@ -414,6 +414,7 @@ u64 sev_get_status(void); void sev_show_status(void); void snp_update_svsm_ca(void); void snp_mshv_vtl_return(u8 target_vtl); +enum es_result sev_notify_savic_gpa(u64 gpa); #else /* !CONFIG_AMD_MEM_ENCRYPT */ @@ -451,6 +452,7 @@ static inline u64 sev_get_status(void) { return 0; } static inline void sev_show_status(void) { } static inline void snp_update_svsm_ca(void) { } static inline void snp_mshv_vtl_return(u8 input_vtl) { } +static inline enum es_result sev_notify_savic_gpa(u64 gpa) { return ES_UNSUPPORTED; } #endif /* CONFIG_AMD_MEM_ENCRYPT */ diff --git a/arch/x86/include/uapi/asm/svm.h b/arch/x86/include/uapi/asm/svm.h index 1814b413fd578..0f21cea6d21c7 100644 --- 
a/arch/x86/include/uapi/asm/svm.h +++ b/arch/x86/include/uapi/asm/svm.h @@ -116,6 +116,7 @@ #define SVM_VMGEXIT_AP_CREATE 1 #define SVM_VMGEXIT_AP_DESTROY 2 #define SVM_VMGEXIT_SNP_RUN_VMPL 0x80000018 +#define SVM_VMGEXIT_SECURE_AVIC_GPA 0x8000001a #define SVM_VMGEXIT_HV_FEATURES 0x8000fffd #define SVM_VMGEXIT_TERM_REQUEST 0x8000fffe #define SVM_VMGEXIT_TERM_REASON(reason_set, reason_code) \ diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index c5fb28e6451a3..85d2d53d6d068 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -1504,6 +1504,8 @@ static void setup_local_APIC(void) return; } + if (apic->setup) + apic->setup(); /* * If this comes from kexec/kcrash the APIC might be enabled in * SPIV. Soft disable it before doing further initialization. diff --git a/arch/x86/kernel/apic/x2apic_savic.c b/arch/x86/kernel/apic/x2apic_savic.c index 97dac09a7f424..d903c35b8b64f 100644 --- a/arch/x86/kernel/apic/x2apic_savic.c +++ b/arch/x86/kernel/apic/x2apic_savic.c @@ -9,12 +9,16 @@ #include #include +#include #include #include #include "local.h" +static DEFINE_PER_CPU(void *, apic_backing_page); +static DEFINE_PER_CPU(bool, savic_setup_done); + static int x2apic_savic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) { return x2apic_enabled() && cc_platform_has(CC_ATTR_SNP_SECURE_AVIC); @@ -61,8 +65,30 @@ static void x2apic_savic_send_IPI_mask_allbutself(const struct cpumask *mask, in __send_IPI_mask(mask, vector, APIC_DEST_ALLBUT); } +static void x2apic_savic_setup(void) +{ + void *backing_page; + enum es_result ret; + unsigned long gpa; + + if (this_cpu_read(savic_setup_done)) + return; + + backing_page = this_cpu_read(apic_backing_page); + gpa = __pa(backing_page); + ret = sev_notify_savic_gpa(gpa); + if (ret != ES_OK) + snp_abort(); + this_cpu_write(savic_setup_done, true); +} + static int x2apic_savic_probe(void) { + void *backing_pages; + unsigned int cpu; + size_t sz; + int i; + if (!cc_platform_has(CC_ATTR_SNP_SECURE_AVIC)) return 0; @@ -71,6 +97,17 @@ static int x2apic_savic_probe(void) snp_abort(); } + sz = ALIGN(num_possible_cpus() * SZ_4K, SZ_2M); + backing_pages = kzalloc(sz, GFP_ATOMIC); + if (!backing_pages) + snp_abort(); + + i = 0; + for_each_possible_cpu(cpu) { + per_cpu(apic_backing_page, cpu) = backing_pages + i * SZ_4K; + i++; + } + pr_info("Secure AVIC Enabled\n"); return 1; @@ -81,6 +118,7 @@ static struct apic apic_x2apic_savic __ro_after_init = { .name = "secure avic x2apic", .probe = x2apic_savic_probe, .acpi_madt_oem_check = x2apic_savic_acpi_madt_oem_check, + .setup = x2apic_savic_setup, .dest_mode_logical = false, From 27c50f554f99fe1bb04e2eabda0b3e66082ec6d3 Mon Sep 17 00:00:00 2001 From: Neeraj Upadhyay Date: Fri, 13 Sep 2024 17:06:54 +0530 Subject: [PATCH 03/26] x86/apic: Populate .read()/.write() callbacks of Secure AVIC driver The x2APIC registers are mapped at an offset within the guest APIC backing page which is same as their x2APIC MMIO offset. Secure AVIC adds new registers such as ALLOWED_IRRs (which are at 4-byte offset within the IRR register offset range) and NMI_REQ to the APIC register space. In addition, the APIC_ID register is writable and configured by guest. Add read() and write() APIC callback functions to read and write x2APIC registers directly from the guest APIC backing page. The default .read()/.write() callbacks of x2APIC drivers perform a rdmsr/wrmsr of the x2APIC registers. When Secure AVIC is enabled, these would result in #VC exception (for non-accelerated register accesses). 
The #VC exception handler reads/write the x2APIC register in the guest APIC backing page. Since this would increase the latency of accessing x2APIC registers, the read() and write() callbacks of Secure AVIC driver directly reads/writes to the guest APIC backing page. Co-developed-by: Kishon Vijay Abraham I Signed-off-by: Kishon Vijay Abraham I Signed-off-by: Neeraj Upadhyay --- arch/x86/include/asm/apicdef.h | 2 + arch/x86/kernel/apic/x2apic_savic.c | 107 +++++++++++++++++++++++++++- 2 files changed, 107 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h index 094106b6a5384..be39a543fbe5d 100644 --- a/arch/x86/include/asm/apicdef.h +++ b/arch/x86/include/asm/apicdef.h @@ -135,6 +135,8 @@ #define APIC_TDR_DIV_128 0xA #define APIC_EFEAT 0x400 #define APIC_ECTRL 0x410 +#define APIC_SEOI 0x420 +#define APIC_IER 0x480 #define APIC_EILVTn(n) (0x500 + 0x10 * n) #define APIC_EILVT_NR_AMD_K8 1 /* # of extended interrupts */ #define APIC_EILVT_NR_AMD_10H 4 diff --git a/arch/x86/kernel/apic/x2apic_savic.c b/arch/x86/kernel/apic/x2apic_savic.c index d903c35b8b64f..6a471bbc3dbae 100644 --- a/arch/x86/kernel/apic/x2apic_savic.c +++ b/arch/x86/kernel/apic/x2apic_savic.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include @@ -24,6 +25,108 @@ static int x2apic_savic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) return x2apic_enabled() && cc_platform_has(CC_ATTR_SNP_SECURE_AVIC); } +static inline u32 get_reg(char *page, int reg_off) +{ + return READ_ONCE(*((u32 *)(page + reg_off))); +} + +static inline void set_reg(char *page, int reg_off, u32 val) +{ + WRITE_ONCE(*((u32 *)(page + reg_off)), val); +} + +#define SAVIC_ALLOWED_IRR_OFFSET 0x204 + +static u32 x2apic_savic_read(u32 reg) +{ + void *backing_page = this_cpu_read(apic_backing_page); + + switch (reg) { + case APIC_LVTT: + case APIC_TMICT: + case APIC_TMCCT: + case APIC_TDCR: + case APIC_ID: + case APIC_LVR: + case APIC_TASKPRI: + case APIC_ARBPRI: + case APIC_PROCPRI: + case APIC_LDR: + case APIC_SPIV: + case APIC_ESR: + case APIC_ICR: + case APIC_LVTTHMR: + case APIC_LVTPC: + case APIC_LVT0: + case APIC_LVT1: + case APIC_LVTERR: + case APIC_EFEAT: + case APIC_ECTRL: + case APIC_SEOI: + case APIC_IER: + case APIC_EILVTn(0) ... APIC_EILVTn(3): + return get_reg(backing_page, reg); + case APIC_ISR ... APIC_ISR + 0x70: + case APIC_TMR ... APIC_TMR + 0x70: + WARN_ONCE(!IS_ALIGNED(reg, 16), "Reg offset %#x not aligned at 16 bytes", reg); + return get_reg(backing_page, reg); + /* IRR and ALLOWED_IRR offset range */ + case APIC_IRR ... APIC_IRR + 0x74: + /* + * Either aligned at 16 bytes for valid IRR reg offset or a + * valid Secure AVIC ALLOWED_IRR offset. 
+ */ + WARN_ONCE(!(IS_ALIGNED(reg, 16) || IS_ALIGNED(reg - SAVIC_ALLOWED_IRR_OFFSET, 16)), + "Misaligned IRR/ALLOWED_IRR reg offset %#x", reg); + return get_reg(backing_page, reg); + default: + pr_err("Permission denied: read of Secure AVIC reg offset %#x\n", reg); + return 0; + } +} + +#define SAVIC_NMI_REQ_OFFSET 0x278 + +static void x2apic_savic_write(u32 reg, u32 data) +{ + void *backing_page = this_cpu_read(apic_backing_page); + + switch (reg) { + case APIC_LVTT: + case APIC_LVT0: + case APIC_LVT1: + case APIC_TMICT: + case APIC_TDCR: + case APIC_SELF_IPI: + /* APIC_ID is writable and configured by guest for Secure AVIC */ + case APIC_ID: + case APIC_TASKPRI: + case APIC_EOI: + case APIC_SPIV: + case SAVIC_NMI_REQ_OFFSET: + case APIC_ESR: + case APIC_ICR: + case APIC_LVTTHMR: + case APIC_LVTPC: + case APIC_LVTERR: + case APIC_ECTRL: + case APIC_SEOI: + case APIC_IER: + case APIC_EILVTn(0) ... APIC_EILVTn(3): + set_reg(backing_page, reg, data); + break; + /* ALLOWED_IRR offsets are writable */ + case SAVIC_ALLOWED_IRR_OFFSET ... SAVIC_ALLOWED_IRR_OFFSET + 0x70: + if (IS_ALIGNED(reg - SAVIC_ALLOWED_IRR_OFFSET, 16)) { + set_reg(backing_page, reg, data); + break; + } + fallthrough; + default: + pr_err("Permission denied: write to Secure AVIC reg offset %#x\n", reg); + } +} + static void x2apic_savic_send_IPI(int cpu, int vector) { u32 dest = per_cpu(x86_cpu_to_apicid, cpu); @@ -140,8 +243,8 @@ static struct apic apic_x2apic_savic __ro_after_init = { .send_IPI_self = x2apic_send_IPI_self, .nmi_to_offline_cpu = true, - .read = native_apic_msr_read, - .write = native_apic_msr_write, + .read = x2apic_savic_read, + .write = x2apic_savic_write, .eoi = native_apic_msr_eoi, .icr_read = native_x2apic_icr_read, .icr_write = native_x2apic_icr_write, From c71652c8337032375ff9dfea61337a228fb2b69d Mon Sep 17 00:00:00 2001 From: Kishon Vijay Abraham I Date: Fri, 13 Sep 2024 17:06:55 +0530 Subject: [PATCH 04/26] x86/apic: Initialize APIC backing page for Secure AVIC Secure AVIC lets guest manage the APIC backing page (unlike emulated x2APIC or x2AVIC where the hypervisor manages the APIC backing page). However the introduced Secure AVIC Linux design still maintains the APIC backing page in the hypervisor to shadow the APIC backing page maintained by guest (It should be noted only subset of the registers are shadowed for specific usecases and registers like APIC_IRR, APIC_ISR are not shadowed). Add sev_ghcb_msr_read() to invoke "SVM_EXIT_MSR" VMGEXIT to read MSRs from hypervisor. Initialize the Secure AVIC's APIC backing page by copying the initial state of shadow APIC backing page in the hypervisor to the guest APIC backing page. Specifically copy APIC_LVR, APIC_LDR, and APIC_LVT MSRs from the shadow APIC backing page. 
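To make the backing-page initialization easier to follow, here is a minimal sketch (not the patch code itself) of how a 4K APIC register offset maps to its x2APIC MSR and is fetched from the hypervisor's shadow page over the GHCB, mirroring the helper added below:

  /* Sketch: fetch one x2APIC register from the hypervisor's shadow page. */
  static u32 read_shadow_reg(u32 reg)
  {
          u64 msr = APIC_BASE_MSR + (reg >> 4);   /* e.g. APIC_LDR (0xD0) -> MSR 0x80D */
          u64 data;

          /* A failed GHCB MSR read is treated as fatal for the SNP guest. */
          if (sev_ghcb_msr_read(msr, &data) != ES_OK)
                  snp_abort();

          return lower_32_bits(data);
  }
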
Signed-off-by: Kishon Vijay Abraham I Co-developed-by: Neeraj Upadhyay Signed-off-by: Neeraj Upadhyay --- arch/x86/coco/sev/core.c | 41 ++++++++++++++++----- arch/x86/include/asm/sev.h | 2 ++ arch/x86/kernel/apic/x2apic_savic.c | 55 +++++++++++++++++++++++++++++ 3 files changed, 90 insertions(+), 8 deletions(-) diff --git a/arch/x86/coco/sev/core.c b/arch/x86/coco/sev/core.c index 1a7b322c1e959..ce88a8281074b 100644 --- a/arch/x86/coco/sev/core.c +++ b/arch/x86/coco/sev/core.c @@ -1322,18 +1322,15 @@ int __init sev_es_efi_map_ghcbs(pgd_t *pgd) return 0; } -static enum es_result vc_handle_msr(struct ghcb *ghcb, struct es_em_ctxt *ctxt) +static enum es_result __vc_handle_msr(struct ghcb *ghcb, struct es_em_ctxt *ctxt, bool write) { struct pt_regs *regs = ctxt->regs; + u64 exit_info_1 = write ? 1 : 0; enum es_result ret; - u64 exit_info_1; - - /* Is it a WRMSR? */ - exit_info_1 = (ctxt->insn.opcode.bytes[1] == 0x30) ? 1 : 0; if (regs->cx == MSR_SVSM_CAA) { /* Writes to the SVSM CAA msr are ignored */ - if (exit_info_1) + if (write) return ES_OK; regs->ax = lower_32_bits(this_cpu_read(svsm_caa_pa)); @@ -1343,14 +1340,14 @@ static enum es_result vc_handle_msr(struct ghcb *ghcb, struct es_em_ctxt *ctxt) } ghcb_set_rcx(ghcb, regs->cx); - if (exit_info_1) { + if (write) { ghcb_set_rax(ghcb, regs->ax); ghcb_set_rdx(ghcb, regs->dx); } ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_MSR, exit_info_1, 0); - if ((ret == ES_OK) && (!exit_info_1)) { + if (ret == ES_OK && !write) { regs->ax = ghcb->save.rax; regs->dx = ghcb->save.rdx; } @@ -1358,6 +1355,34 @@ static enum es_result vc_handle_msr(struct ghcb *ghcb, struct es_em_ctxt *ctxt) return ret; } +static enum es_result vc_handle_msr(struct ghcb *ghcb, struct es_em_ctxt *ctxt) +{ + return __vc_handle_msr(ghcb, ctxt, ctxt->insn.opcode.bytes[1] == 0x30); +} + +enum es_result sev_ghcb_msr_read(u64 msr, u64 *value) +{ + struct pt_regs regs = { .cx = msr }; + struct es_em_ctxt ctxt = { .regs = ®s }; + struct ghcb_state state; + unsigned long flags; + enum es_result ret; + struct ghcb *ghcb; + + local_irq_save(flags); + ghcb = __sev_get_ghcb(&state); + vc_ghcb_invalidate(ghcb); + + ret = __vc_handle_msr(ghcb, &ctxt, false); + if (ret == ES_OK) + *value = regs.ax | regs.dx << 32; + + __sev_put_ghcb(&state); + local_irq_restore(flags); + + return ret; +} + enum es_result sev_notify_savic_gpa(u64 gpa) { struct ghcb_state state; diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h index 2b433d4332319..f1a55ec631066 100644 --- a/arch/x86/include/asm/sev.h +++ b/arch/x86/include/asm/sev.h @@ -415,6 +415,7 @@ void sev_show_status(void); void snp_update_svsm_ca(void); void snp_mshv_vtl_return(u8 target_vtl); enum es_result sev_notify_savic_gpa(u64 gpa); +enum es_result sev_ghcb_msr_read(u64 msr, u64 *value); #else /* !CONFIG_AMD_MEM_ENCRYPT */ @@ -453,6 +454,7 @@ static inline void sev_show_status(void) { } static inline void snp_update_svsm_ca(void) { } static inline void snp_mshv_vtl_return(u8 input_vtl) { } static inline enum es_result sev_notify_savic_gpa(u64 gpa) { return ES_UNSUPPORTED; } +static inline enum es_result sev_ghcb_msr_read(u64 msr, u64 *value) { return ES_UNSUPPORTED; } #endif /* CONFIG_AMD_MEM_ENCRYPT */ diff --git a/arch/x86/kernel/apic/x2apic_savic.c b/arch/x86/kernel/apic/x2apic_savic.c index 6a471bbc3dbae..99151be4e1734 100644 --- a/arch/x86/kernel/apic/x2apic_savic.c +++ b/arch/x86/kernel/apic/x2apic_savic.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -20,6 +21,19 @@ static DEFINE_PER_CPU(void 
*, apic_backing_page); static DEFINE_PER_CPU(bool, savic_setup_done); +enum lapic_lvt_entry { + LVT_TIMER, + LVT_THERMAL_MONITOR, + LVT_PERFORMANCE_COUNTER, + LVT_LINT0, + LVT_LINT1, + LVT_ERROR, + + APIC_MAX_NR_LVT_ENTRIES, +}; + +#define APIC_LVTx(x) (APIC_LVTT + 0x10 * (x)) + static int x2apic_savic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) { return x2apic_enabled() && cc_platform_has(CC_ATTR_SNP_SECURE_AVIC); @@ -35,6 +49,22 @@ static inline void set_reg(char *page, int reg_off, u32 val) WRITE_ONCE(*((u32 *)(page + reg_off)), val); } +static u32 read_msr_from_hv(u32 reg) +{ + u64 data, msr; + int ret; + + msr = APIC_BASE_MSR + (reg >> 4); + ret = sev_ghcb_msr_read(msr, &data); + if (ret != ES_OK) { + pr_err("Secure AVIC msr (%#llx) read returned error (%d)\n", msr, ret); + /* MSR read failures are treated as fatal errors */ + snp_abort(); + } + + return lower_32_bits(data); +} + #define SAVIC_ALLOWED_IRR_OFFSET 0x204 static u32 x2apic_savic_read(u32 reg) @@ -168,6 +198,30 @@ static void x2apic_savic_send_IPI_mask_allbutself(const struct cpumask *mask, in __send_IPI_mask(mask, vector, APIC_DEST_ALLBUT); } +static void init_backing_page(void *backing_page) +{ + u32 val; + int i; + + val = read_msr_from_hv(APIC_LVR); + set_reg(backing_page, APIC_LVR, val); + + /* + * Hypervisor is used for all timer related functions, + * so don't copy those values. + */ + for (i = LVT_THERMAL_MONITOR; i < APIC_MAX_NR_LVT_ENTRIES; i++) { + val = read_msr_from_hv(APIC_LVTx(i)); + set_reg(backing_page, APIC_LVTx(i), val); + } + + val = read_msr_from_hv(APIC_LVT0); + set_reg(backing_page, APIC_LVT0, val); + + val = read_msr_from_hv(APIC_LDR); + set_reg(backing_page, APIC_LDR, val); +} + static void x2apic_savic_setup(void) { void *backing_page; @@ -178,6 +232,7 @@ static void x2apic_savic_setup(void) return; backing_page = this_cpu_read(apic_backing_page); + init_backing_page(backing_page); gpa = __pa(backing_page); ret = sev_notify_savic_gpa(gpa); if (ret != ES_OK) From 2fdf85a56c929e85c31be6a054a4c488d3f63308 Mon Sep 17 00:00:00 2001 From: Neeraj Upadhyay Date: Fri, 13 Sep 2024 17:06:56 +0530 Subject: [PATCH 05/26] x86/apic: Initialize APIC ID for Secure AVIC Initialize the APIC ID in the APIC backing page with the CPUID function 0000_000bh_EDX (Extended Topology Enumeration), and ensure that APIC ID msr read from hypervisor is consistent with the value read from CPUID. 
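For reference, a compact sketch of the consistency check described above (CPUID leaf 0xB returns the full 32-bit x2APIC ID in EDX; APIC_ID at offset 0x20 corresponds to MSR 0x802):

  u32 apic_id    = cpuid_edx(0x0000000b);       /* x2APIC ID from Extended Topology Enumeration */
  u32 hv_apic_id = read_msr_from_hv(APIC_ID);   /* value the hypervisor reports for the APIC_ID MSR */

  WARN_ONCE(hv_apic_id != apic_id,
            "Inconsistent APIC_ID values: %d (cpuid), %d (msr)", apic_id, hv_apic_id);
  set_reg(backing_page, APIC_ID, apic_id);
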
Signed-off-by: Neeraj Upadhyay --- arch/x86/kernel/apic/x2apic_savic.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/x86/kernel/apic/x2apic_savic.c b/arch/x86/kernel/apic/x2apic_savic.c index 99151be4e1734..09fbc1857bf3a 100644 --- a/arch/x86/kernel/apic/x2apic_savic.c +++ b/arch/x86/kernel/apic/x2apic_savic.c @@ -14,6 +14,7 @@ #include #include +#include #include #include "local.h" @@ -200,6 +201,8 @@ static void x2apic_savic_send_IPI_mask_allbutself(const struct cpumask *mask, in static void init_backing_page(void *backing_page) { + u32 hv_apic_id; + u32 apic_id; u32 val; int i; @@ -220,6 +223,13 @@ static void init_backing_page(void *backing_page) val = read_msr_from_hv(APIC_LDR); set_reg(backing_page, APIC_LDR, val); + + /* Read APIC ID from Extended Topology Enumeration CPUID */ + apic_id = cpuid_edx(0x0000000b); + hv_apic_id = read_msr_from_hv(APIC_ID); + WARN_ONCE(hv_apic_id != apic_id, "Inconsistent APIC_ID values: %d (cpuid), %d (msr)", + apic_id, hv_apic_id); + set_reg(backing_page, APIC_ID, apic_id); } static void x2apic_savic_setup(void) From 86e08ecb8dcdb5f96f8d9aee2007dc39a2543e8a Mon Sep 17 00:00:00 2001 From: Kishon Vijay Abraham I Date: Fri, 13 Sep 2024 17:06:57 +0530 Subject: [PATCH 06/26] x86/apic: Add update_vector callback for Secure AVIC Add update_vector callback to set/clear ALLOWED_IRR field in the APIC backing page. The allowed IRR vector indicates the interrupt vectors which the guest allows the hypervisor to send (typically for emulated devices). ALLOWED_IRR is meant to be used specifically for vectors that the hypervisor is allowed to inject, such as device interrupts. Interrupt vectors used exclusively by the guest itself (like IPI vectors) should not be allowed to be injected into the guest for security reasons. The update_vector callback is invoked from APIC vector domain whenever a vector is allocated, freed or moved. 
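As a simplified sketch of the bit layout (the driver itself uses the atomic test_and_set/clear variants, see the diff below), vector V lands in the ALLOWED_IRR register array as follows:

  #define SAVIC_ALLOWED_IRR_OFFSET  0x204               /* first ALLOWED_IRR register */
  #define VEC_POS(v)                ((v) & 31)          /* bit inside one 32-bit register */
  #define REG_POS(v)                (((v) >> 5) << 4)   /* registers sit on a 16-byte stride */

  static void allow_vector(void *backing_page, unsigned int vector, bool set)
  {
          unsigned long *reg = (unsigned long *)((char *)backing_page +
                                  SAVIC_ALLOWED_IRR_OFFSET + REG_POS(vector));

          if (set)
                  set_bit(VEC_POS(vector), reg);        /* hypervisor may inject this vector */
          else
                  clear_bit(VEC_POS(vector), reg);
  }
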
Signed-off-by: Kishon Vijay Abraham I Co-developed-by: Neeraj Upadhyay Signed-off-by: Neeraj Upadhyay --- arch/x86/include/asm/apic.h | 2 ++ arch/x86/kernel/apic/vector.c | 8 ++++++++ arch/x86/kernel/apic/x2apic_savic.c | 21 +++++++++++++++++++++ 3 files changed, 31 insertions(+) diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index c877378c78415..1ab0e22a71873 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -318,6 +318,8 @@ struct apic { /* wakeup secondary CPU using 64-bit wakeup point */ int (*wakeup_secondary_cpu_64)(u32 apicid, unsigned long start_eip, unsigned int cpu); + void (*update_vector)(unsigned int cpu, unsigned int vector, bool set); + char *name; }; diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 557318145038e..5aa65a732b055 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -174,6 +174,8 @@ static void apic_update_vector(struct irq_data *irqd, unsigned int newvec, apicd->prev_cpu = apicd->cpu; WARN_ON_ONCE(apicd->cpu == newcpu); } else { + if (apic->update_vector) + apic->update_vector(apicd->cpu, apicd->vector, false); irq_matrix_free(vector_matrix, apicd->cpu, apicd->vector, managed); } @@ -183,6 +185,8 @@ static void apic_update_vector(struct irq_data *irqd, unsigned int newvec, apicd->cpu = newcpu; BUG_ON(!IS_ERR_OR_NULL(per_cpu(vector_irq, newcpu)[newvec])); per_cpu(vector_irq, newcpu)[newvec] = desc; + if (apic->update_vector) + apic->update_vector(apicd->cpu, apicd->vector, true); } static void vector_assign_managed_shutdown(struct irq_data *irqd) @@ -528,11 +532,15 @@ static bool vector_configure_legacy(unsigned int virq, struct irq_data *irqd, if (irqd_is_activated(irqd)) { trace_vector_setup(virq, true, 0); apic_update_irq_cfg(irqd, apicd->vector, apicd->cpu); + if (apic->update_vector) + apic->update_vector(apicd->cpu, apicd->vector, true); } else { /* Release the vector */ apicd->can_reserve = true; irqd_set_can_reserve(irqd); clear_irq_vector(irqd); + if (apic->update_vector) + apic->update_vector(apicd->cpu, apicd->vector, false); realloc = true; } raw_spin_unlock_irqrestore(&vector_lock, flags); diff --git a/arch/x86/kernel/apic/x2apic_savic.c b/arch/x86/kernel/apic/x2apic_savic.c index 09fbc1857bf3a..a9e54c1c64464 100644 --- a/arch/x86/kernel/apic/x2apic_savic.c +++ b/arch/x86/kernel/apic/x2apic_savic.c @@ -19,6 +19,9 @@ #include "local.h" +#define VEC_POS(v) ((v) & (32 - 1)) +#define REG_POS(v) (((v) >> 5) << 4) + static DEFINE_PER_CPU(void *, apic_backing_page); static DEFINE_PER_CPU(bool, savic_setup_done); @@ -199,6 +202,22 @@ static void x2apic_savic_send_IPI_mask_allbutself(const struct cpumask *mask, in __send_IPI_mask(mask, vector, APIC_DEST_ALLBUT); } +static void x2apic_savic_update_vector(unsigned int cpu, unsigned int vector, bool set) +{ + void *backing_page; + unsigned long *reg; + int reg_off; + + backing_page = per_cpu(apic_backing_page, cpu); + reg_off = SAVIC_ALLOWED_IRR_OFFSET + REG_POS(vector); + reg = (unsigned long *)((char *)backing_page + reg_off); + + if (set) + test_and_set_bit(VEC_POS(vector), reg); + else + test_and_clear_bit(VEC_POS(vector), reg); +} + static void init_backing_page(void *backing_page) { u32 hv_apic_id; @@ -313,6 +332,8 @@ static struct apic apic_x2apic_savic __ro_after_init = { .eoi = native_apic_msr_eoi, .icr_read = native_x2apic_icr_read, .icr_write = native_x2apic_icr_write, + + .update_vector = x2apic_savic_update_vector, }; apic_driver(apic_x2apic_savic); From 
52cf2ea24ee876591946b423930db1df04b30ea8 Mon Sep 17 00:00:00 2001 From: Kishon Vijay Abraham I Date: Fri, 13 Sep 2024 17:06:58 +0530 Subject: [PATCH 07/26] x86/apic: Add support to send IPI for Secure AVIC With Secure AVIC only Self-IPI is accelerated. To handle all the other IPIs, add new callbacks for sending IPI, which write to the IRR of the target guest APIC backing page (after decoding the ICR register) and then issue VMGEXIT for the hypervisor to notify the target vCPU. Signed-off-by: Kishon Vijay Abraham I Co-developed-by: Neeraj Upadhyay Signed-off-by: Neeraj Upadhyay --- arch/x86/coco/sev/core.c | 25 +++++ arch/x86/include/asm/sev.h | 2 + arch/x86/kernel/apic/x2apic_savic.c | 152 +++++++++++++++++++++++++--- 3 files changed, 166 insertions(+), 13 deletions(-) diff --git a/arch/x86/coco/sev/core.c b/arch/x86/coco/sev/core.c index ce88a8281074b..d6fa563ac9ec1 100644 --- a/arch/x86/coco/sev/core.c +++ b/arch/x86/coco/sev/core.c @@ -1383,6 +1383,31 @@ enum es_result sev_ghcb_msr_read(u64 msr, u64 *value) return ret; } +enum es_result sev_ghcb_msr_write(u64 msr, u64 value) +{ + struct pt_regs regs = { + .cx = msr, + .ax = lower_32_bits(value), + .dx = upper_32_bits(value) + }; + struct es_em_ctxt ctxt = { .regs = ®s }; + struct ghcb_state state; + unsigned long flags; + enum es_result ret; + struct ghcb *ghcb; + + local_irq_save(flags); + ghcb = __sev_get_ghcb(&state); + vc_ghcb_invalidate(ghcb); + + ret = __vc_handle_msr(ghcb, &ctxt, true); + + __sev_put_ghcb(&state); + local_irq_restore(flags); + + return ret; +} + enum es_result sev_notify_savic_gpa(u64 gpa) { struct ghcb_state state; diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h index f1a55ec631066..ced3d8014ef4a 100644 --- a/arch/x86/include/asm/sev.h +++ b/arch/x86/include/asm/sev.h @@ -416,6 +416,7 @@ void snp_update_svsm_ca(void); void snp_mshv_vtl_return(u8 target_vtl); enum es_result sev_notify_savic_gpa(u64 gpa); enum es_result sev_ghcb_msr_read(u64 msr, u64 *value); +enum es_result sev_ghcb_msr_write(u64 msr, u64 value); #else /* !CONFIG_AMD_MEM_ENCRYPT */ @@ -455,6 +456,7 @@ static inline void snp_update_svsm_ca(void) { } static inline void snp_mshv_vtl_return(u8 input_vtl) { } static inline enum es_result sev_notify_savic_gpa(u64 gpa) { return ES_UNSUPPORTED; } static inline enum es_result sev_ghcb_msr_read(u64 msr, u64 *value) { return ES_UNSUPPORTED; } +static inline enum es_result sev_ghcb_msr_write(u64 msr, u64 value) { return ES_UNSUPPORTED; } #endif /* CONFIG_AMD_MEM_ENCRYPT */ diff --git a/arch/x86/kernel/apic/x2apic_savic.c b/arch/x86/kernel/apic/x2apic_savic.c index a9e54c1c64464..30a24b70e5cb3 100644 --- a/arch/x86/kernel/apic/x2apic_savic.c +++ b/arch/x86/kernel/apic/x2apic_savic.c @@ -69,6 +69,20 @@ static u32 read_msr_from_hv(u32 reg) return lower_32_bits(data); } +static void write_msr_to_hv(u32 reg, u64 data) +{ + u64 msr; + int ret; + + msr = APIC_BASE_MSR + (reg >> 4); + ret = sev_ghcb_msr_write(msr, data); + if (ret != ES_OK) { + pr_err("Secure AVIC msr (%#llx) write returned error (%d)\n", msr, ret); + /* MSR writes should never fail. 
Any failure is fatal error for SNP guest */ + snp_abort(); + } +} + #define SAVIC_ALLOWED_IRR_OFFSET 0x204 static u32 x2apic_savic_read(u32 reg) @@ -124,6 +138,7 @@ static u32 x2apic_savic_read(u32 reg) static void x2apic_savic_write(u32 reg, u32 data) { void *backing_page = this_cpu_read(apic_backing_page); + unsigned int cfg; switch (reg) { case APIC_LVTT: @@ -131,7 +146,6 @@ static void x2apic_savic_write(u32 reg, u32 data) case APIC_LVT1: case APIC_TMICT: case APIC_TDCR: - case APIC_SELF_IPI: /* APIC_ID is writable and configured by guest for Secure AVIC */ case APIC_ID: case APIC_TASKPRI: @@ -149,6 +163,11 @@ static void x2apic_savic_write(u32 reg, u32 data) case APIC_EILVTn(0) ... APIC_EILVTn(3): set_reg(backing_page, reg, data); break; + /* Self IPIs are accelerated by hardware, use wrmsr */ + case APIC_SELF_IPI: + cfg = __prepare_ICR(APIC_DEST_SELF, data, 0); + native_x2apic_icr_write(cfg, 0); + break; /* ALLOWED_IRR offsets are writable */ case SAVIC_ALLOWED_IRR_OFFSET ... SAVIC_ALLOWED_IRR_OFFSET + 0x70: if (IS_ALIGNED(reg - SAVIC_ALLOWED_IRR_OFFSET, 16)) { @@ -161,13 +180,100 @@ static void x2apic_savic_write(u32 reg, u32 data) } } +static void send_ipi(int cpu, int vector) +{ + void *backing_page; + int reg_off; + + backing_page = per_cpu(apic_backing_page, cpu); + reg_off = APIC_IRR + REG_POS(vector); + /* + * Use test_and_set_bit() to ensure that IRR updates are atomic w.r.t. other + * IRR updates such as during VMRUN and during CPU interrupt handling flow. + */ + test_and_set_bit(VEC_POS(vector), (unsigned long *)((char *)backing_page + reg_off)); +} + +static void send_ipi_dest(u64 icr_data) +{ + int vector, cpu; + + vector = icr_data & APIC_VECTOR_MASK; + cpu = icr_data >> 32; + + send_ipi(cpu, vector); +} + +static void send_ipi_target(u64 icr_data) +{ + if (icr_data & APIC_DEST_LOGICAL) { + pr_err("IPI target should be of PHYSICAL type\n"); + return; + } + + send_ipi_dest(icr_data); +} + +static void send_ipi_allbut(u64 icr_data) +{ + const struct cpumask *self_cpu_mask = get_cpu_mask(smp_processor_id()); + unsigned long flags; + int vector, cpu; + + vector = icr_data & APIC_VECTOR_MASK; + local_irq_save(flags); + for_each_cpu_andnot(cpu, cpu_present_mask, self_cpu_mask) + send_ipi(cpu, vector); + write_msr_to_hv(APIC_ICR, icr_data); + local_irq_restore(flags); +} + +static void send_ipi_allinc(u64 icr_data) +{ + int vector; + + send_ipi_allbut(icr_data); + vector = icr_data & APIC_VECTOR_MASK; + native_x2apic_icr_write(APIC_DEST_SELF | vector, 0); +} + +static void x2apic_savic_icr_write(u32 icr_low, u32 icr_high) +{ + int dsh, vector; + u64 icr_data; + + icr_data = ((u64)icr_high) << 32 | icr_low; + dsh = icr_low & APIC_DEST_ALLBUT; + + switch (dsh) { + case APIC_DEST_SELF: + vector = icr_data & APIC_VECTOR_MASK; + x2apic_savic_write(APIC_SELF_IPI, vector); + break; + case APIC_DEST_ALLINC: + send_ipi_allinc(icr_data); + break; + case APIC_DEST_ALLBUT: + send_ipi_allbut(icr_data); + break; + default: + send_ipi_target(icr_data); + write_msr_to_hv(APIC_ICR, icr_data); + } +} + +static void __send_IPI_dest(unsigned int apicid, int vector, unsigned int dest) +{ + unsigned int cfg = __prepare_ICR(0, vector, dest); + + x2apic_savic_icr_write(cfg, apicid); +} + static void x2apic_savic_send_IPI(int cpu, int vector) { u32 dest = per_cpu(x86_cpu_to_apicid, cpu); - /* x2apic MSRs are special and need a special fence: */ - weak_wrmsr_fence(); - __x2apic_send_IPI_dest(dest, vector, APIC_DEST_PHYSICAL); + __send_IPI_dest(dest, vector, APIC_DEST_PHYSICAL); } static void @@ -177,18 
+283,16 @@ __send_IPI_mask(const struct cpumask *mask, int vector, int apic_dest) unsigned long this_cpu; unsigned long flags; - /* x2apic MSRs are special and need a special fence: */ - weak_wrmsr_fence(); - local_irq_save(flags); this_cpu = smp_processor_id(); for_each_cpu(query_cpu, mask) { if (apic_dest == APIC_DEST_ALLBUT && this_cpu == query_cpu) continue; - __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu), - vector, APIC_DEST_PHYSICAL); + __send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu), vector, + APIC_DEST_PHYSICAL); } + local_irq_restore(flags); } @@ -202,6 +306,28 @@ static void x2apic_savic_send_IPI_mask_allbutself(const struct cpumask *mask, in __send_IPI_mask(mask, vector, APIC_DEST_ALLBUT); } +static void __send_IPI_shorthand(int vector, u32 which) +{ + unsigned int cfg = __prepare_ICR(which, vector, 0); + + x2apic_savic_icr_write(cfg, 0); +} + +static void x2apic_savic_send_IPI_allbutself(int vector) +{ + __send_IPI_shorthand(vector, APIC_DEST_ALLBUT); +} + +static void x2apic_savic_send_IPI_all(int vector) +{ + __send_IPI_shorthand(vector, APIC_DEST_ALLINC); +} + +static void x2apic_savic_send_IPI_self(int vector) +{ + __send_IPI_shorthand(vector, APIC_DEST_SELF); +} + static void x2apic_savic_update_vector(unsigned int cpu, unsigned int vector, bool set) { void *backing_page; @@ -322,16 +448,16 @@ static struct apic apic_x2apic_savic __ro_after_init = { .send_IPI = x2apic_savic_send_IPI, .send_IPI_mask = x2apic_savic_send_IPI_mask, .send_IPI_mask_allbutself = x2apic_savic_send_IPI_mask_allbutself, - .send_IPI_allbutself = x2apic_send_IPI_allbutself, - .send_IPI_all = x2apic_send_IPI_all, - .send_IPI_self = x2apic_send_IPI_self, + .send_IPI_allbutself = x2apic_savic_send_IPI_allbutself, + .send_IPI_all = x2apic_savic_send_IPI_all, + .send_IPI_self = x2apic_savic_send_IPI_self, .nmi_to_offline_cpu = true, .read = x2apic_savic_read, .write = x2apic_savic_write, .eoi = native_apic_msr_eoi, .icr_read = native_x2apic_icr_read, - .icr_write = native_x2apic_icr_write, + .icr_write = x2apic_savic_icr_write, .update_vector = x2apic_savic_update_vector, }; From ce61a90c3666fae024016d383b1fe27119f0a695 Mon Sep 17 00:00:00 2001 From: Kishon Vijay Abraham I Date: Fri, 13 Sep 2024 17:06:59 +0530 Subject: [PATCH 08/26] x86/apic: Support LAPIC timer for Secure AVIC Secure AVIC requires LAPIC timer to be emulated by hypervisor. KVM already supports emulating LAPIC timer using hrtimers. In order to emulate LAPIC timer, APIC_LVTT, APIC_TMICT and APIC_TDCR register values need to be propagated to the hypervisor for arming the timer. APIC_TMCCT register value has to be read from the hypervisor, which is required for calibrating the APIC timer. So, read/write all APIC timer registers from/to the hypervisor. In addition, configure APIC_ALLOWED_IRR for the hypervisor to inject timer interrupt using LOCAL_TIMER_VECTOR. 
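The resulting register routing in the read path can be sketched as follows (simplified; the real callback also validates the full list of allowed register offsets):

  static u32 savic_read_sketch(u32 reg)
  {
          switch (reg) {
          case APIC_LVTT:
          case APIC_TMICT:
          case APIC_TMCCT:
          case APIC_TDCR:
                  /* The LAPIC timer is emulated by the hypervisor. */
                  return read_msr_from_hv(reg);
          default:
                  /* Everything else lives in the guest-owned backing page. */
                  return get_reg(this_cpu_read(apic_backing_page), reg);
          }
  }
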
Signed-off-by: Kishon Vijay Abraham I Co-developed-by: Neeraj Upadhyay Signed-off-by: Neeraj Upadhyay Signed-off-by: Tianyu Lan --- arch/x86/kernel/apic/apic.c | 4 ++++ arch/x86/kernel/apic/x2apic_savic.c | 7 +++++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 85d2d53d6d068..95ae177dff885 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -591,6 +591,10 @@ static void setup_APIC_timer(void) 0xF, ~0UL); } else clockevents_register_device(levt); + + if (apic->update_vector) + apic->update_vector(smp_processor_id(), LOCAL_TIMER_VECTOR, + true); } /* diff --git a/arch/x86/kernel/apic/x2apic_savic.c b/arch/x86/kernel/apic/x2apic_savic.c index 30a24b70e5cb3..2eab9a773005c 100644 --- a/arch/x86/kernel/apic/x2apic_savic.c +++ b/arch/x86/kernel/apic/x2apic_savic.c @@ -94,6 +94,7 @@ static u32 x2apic_savic_read(u32 reg) case APIC_TMICT: case APIC_TMCCT: case APIC_TDCR: + return read_msr_from_hv(reg); case APIC_ID: case APIC_LVR: case APIC_TASKPRI: @@ -142,10 +143,12 @@ static void x2apic_savic_write(u32 reg, u32 data) switch (reg) { case APIC_LVTT: - case APIC_LVT0: - case APIC_LVT1: case APIC_TMICT: case APIC_TDCR: + write_msr_to_hv(reg, data); + break; + case APIC_LVT0: + case APIC_LVT1: /* APIC_ID is writable and configured by guest for Secure AVIC */ case APIC_ID: case APIC_TASKPRI: From 924b380e87e10308dc7291b2008b22ca57ae0f1f Mon Sep 17 00:00:00 2001 From: Kishon Vijay Abraham I Date: Fri, 13 Sep 2024 17:07:00 +0530 Subject: [PATCH 09/26] x86/sev: Initialize VGIF for secondary VCPUs for Secure AVIC VINTR_CTRL in VMSA should be configured for Secure AVIC. Configure for secondary vCPUs (the configuration for boot CPU is done in hypervisor). Signed-off-by: Kishon Vijay Abraham I Signed-off-by: Neeraj Upadhyay --- arch/x86/coco/sev/core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/coco/sev/core.c b/arch/x86/coco/sev/core.c index d6fa563ac9ec1..8e9bdddc77003 100644 --- a/arch/x86/coco/sev/core.c +++ b/arch/x86/coco/sev/core.c @@ -1181,6 +1181,9 @@ static int wakeup_cpu_via_vmgexit(u32 apic_id, unsigned long start_ip, unsigned vmsa->x87_ftw = AP_INIT_X87_FTW_DEFAULT; vmsa->x87_fcw = AP_INIT_X87_FCW_DEFAULT; + if (cc_platform_has(CC_ATTR_SNP_SECURE_AVIC)) + vmsa->vintr_ctrl |= V_GIF_MASK; + /* SVME must be set. */ vmsa->efer = EFER_SVME; From 77fb0c1243426b2a12a5135bbddb6a046a4623cd Mon Sep 17 00:00:00 2001 From: Kishon Vijay Abraham I Date: Fri, 13 Sep 2024 17:07:01 +0530 Subject: [PATCH 10/26] x86/apic: Add support to send NMI IPI for Secure AVIC Secure AVIC has introduced a new field in the APIC backing page "NmiReq" that has to be set by the guest to request a NMI IPI. Add support to set NmiReq appropriately to send NMI IPI. This also requires Virtual NMI feature to be enabled in VINTRL_CTRL field in the VMSA. However this would be added by a later commit after adding support for injecting NMI from the hypervisor. 
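A minimal sketch of the per-target handling, using the offsets defined earlier in this series (the actual hunks follow below):

  /* Sketch: post an IPI into a target vCPU's backing page. */
  static void send_ipi_sketch(void *target_page, unsigned int vector, bool nmi)
  {
          /* Post the vector atomically into the target's IRR ... */
          test_and_set_bit(VEC_POS(vector),
                           (unsigned long *)((char *)target_page + APIC_IRR + REG_POS(vector)));

          /* ... and additionally raise the "NmiReq" field for NMI delivery mode. */
          if (nmi)
                  set_reg(target_page, SAVIC_NMI_REQ_OFFSET, 1);
  }
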
Signed-off-by: Kishon Vijay Abraham I Signed-off-by: Neeraj Upadhyay --- arch/x86/kernel/apic/x2apic_savic.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/apic/x2apic_savic.c b/arch/x86/kernel/apic/x2apic_savic.c index 2eab9a773005c..5502a828a7956 100644 --- a/arch/x86/kernel/apic/x2apic_savic.c +++ b/arch/x86/kernel/apic/x2apic_savic.c @@ -183,7 +183,7 @@ static void x2apic_savic_write(u32 reg, u32 data) } } -static void send_ipi(int cpu, int vector) +static void send_ipi(int cpu, int vector, bool nmi) { void *backing_page; int reg_off; @@ -195,16 +195,20 @@ static void send_ipi(int cpu, int vector) * IRR updates such as during VMRUN and during CPU interrupt handling flow. */ test_and_set_bit(VEC_POS(vector), (unsigned long *)((char *)backing_page + reg_off)); + if (nmi) + set_reg(backing_page, SAVIC_NMI_REQ_OFFSET, nmi); } static void send_ipi_dest(u64 icr_data) { int vector, cpu; + bool nmi; vector = icr_data & APIC_VECTOR_MASK; cpu = icr_data >> 32; + nmi = ((icr_data & APIC_DM_FIXED_MASK) == APIC_DM_NMI); - send_ipi(cpu, vector); + send_ipi(cpu, vector, nmi); } static void send_ipi_target(u64 icr_data) @@ -222,11 +226,13 @@ static void send_ipi_allbut(u64 icr_data) const struct cpumask *self_cpu_mask = get_cpu_mask(smp_processor_id()); unsigned long flags; int vector, cpu; + bool nmi; vector = icr_data & APIC_VECTOR_MASK; + nmi = ((icr_data & APIC_DM_FIXED_MASK) == APIC_DM_NMI); local_irq_save(flags); for_each_cpu_andnot(cpu, cpu_present_mask, self_cpu_mask) - send_ipi(cpu, vector); + send_ipi(cpu, vector, nmi); write_msr_to_hv(APIC_ICR, icr_data); local_irq_restore(flags); } From 98c809055a796ef01064810fe8f4b2c612bc98f8 Mon Sep 17 00:00:00 2001 From: Kishon Vijay Abraham I Date: Fri, 13 Sep 2024 17:07:02 +0530 Subject: [PATCH 11/26] x86/apic: Allow NMI to be injected from hypervisor for Secure AVIC Secure AVIC requires "AllowedNmi" bit in the Secure AVIC Control MSR to be set for NMI to be injected from hypervisor. Set "AllowedNmi" bit in Secure AVIC Control MSR here to allow NMI interrupts to be injected from hypervisor. While at that, also propagate APIC_LVT0 and APIC_LVT1 register values to the hypervisor required for injecting NMI interrupts from hypervisor. 
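With the new MSR bits, enabling NMI injection during setup reduces to the following sketch (using the control-MSR layout defined in this patch; the backing-page GPA occupies the upper bits):

  u64 val = gpa | MSR_AMD64_SECURE_AVIC_ALLOWEDNMI;

  native_wrmsr(MSR_AMD64_SECURE_AVIC_CONTROL, lower_32_bits(val), upper_32_bits(val));
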
Signed-off-by: Kishon Vijay Abraham I Signed-off-by: Neeraj Upadhyay --- arch/x86/include/asm/msr-index.h | 5 +++++ arch/x86/kernel/apic/x2apic_savic.c | 10 ++++++++-- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 5946b90eb5e9d..fb9b788d20d3d 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -695,6 +695,11 @@ #define MSR_AMD64_SNP_SECURE_AVIC_ENABLED BIT_ULL(MSR_AMD64_SNP_SECURE_AVIC_BIT) #define MSR_AMD64_SNP_RESV_BIT 19 #define MSR_AMD64_SNP_RESERVED_MASK GENMASK_ULL(63, MSR_AMD64_SNP_RESV_BIT) +#define MSR_AMD64_SECURE_AVIC_CONTROL 0xc0010138 +#define MSR_AMD64_SECURE_AVIC_EN_BIT 0 +#define MSR_AMD64_SECURE_AVIC_EN BIT_ULL(MSR_AMD64_SECURE_AVIC_EN_BIT) +#define MSR_AMD64_SECURE_AVIC_ALLOWEDNMI_BIT 1 +#define MSR_AMD64_SECURE_AVIC_ALLOWEDNMI BIT_ULL(MSR_AMD64_SECURE_AVIC_ALLOWEDNMI_BIT) #define MSR_AMD64_VIRT_SPEC_CTRL 0xc001011f diff --git a/arch/x86/kernel/apic/x2apic_savic.c b/arch/x86/kernel/apic/x2apic_savic.c index 5502a828a7956..321b3678e26f7 100644 --- a/arch/x86/kernel/apic/x2apic_savic.c +++ b/arch/x86/kernel/apic/x2apic_savic.c @@ -38,6 +38,11 @@ enum lapic_lvt_entry { #define APIC_LVTx(x) (APIC_LVTT + 0x10 * (x)) +static inline void savic_wr_control_msr(u64 val) +{ + native_wrmsr(MSR_AMD64_SECURE_AVIC_CONTROL, lower_32_bits(val), upper_32_bits(val)); +} + static int x2apic_savic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) { return x2apic_enabled() && cc_platform_has(CC_ATTR_SNP_SECURE_AVIC); @@ -143,12 +148,12 @@ static void x2apic_savic_write(u32 reg, u32 data) switch (reg) { case APIC_LVTT: + case APIC_LVT0: + case APIC_LVT1: case APIC_TMICT: case APIC_TDCR: write_msr_to_hv(reg, data); break; - case APIC_LVT0: - case APIC_LVT1: /* APIC_ID is writable and configured by guest for Secure AVIC */ case APIC_ID: case APIC_TASKPRI: @@ -401,6 +406,7 @@ static void x2apic_savic_setup(void) ret = sev_notify_savic_gpa(gpa); if (ret != ES_OK) snp_abort(); + savic_wr_control_msr(gpa | MSR_AMD64_SECURE_AVIC_ALLOWEDNMI); this_cpu_write(savic_setup_done, true); } From d19a53862ae025090d26b867a4e98d77cf660af8 Mon Sep 17 00:00:00 2001 From: Kishon Vijay Abraham I Date: Fri, 13 Sep 2024 17:07:03 +0530 Subject: [PATCH 12/26] x86/sev: Enable NMI support for Secure AVIC Now that support to send NMI IPI and support to inject NMI from hypervisor has been added, set V_NMI_ENABLE in VINTR_CTRL field of VMSA to enable NMI. Signed-off-by: Kishon Vijay Abraham I Signed-off-by: Neeraj Upadhyay --- arch/x86/coco/sev/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/coco/sev/core.c b/arch/x86/coco/sev/core.c index 8e9bdddc77003..a1eadbcbbe9e3 100644 --- a/arch/x86/coco/sev/core.c +++ b/arch/x86/coco/sev/core.c @@ -1182,7 +1182,7 @@ static int wakeup_cpu_via_vmgexit(u32 apic_id, unsigned long start_ip, unsigned vmsa->x87_fcw = AP_INIT_X87_FCW_DEFAULT; if (cc_platform_has(CC_ATTR_SNP_SECURE_AVIC)) - vmsa->vintr_ctrl |= V_GIF_MASK; + vmsa->vintr_ctrl |= (V_GIF_MASK | V_NMI_ENABLE_MASK); /* SVME must be set. */ vmsa->efer = EFER_SVME; From 9bae70fa30af307feb0ab6590d321b1ac45ef084 Mon Sep 17 00:00:00 2001 From: Neeraj Upadhyay Date: Fri, 13 Sep 2024 17:07:04 +0530 Subject: [PATCH 13/26] x86/apic: Enable Secure AVIC in Control MSR With all the pieces in place now, enable Secure AVIC in Secure AVIC Control MSR. Any access to x2APIC MSRs are emulated by hypervisor before Secure AVIC is enabled in the Control MSR. 
Post Secure AVIC enablement, all x2APIC MSR accesses (whether accelerated by AVIC hardware or trapped as #VC exception) operate on guest APIC backing page. Signed-off-by: Neeraj Upadhyay --- arch/x86/kernel/apic/x2apic_savic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/apic/x2apic_savic.c b/arch/x86/kernel/apic/x2apic_savic.c index 321b3678e26f7..a3f0ddc6b5b62 100644 --- a/arch/x86/kernel/apic/x2apic_savic.c +++ b/arch/x86/kernel/apic/x2apic_savic.c @@ -406,7 +406,7 @@ static void x2apic_savic_setup(void) ret = sev_notify_savic_gpa(gpa); if (ret != ES_OK) snp_abort(); - savic_wr_control_msr(gpa | MSR_AMD64_SECURE_AVIC_ALLOWEDNMI); + savic_wr_control_msr(gpa | MSR_AMD64_SECURE_AVIC_EN | MSR_AMD64_SECURE_AVIC_ALLOWEDNMI); this_cpu_write(savic_setup_done, true); } From 328d17e6447c5c65fcf7948be02b95e672871cd3 Mon Sep 17 00:00:00 2001 From: Kishon Vijay Abraham I Date: Fri, 13 Sep 2024 17:07:05 +0530 Subject: [PATCH 14/26] x86/sev: Indicate SEV-SNP guest supports Secure AVIC Now that Secure AVIC support is added in the guest, indicate SEV-SNP guest supports Secure AVIC. Without this, the guest terminates booting with Non-Automatic Exit(NAE) termination request event. Signed-off-by: Kishon Vijay Abraham I Signed-off-by: Neeraj Upadhyay --- arch/x86/boot/compressed/sev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c index ebff8ab518390..2deeac5bec02a 100644 --- a/arch/x86/boot/compressed/sev.c +++ b/arch/x86/boot/compressed/sev.c @@ -365,7 +365,7 @@ void do_boot_stage2_vc(struct pt_regs *regs, unsigned long exit_code) * by the guest kernel. As and when a new feature is implemented in the * guest kernel, a corresponding bit should be added to the mask. */ -#define SNP_FEATURES_PRESENT MSR_AMD64_SNP_DEBUG_SWAP +#define SNP_FEATURES_PRESENT (MSR_AMD64_SNP_DEBUG_SWAP | MSR_AMD64_SNP_SECURE_AVIC_ENABLED) u64 snp_get_unsupported_features(u64 status) { From b6280c84756754249d4035a352c6842e661a8f68 Mon Sep 17 00:00:00 2001 From: Tianyu Lan Date: Fri, 25 Apr 2025 10:24:07 -0400 Subject: [PATCH 15/26] x86/Hyper-V: Add Hyper-V specific hvcall to set backing page Secure AVIC provides backing page to aid the guest in limiting which interrupt vectors can be injected into the guest. Hyper-V has specific hvcall to set backing page and call it in Secure AVIC driver. 
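As a sketch, the per-VP register value handed to HVCALL_SET_VP_REGISTERS uses the layout added at the end of this patch:

  /* Sketch: program HV_X64_REGISTER_SEV_AVIC_GPA for the current VP. */
  union hv_x64_register_sev_gpa_page value = {
          .enabled    = 1,
          .pagenumber = gfn,            /* backing page GPA >> PAGE_SHIFT */
  };

  input->element[0].name        = HV_X64_REGISTER_SEV_AVIC_GPA;
  input->element[0].value.reg64 = value.u64;
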
Signed-off-by: Tianyu Lan --- arch/x86/hyperv/hv_init.c | 24 +++++++++++++++- arch/x86/hyperv/ivm.c | 44 +++++++++++++++++++++++++++++ arch/x86/include/asm/hyperv-tlfs.h | 8 ++++++ arch/x86/include/asm/mshyperv.h | 2 ++ arch/x86/kernel/apic/x2apic_savic.c | 13 ++++++++- include/asm-generic/hyperv-tlfs.h | 26 +++++++++++++++-- 6 files changed, 113 insertions(+), 4 deletions(-) diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index a44c60c105f84..b31fabf091bb5 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -40,6 +40,7 @@ void *hv_hypercall_pg; EXPORT_SYMBOL_GPL(hv_hypercall_pg); +void *hv_vp_early_input_arg; union hv_ghcb * __percpu *hv_ghcb_pg; /* Storage to save the hypercall page temporarily for hibernation */ @@ -357,6 +358,7 @@ void __init hyperv_init(void) u64 guest_id; union hv_x64_msr_hypercall_contents hypercall_msr; int cpuhp; + int ret; if (x86_hyper_type != X86_HYPER_MS_HYPERV) return; @@ -364,6 +366,22 @@ void __init hyperv_init(void) if (hv_common_init()) return; + if (cc_platform_has(CC_ATTR_SNP_SECURE_AVIC)) { + hv_vp_early_input_arg = kcalloc(num_possible_cpus(), + PAGE_SIZE, + GFP_KERNEL); + if (hv_vp_early_input_arg) { + ret = set_memory_decrypted(hv_vp_early_input_arg, + num_possible_cpus()); + if (ret) { + kfree(hv_vp_early_input_arg); + goto common_free; + } + } else { + goto common_free; + } + } + /* * The VP assist page is useless to a TDX guest: the only use we * would have for it is lazy EOI, which can not be used with TDX. @@ -378,7 +396,7 @@ void __init hyperv_init(void) ms_hyperv.hints &= ~HV_X64_ENLIGHTENED_VMCS_RECOMMENDED; if (!hv_isolation_type_tdx()) - goto common_free; + goto free_vp_early_input_arg; } if (ms_hyperv.paravisor_present && hv_isolation_type_snp()) { @@ -538,6 +556,10 @@ void __init hyperv_init(void) free_vp_assist_page: kfree(hv_vp_assist_page); hv_vp_assist_page = NULL; +free_vp_early_input_arg: + set_memory_encrypted(hv_vp_early_input_arg, num_possible_cpus()); + kfree(hv_vp_early_input_arg); + hv_vp_early_input_arg = NULL; common_free: hv_common_free(); } diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c index 41cbbb15ca310..829e5ac17982d 100644 --- a/arch/x86/hyperv/ivm.c +++ b/arch/x86/hyperv/ivm.c @@ -65,6 +65,13 @@ union hv_ghcb { /* Only used in an SNP VM with the paravisor */ static u16 hv_ghcb_version __ro_after_init; +/* + * Use static page to set Secure AVIC backing page. + * The operation happens before allocating input arg + * page when start AP. + */ +static u8 inputbuf[PAGE_SIZE] __bss_decrypted __aligned(PAGE_SIZE); + /* Functions only used in an SNP VM with the paravisor go here. 
*/ u64 hv_ghcb_hypercall(u64 control, void *input, void *output, u32 input_size) { @@ -289,6 +296,43 @@ static void snp_cleanup_vmsa(struct sev_es_save_area *vmsa) free_page((unsigned long)vmsa); } +enum es_result hv_set_savic_backing_page(u64 gfn) +{ + u64 control = HV_HYPERCALL_REP_COMP_1 | HVCALL_SET_VP_REGISTERS; + struct hv_set_vp_registers_input *input + = hv_vp_early_input_arg + smp_processor_id() * PAGE_SIZE; + union hv_x64_register_sev_gpa_page value; + unsigned long flags; + int retry = 5; + u64 ret; + + local_irq_save(flags); + + value.enabled = 1; + value.reserved = 0; + value.pagenumber = gfn; + + memset(input, 0, struct_size(input, element, 1)); + input->header.partitionid = HV_PARTITION_ID_SELF; + input->header.vpindex = HV_VP_INDEX_SELF; + input->header.inputvtl = ms_hyperv.vtl; + input->element[0].name = HV_X64_REGISTER_SEV_AVIC_GPA; + input->element[0].value.reg64 = value.u64; + + do { + ret = hv_do_hypercall(control, input, NULL); + if (!hv_result_success(ret)) + pr_err("Failed to set secure AVIC backing page %llx.\n", ret); + } while (ret == HV_STATUS_TIME_OUT && retry--); + + local_irq_restore(flags); + + if (hv_result_success(ret)) + return ES_OK; + else + return ES_VMM_ERROR; +} + int hv_snp_boot_ap(u32 apic_id, unsigned long start_ip, unsigned int cpu) { struct sev_es_save_area *vmsa = (struct sev_es_save_area *) diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h index a0c992faa1e9c..910b03d74c85b 100644 --- a/arch/x86/include/asm/hyperv-tlfs.h +++ b/arch/x86/include/asm/hyperv-tlfs.h @@ -311,6 +311,14 @@ enum hv_isolation_type { #define HV_MSR_STIMER0_CONFIG (HV_X64_MSR_STIMER0_CONFIG) #define HV_MSR_STIMER0_COUNT (HV_X64_MSR_STIMER0_COUNT) +/* + * Registers are only accessible via HVCALL_GET_VP_REGISTERS hvcall and + * there is not associated MSR address. 
+ */ +#define HV_X64_REGISTER_VSM_VP_STATUS 0x000D0003 +#define HV_X64_VTL_MASK GENMASK(3, 0) +#define HV_X64_REGISTER_SEV_AVIC_GPA 0x00090043 + /* Hyper-V memory host visibility */ enum hv_mem_host_visibility { VMBUS_PAGE_NOT_VISIBLE = 0, diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index f8a108dadaf26..1addcdbe7c6f6 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -43,6 +43,7 @@ static inline unsigned char hv_get_nmi_reason(void) extern bool hyperv_paravisor_present; extern void *hv_hypercall_pg; +extern void *hv_vp_early_input_arg; extern u64 hv_current_partition_id; @@ -265,6 +266,7 @@ int hv_unmap_ioapic_interrupt(int ioapic_id, struct hv_interrupt_entry *entry); bool hv_ghcb_negotiate_protocol(void); void __noreturn hv_ghcb_terminate(unsigned int set, unsigned int reason); int hv_snp_boot_ap(u32 apic_id, unsigned long start_ip, unsigned int cpu); +enum es_result hv_set_savic_backing_page(u64 gfn); #else static inline bool hv_ghcb_negotiate_protocol(void) { return false; } static inline void hv_ghcb_terminate(unsigned int set, unsigned int reason) {} diff --git a/arch/x86/kernel/apic/x2apic_savic.c b/arch/x86/kernel/apic/x2apic_savic.c index a3f0ddc6b5b62..d82a5e82e3e57 100644 --- a/arch/x86/kernel/apic/x2apic_savic.c +++ b/arch/x86/kernel/apic/x2apic_savic.c @@ -16,6 +16,7 @@ #include #include #include +#include #include "local.h" @@ -396,6 +397,10 @@ static void x2apic_savic_setup(void) void *backing_page; enum es_result ret; unsigned long gpa; + unsigned long gfn; + + if (!cc_platform_has(CC_ATTR_SNP_SECURE_AVIC)) + return; if (this_cpu_read(savic_setup_done)) return; @@ -403,7 +408,13 @@ static void x2apic_savic_setup(void) backing_page = this_cpu_read(apic_backing_page); init_backing_page(backing_page); gpa = __pa(backing_page); - ret = sev_notify_savic_gpa(gpa); + gfn = gpa >> PAGE_SHIFT; + + if (hv_isolation_type_snp()) + ret = hv_set_savic_backing_page(gfn); + else + ret = sev_notify_savic_gpa(gpa); + if (ret != ES_OK) snp_abort(); savic_wr_control_msr(gpa | MSR_AMD64_SECURE_AVIC_EN | MSR_AMD64_SECURE_AVIC_ALLOWEDNMI); diff --git a/include/asm-generic/hyperv-tlfs.h b/include/asm-generic/hyperv-tlfs.h index 02f0a4ab723e8..ddeef1ebbad8c 100644 --- a/include/asm-generic/hyperv-tlfs.h +++ b/include/asm-generic/hyperv-tlfs.h @@ -749,6 +749,23 @@ struct hv_get_vp_registers_output { }; }; +union hv_x64_register_sev_gpa_page { + u64 u64; + struct { + u64 enabled:1; + u64 reserved:11; + u64 pagenumber:52; + }; +} __packed; + +union hv_register_value { + u128 reg128; + u64 reg64; + u32 reg32; + u16 reg16; + u8 reg8; +}; + /* HvSetVpRegisters hypercall with variable size reg name/value list*/ struct hv_set_vp_registers_input { struct { @@ -761,8 +778,13 @@ struct hv_set_vp_registers_input { u32 name; u32 padding1; u64 padding2; - u64 valuelow; - u64 valuehigh; + union { + union hv_register_value value; + struct { + u64 valuelow; + u64 valuehigh; + }; + }; } element[]; } __packed; From e58ae5da1da893eb82610669a0297ac84d001641 Mon Sep 17 00:00:00 2001 From: Tianyu Lan Date: Thu, 3 Apr 2025 18:19:21 -0400 Subject: [PATCH 16/26] x86/Hyper-V: Not use hv apic driver when Secure AVIC is available When Secure AVIC is available, AMD x2apic Secure AVIC driver should be selected and return directly in the hv_apic_init(). 
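For reference, a minimal sketch of how the guest frame number of the APIC backing page maps onto the HV_X64_REGISTER_SEV_AVIC_GPA layout that hv_set_savic_backing_page() programs through HVCALL_SET_VP_REGISTERS above. This is illustrative only and not part of any patch in this series; the helper name is made up.

static u64 savic_gpa_register_value(u64 gfn)
{
	union hv_x64_register_sev_gpa_page value = {
		.enabled    = 1,	/* bit 0: backing page is active */
		.reserved   = 0,	/* bits 11:1 must be zero */
		.pagenumber = gfn,	/* bits 63:12: guest frame number of the backing page */
	};

	/* Written to element[0].value.reg64 for HV_X64_REGISTER_SEV_AVIC_GPA. */
	return value.u64;
}

Since hv_set_savic_backing_page() is passed gpa >> PAGE_SHIFT as the gfn, the resulting register value is the 4K-aligned guest physical address of the backing page with the enable bit set.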
Signed-off-by: Tianyu Lan --- arch/x86/hyperv/hv_apic.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/hyperv/hv_apic.c b/arch/x86/hyperv/hv_apic.c index 0569f579338b5..34987b2234184 100644 --- a/arch/x86/hyperv/hv_apic.c +++ b/arch/x86/hyperv/hv_apic.c @@ -288,6 +288,9 @@ static void hv_send_ipi_self(int vector) void __init hv_apic_init(void) { + if (cc_platform_has(CC_ATTR_SNP_SECURE_AVIC)) + return; + if (ms_hyperv.hints & HV_X64_CLUSTER_IPI_RECOMMENDED) { pr_info("Hyper-V: Using IPI hypercalls\n"); /* From c41fab2f2336424772d1dda6a80c9ebc706ea372 Mon Sep 17 00:00:00 2001 From: Tianyu Lan Date: Thu, 3 Apr 2025 19:35:12 -0400 Subject: [PATCH 17/26] x86/x2apic-savic: Expose x2apic_savic_update_vector() Expose x2apic_savic_update_vector() so that driver or arch code can allow Hyper-V to inject the related vectors. Signed-off-by: Tianyu Lan --- arch/x86/include/asm/apic.h | 9 +++++++++ arch/x86/kernel/apic/x2apic_savic.c | 5 ++++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 1ab0e22a71873..db4e84484eeef 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -241,6 +241,15 @@ static inline u64 native_x2apic_icr_read(void) return val; } +#if defined(CONFIG_AMD_SECURE_AVIC) +extern void x2apic_savic_update_vector(unsigned int cpu, + unsigned int vector, + bool set); +#else +static inline void x2apic_savic_update_vector(unsigned int cpu, + unsigned int vector, bool set) { } +#endif + extern int x2apic_mode; extern int x2apic_phys; extern void __init x2apic_set_max_apicid(u32 apicid); diff --git a/arch/x86/kernel/apic/x2apic_savic.c b/arch/x86/kernel/apic/x2apic_savic.c index d82a5e82e3e57..deb202dbb0208 100644 --- a/arch/x86/kernel/apic/x2apic_savic.c +++ b/arch/x86/kernel/apic/x2apic_savic.c @@ -343,12 +343,15 @@ static void x2apic_savic_send_IPI_self(int vector) __send_IPI_shorthand(vector, APIC_DEST_SELF); } -static void x2apic_savic_update_vector(unsigned int cpu, unsigned int vector, bool set) +void x2apic_savic_update_vector(unsigned int cpu, unsigned int vector, bool set) { void *backing_page; unsigned long *reg; int reg_off; + if (!cc_platform_has(CC_ATTR_SNP_SECURE_AVIC)) + return; + backing_page = per_cpu(apic_backing_page, cpu); reg_off = SAVIC_ALLOWED_IRR_OFFSET + REG_POS(vector); reg = (unsigned long *)((char *)backing_page + reg_off); From e2940712742501db2d6904038d1c62a612a84577 Mon Sep 17 00:00:00 2001 From: Tianyu Lan Date: Thu, 3 Apr 2025 18:29:23 -0400 Subject: [PATCH 18/26] drivers/hv: Allow the vmbus message synic interrupt to be injected from Hyper-V When Secure AVIC is enabled, the Vmbus driver should call the x2APIC Secure AVIC interface to allow Hyper-V to inject the Vmbus message interrupt.
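A note on the bookkeeping above: the allowed-IRR area of the Secure AVIC backing page follows the architectural IRR layout, i.e. eight 32-bit banks placed on 16-byte strides, 32 vectors per bank. A minimal sketch of the vector-to-bit mapping, under the assumption that it matches the REG_POS()/VEC_POS() arithmetic used by x2apic_savic_update_vector(); the helper below is hypothetical and not part of the patch.

static void savic_allowed_irr_position(unsigned int vector,
				       unsigned int *byte_off, unsigned int *bit)
{
	/* Bank: vector / 32 selects a 4-byte bank, banks are 16 bytes apart. */
	*byte_off = SAVIC_ALLOWED_IRR_OFFSET + ((vector >> 5) << 4);
	/* Bit within the 32-bit bank. */
	*bit = vector & 31;
}

For example, vector 0xf3 lands at byte offset SAVIC_ALLOWED_IRR_OFFSET + 0x70, bit 19.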
Signed-off-by: Tianyu Lan --- drivers/hv/hv.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c index fc8729deb6597..c28e60e8c3991 100644 --- a/drivers/hv/hv.c +++ b/drivers/hv/hv.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include "hyperv_vmbus.h" @@ -312,6 +313,7 @@ void hv_synic_enable_regs(unsigned int cpu) if (vmbus_irq != -1) enable_percpu_irq(vmbus_irq, 0); shared_sint.as_uint64 = hv_get_msr(HV_MSR_SINT0 + VMBUS_MESSAGE_SINT); + x2apic_savic_update_vector(smp_processor_id(), vmbus_interrupt, true); shared_sint.vector = vmbus_interrupt; shared_sint.masked = false; From 3c993994334b796beb8eca11a291d4842b8b4076 Mon Sep 17 00:00:00 2001 From: Tianyu Lan Date: Thu, 3 Apr 2025 18:54:55 -0400 Subject: [PATCH 19/26] x86/Hyper-V: Allow Hyper-V to inject Hyper-V vectors When Secure AVIC is enabled, call the Secure AVIC interface to allow Hyper-V to inject the REENLIGHTENMENT, STIMER0 and CALLBACK vectors. Signed-off-by: Tianyu Lan --- arch/x86/hyperv/hv_init.c | 11 +++++++++ 1 file changed, 11 insertions(+) diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index b31fabf091bb5..930083663a081 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -85,6 +85,17 @@ static int hv_cpu_init(unsigned int cpu) if (ret) return ret; + /* Allow Hyper-V vector to be injected from Hypervisor. */ + if (ms_hyperv.features & HV_ACCESS_REENLIGHTENMENT) + x2apic_savic_update_vector(cpu, + HYPERV_REENLIGHTENMENT_VECTOR, true); + + if (ms_hyperv.misc_features & HV_STIMER_DIRECT_MODE_AVAILABLE) + x2apic_savic_update_vector(cpu, + HYPERV_STIMER0_VECTOR, true); + + x2apic_savic_update_vector(cpu, HYPERVISOR_CALLBACK_VECTOR, true); + return hyperv_init_ghcb(); } From 5e94781fef1ee74d8f249a10163f64fb70e1f713 Mon Sep 17 00:00:00 2001 From: Tianyu Lan Date: Thu, 3 Apr 2025 18:42:42 -0400 Subject: [PATCH 20/26] x86/Hyper-V: Do not use auto-EOI when Secure AVIC is available Hyper-V doesn't support auto-EOI with Secure AVIC. Set the HV_DEPRECATING_AEOI_RECOMMENDED flag to force an explicit EOI register write after handling an interrupt. Signed-off-by: Tianyu Lan --- arch/x86/kernel/cpu/mshyperv.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index c6797cf9f37cb..c2ae8ea213b01 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -460,6 +460,9 @@ static void __init ms_hyperv_init_platform(void) pr_debug("Hyper-V: max %u virtual processors, %u logical processors\n", ms_hyperv.max_vp_index, ms_hyperv.max_lp_index); + if (cc_platform_has(CC_ATTR_SNP_SECURE_AVIC)) + ms_hyperv.hints |= HV_DEPRECATING_AEOI_RECOMMENDED; + /* * Check CPU management privilege. * From 9b69252ced710c2db49ba2417c940c05f5367f5c Mon Sep 17 00:00:00 2001 From: Tianyu Lan Date: Thu, 3 Apr 2025 19:38:01 -0400 Subject: [PATCH 21/26] x86/x2apic-savic: Do not set up the APIC backing page if Secure AVIC is not enabled When Secure AVIC is not enabled, init_backing_page() should return directly.
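For context on the auto-EOI change above: Secure AVIC requires the guest to complete interrupts through its own APIC, so forcing the HV_DEPRECATING_AEOI_RECOMMENDED hint makes the vmbus SINT setup program auto_eoi = 0, and the EOI register is then written explicitly in the interrupt path. A sketch of the effect, assuming hv_set_msr() as the counterpart of the hv_get_msr() accessor used above; the real programming lives in hv_synic_enable_regs() and the helper below is hypothetical.

static void vmbus_sint_without_auto_eoi(void)
{
	union hv_synic_sint sint;

	sint.as_uint64 = hv_get_msr(HV_MSR_SINT0 + VMBUS_MESSAGE_SINT);
	sint.vector    = vmbus_interrupt;
	sint.masked    = false;
	/* With the hint set above, this evaluates to 0: no auto-EOI. */
	sint.auto_eoi  = !(ms_hyperv.hints & HV_DEPRECATING_AEOI_RECOMMENDED);
	hv_set_msr(HV_MSR_SINT0 + VMBUS_MESSAGE_SINT, sint.as_uint64);
}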
Signed-off-by: Tianyu Lan --- arch/x86/kernel/apic/x2apic_savic.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/kernel/apic/x2apic_savic.c b/arch/x86/kernel/apic/x2apic_savic.c index deb202dbb0208..96a6f3889f535 100644 --- a/arch/x86/kernel/apic/x2apic_savic.c +++ b/arch/x86/kernel/apic/x2apic_savic.c @@ -369,6 +369,9 @@ static void init_backing_page(void *backing_page) u32 val; int i; + if (!cc_platform_has(CC_ATTR_SNP_SECURE_AVIC)) + return; + val = read_msr_from_hv(APIC_LVR); set_reg(backing_page, APIC_LVR, val); From c83626f7654efdba6c2b0820d0065f3ea22a09b6 Mon Sep 17 00:00:00 2001 From: Tianyu Lan Date: Thu, 3 Apr 2025 21:28:49 -0400 Subject: [PATCH 22/26] x64-cvm.config: Add Secure AVIC driver for CVM Select AMD Secure AVIC driver in the CVM config file. Signed-off-by: Tianyu Lan --- Microsoft/x64-cvm.config | 1 + 1 file changed, 1 insertion(+) diff --git a/Microsoft/x64-cvm.config b/Microsoft/x64-cvm.config index 054a91783c73b..b5ae5b1f17878 100644 --- a/Microsoft/x64-cvm.config +++ b/Microsoft/x64-cvm.config @@ -3,5 +3,6 @@ CONFIG_VIRT_DRIVERS=y CONFIG_TDX_GUEST_DRIVER=y CONFIG_SEV_GUEST=y CONFIG_AMD_MEM_ENCRYPT=y +CONFIG_AMD_SECURE_AVIC=y CONFIG_CRYPTO_AES=y CONFIG_CRYPTO_LIB_AES=y From 8ee2e35c8c1b9174e809f271eab0346a4ec64419 Mon Sep 17 00:00:00 2001 From: Wei Liu Date: Fri, 21 Mar 2025 22:40:14 +0000 Subject: [PATCH 23/26] x86/hyperv: fix an indentation issue in mshyperv.h Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202503220640.hjiacW2C-lkp@intel.com/ Signed-off-by: Wei Liu --- arch/x86/include/asm/mshyperv.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index 1addcdbe7c6f6..1e1da684db4ba 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -161,7 +161,7 @@ static inline u64 _hv_do_fast_hypercall8(u64 control, u64 input1) : "cc", "edi", "esi"); } #endif - return hv_status; + return hv_status; } static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1) From 6be47e667450121252cd749fb4da3332de0d4d2e Mon Sep 17 00:00:00 2001 From: Roman Kisel Date: Fri, 2 May 2025 13:02:18 -0700 Subject: [PATCH 24/26] fixes for Tianyu's patches --- arch/x86/hyperv/hv_init.c | 4 ++-- arch/x86/hyperv/ivm.c | 11 ++--------- drivers/hv/hv.c | 5 ++++- 3 files changed, 8 insertions(+), 12 deletions(-) diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index 930083663a081..bf20e655076f3 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -382,7 +382,7 @@ void __init hyperv_init(void) PAGE_SIZE, GFP_KERNEL); if (hv_vp_early_input_arg) { - ret = set_memory_decrypted(hv_vp_early_input_arg, + ret = set_memory_decrypted((u64)hv_vp_early_input_arg, num_possible_cpus()); if (ret) { kfree(hv_vp_early_input_arg); @@ -568,7 +568,7 @@ void __init hyperv_init(void) kfree(hv_vp_assist_page); hv_vp_assist_page = NULL; free_vp_early_input_arg: - set_memory_encrypted(hv_vp_early_input_arg, num_possible_cpus()); + set_memory_encrypted((u64)hv_vp_early_input_arg, num_possible_cpus()); kfree(hv_vp_early_input_arg); hv_vp_early_input_arg = NULL; common_free: diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c index 829e5ac17982d..ba0abdd7a343c 100644 --- a/arch/x86/hyperv/ivm.c +++ b/arch/x86/hyperv/ivm.c @@ -65,13 +65,6 @@ union hv_ghcb { /* Only used in an SNP VM with the paravisor */ static u16 hv_ghcb_version __ro_after_init; -/* - * Use static page to set Secure AVIC backing page. 
- * The operation happens before allocating input arg - * page when start AP. - */ -static u8 inputbuf[PAGE_SIZE] __bss_decrypted __aligned(PAGE_SIZE); - /* Functions only used in an SNP VM with the paravisor go here. */ u64 hv_ghcb_hypercall(u64 control, void *input, void *output, u32 input_size) { @@ -321,9 +314,9 @@ enum es_result hv_set_savic_backing_page(u64 gfn) do { ret = hv_do_hypercall(control, input, NULL); - if (!hv_result_success(ret)) - pr_err("Failed to set secure AVIC backing page %llx.\n", ret); } while (ret == HV_STATUS_TIME_OUT && retry--); + if (!hv_result_success(ret)) + pr_err("Failed to set secure AVIC backing page %llx.\n", ret); local_irq_restore(flags); diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c index c28e60e8c3991..92003b8004f75 100644 --- a/drivers/hv/hv.c +++ b/drivers/hv/hv.c @@ -20,7 +20,9 @@ #include #include #include +#ifdef CONFIG_SEV_GUEST #include +#endif #include #include "hyperv_vmbus.h" @@ -313,8 +315,9 @@ void hv_synic_enable_regs(unsigned int cpu) if (vmbus_irq != -1) enable_percpu_irq(vmbus_irq, 0); shared_sint.as_uint64 = hv_get_msr(HV_MSR_SINT0 + VMBUS_MESSAGE_SINT); +#ifdef CONFIG_SEV_GUEST x2apic_savic_update_vector(smp_processor_id(), vmbus_interrupt, true); - +#endif shared_sint.vector = vmbus_interrupt; shared_sint.masked = false; From eaff89ba1a104b21107720421b1f296c6d2adca8 Mon Sep 17 00:00:00 2001 From: Roman Kisel Date: Tue, 24 Jun 2025 17:30:08 -0700 Subject: [PATCH 25/26] drivers: hv: mshv_vtl: Support for Secure AVIC --- arch/x86/include/asm/apic.h | 1 + arch/x86/include/asm/sev.h | 7 +- arch/x86/include/asm/svm.h | 12 +- arch/x86/include/uapi/asm/svm.h | 6 +- arch/x86/kernel/apic/x2apic_savic.c | 4 +- drivers/hv/mshv_vtl_main.c | 743 +++++++++++++++++++++------- include/uapi/linux/mshv.h | 1 + 7 files changed, 580 insertions(+), 194 deletions(-) diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index db4e84484eeef..e504b5e597ede 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -245,6 +245,7 @@ static inline u64 native_x2apic_icr_read(void) extern void x2apic_savic_update_vector(unsigned int cpu, unsigned int vector, bool set); +extern void x2apic_savic_init_backing_page(void *backing_page); #else static inline void x2apic_savic_update_vector(unsigned int cpu, unsigned int vector, bool set) { } diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h index ced3d8014ef4a..89bc19d3f6c1c 100644 --- a/arch/x86/include/asm/sev.h +++ b/arch/x86/include/asm/sev.h @@ -109,7 +109,12 @@ struct rmp_state { u32 asid; } __packed; -#define RMPADJUST_VMSA_PAGE_BIT BIT(16) +/* Target VMPL takes the first byte */ +#define RMPADJUST_ENABLE_READ BIT(8) +#define RMPADJUST_ENABLE_WRITE BIT(9) +#define RMPADJUST_USER_EXECUTE BIT(10) +#define RMPADJUST_KERNEL_EXECUTE BIT(11) +#define RMPADJUST_VMSA_PAGE_BIT BIT(16) /* SNP Guest message request */ struct snp_req_data { diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index 2b59b9951c90e..d5207e9badd3c 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h @@ -5,7 +5,8 @@ #include #include -#include +/* TODO: including into mshv_vtl_main.c breaks the build. */ +// #include /* * 32-bit intercept words in the VMCB Control Area, starting @@ -164,7 +165,8 @@ struct __attribute__ ((__packed__)) vmcb_control_area { * for use by hypervisor/software. */ union { - struct hv_vmcb_enlightenments hv_enlightenments; + /* TODO: including into mshv_vtl_main.c breaks the build. 
*/ + // struct hv_vmcb_enlightenments hv_enlightenments; u8 reserved_sw[32]; }; }; @@ -183,6 +185,9 @@ struct __attribute__ ((__packed__)) vmcb_control_area { #define V_GIF_SHIFT 9 #define V_GIF_MASK (1 << V_GIF_SHIFT) +#define V_INT_SHADOW 10 +#define V_INT_SHADOW_MASK (1 << V_INT_SHADOW) + #define V_NMI_PENDING_SHIFT 11 #define V_NMI_PENDING_MASK (1 << V_NMI_PENDING_SHIFT) @@ -195,6 +200,9 @@ struct __attribute__ ((__packed__)) vmcb_control_area { #define V_IGN_TPR_SHIFT 20 #define V_IGN_TPR_MASK (1 << V_IGN_TPR_SHIFT) +#define V_GUEST_BUSY_SHIFT 63 +#define V_GUEST_BUSY_MASK (1ULL << V_GUEST_BUSY_SHIFT) + #define V_IRQ_INJECTION_BITS_MASK (V_IRQ_MASK | V_INTR_PRIO_MASK | V_IGN_TPR_MASK) #define V_INTR_MASKING_SHIFT 24 diff --git a/arch/x86/include/uapi/asm/svm.h b/arch/x86/include/uapi/asm/svm.h index 0f21cea6d21c7..89876c35dd115 100644 --- a/arch/x86/include/uapi/asm/svm.h +++ b/arch/x86/include/uapi/asm/svm.h @@ -94,8 +94,10 @@ #define SVM_EXIT_CR13_WRITE_TRAP 0x09d #define SVM_EXIT_CR14_WRITE_TRAP 0x09e #define SVM_EXIT_CR15_WRITE_TRAP 0x09f -#define SVM_EXIT_INVPCID 0x0a2 -#define SVM_EXIT_NPF 0x400 +#define SVM_EXIT_INVPCID 0x0a2 +#define SVM_EXIT_BUSLOCK 0x0a5 +#define SVM_EXIT_IDLE_HLT 0x0a6 +#define SVM_EXIT_NPF 0x400 #define SVM_EXIT_AVIC_INCOMPLETE_IPI 0x401 #define SVM_EXIT_AVIC_UNACCELERATED_ACCESS 0x402 #define SVM_EXIT_VMGEXIT 0x403 diff --git a/arch/x86/kernel/apic/x2apic_savic.c b/arch/x86/kernel/apic/x2apic_savic.c index 96a6f3889f535..9c6181229165d 100644 --- a/arch/x86/kernel/apic/x2apic_savic.c +++ b/arch/x86/kernel/apic/x2apic_savic.c @@ -362,7 +362,7 @@ void x2apic_savic_update_vector(unsigned int cpu, unsigned int vector, bool set) test_and_clear_bit(VEC_POS(vector), reg); } -static void init_backing_page(void *backing_page) +void x2apic_savic_init_backing_page(void *backing_page) { u32 hv_apic_id; u32 apic_id; @@ -412,7 +412,7 @@ static void x2apic_savic_setup(void) return; backing_page = this_cpu_read(apic_backing_page); - init_backing_page(backing_page); + x2apic_savic_init_backing_page(backing_page); gpa = __pa(backing_page); gfn = gpa >> PAGE_SHIFT; diff --git a/drivers/hv/mshv_vtl_main.c b/drivers/hv/mshv_vtl_main.c index b727c76d17b4f..b587cce8fc82f 100644 --- a/drivers/hv/mshv_vtl_main.c +++ b/drivers/hv/mshv_vtl_main.c @@ -34,6 +34,7 @@ #include #include +#include #include #include #include @@ -167,6 +168,9 @@ struct mshv_vtl_per_cpu { bool msrs_are_guest; struct user_return_notifier mshv_urn; #endif +#if defined(CONFIG_X86_64) && defined(CONFIG_SEV_GUEST) + struct page *snp_secure_avic_page; +#endif }; static struct mutex mshv_vtl_poll_file_lock; @@ -196,20 +200,66 @@ static struct page *mshv_vtl_cpu_reg_page(int cpu) return *per_cpu_ptr(&mshv_vtl_per_cpu.reg_page, cpu); } -#if defined(CONFIG_X86_64) && defined(CONFIG_INTEL_TDX_GUEST) +#if defined(CONFIG_X86_64) + +#if defined(CONFIG_INTEL_TDX_GUEST) + +static struct page *tdx_this_apic_page(void) +{ + return *this_cpu_ptr(&mshv_vtl_per_cpu.tdx_apic_page); +} + +static u32 *mshv_tdx_vapic_irr(void) +{ + return (u32 *)((char *)page_address(tdx_this_apic_page()) + APIC_IRR); +} + +#endif /* defined(CONFIG_INTEL_TDX_GUEST) */ static struct page *tdx_apic_page(int cpu) { +#if defined(CONFIG_INTEL_TDX_GUEST) return *per_cpu_ptr(&mshv_vtl_per_cpu.tdx_apic_page, cpu); +#else + (void)cpu; + return NULL; +#endif } -static struct page *tdx_this_apic_page(void) +static struct page *snp_secure_avic_page(int cpu) { - return *this_cpu_ptr(&mshv_vtl_per_cpu.tdx_apic_page); +#if defined(CONFIG_SEV_GUEST) + return 
*per_cpu_ptr(&mshv_vtl_per_cpu.snp_secure_avic_page, cpu); +#else + (void)cpu; + return NULL; +#endif } +static u32 *mshv_snp_secure_avic_irr(int cpu) +{ +#if defined(CONFIG_SEV_GUEST) + return (u32 *)((char *)page_address(snp_secure_avic_page(cpu)) + APIC_IRR); +#else + (void)cpu; + return NULL; +#endif +} + +static struct page* mshv_apic_page(int cpu) +{ + if (hv_isolation_type_tdx()) + return tdx_apic_page(cpu); + else if (hv_isolation_type_snp()) + return snp_secure_avic_page(cpu); + + return NULL; +} + +#if defined(CONFIG_SEV_GUEST) || defined(CONFIG_INTEL_TDX_GUEST) /* - * For ICR emulation on TDX, we need a fast way to map APICIDs to CPUIDs. + * For ICR emulation when running a hardware isolated guest, we need a fast way to map + * APICIDs to CPUIDs. * Instead of iterating through all CPUs for each target in the ICR destination field * precompute a mapping. APICIDs can be sparse so we have to use a hash table. * Note: CPU hotplug is not supported (both by this code and by the paravisor in general) @@ -225,21 +275,250 @@ struct apicid_to_cpuid_entry { * Sets the cpu described by apicid in cpu_mask. * Returns 0 on success, -EINVAL if no cpu matches the apicid. */ -static int mshv_tdx_set_cpumask_from_apicid(int apicid, struct cpumask *cpu_mask) +static int mshv_set_cpumask_from_apicid(int apicid, struct cpumask *cpu_mask) { struct apicid_to_cpuid_entry *found; hash_for_each_possible(apicid_to_cpuid, found, node, apicid) { if (found->apicid != apicid) continue; - cpumask_set_cpu(found->cpuid, cpu_mask); return 0; } return -EINVAL; } -#endif + +/* + * Returns the cpumask described by dest, where dest is a logical destination. + * cpu_mask should have no CPUs set. + * Returns 0 on success + */ +static int mshv_get_logical_cpumask(u32 dest, struct cpumask *cpu_mask) +{ + int ret = 0; + + while ((u16)dest) { + const u16 i = fls((u16)dest) - 1; + const u32 physical_id = (dest >> 16 << 4) | i; + + ret = mshv_set_cpumask_from_apicid(physical_id, cpu_mask); + dest &= ~BIT(i); + if (ret) + break; + } + + return ret; +} + +/* + * Interrupt handling (particularly sending (via ICR writes) and receiving interrupts), + * is a hot path on hardware-isolated VMs. By performing some of the common functionality + * entirely in-kernel we eliminate costly user<->kernel transitions. + */ +static void mshv_free_apicid_to_cpuid_mapping(void) +{ + int bkt; + struct apicid_to_cpuid_entry *entry; + struct hlist_node *tmp; + + hash_for_each_safe(apicid_to_cpuid, bkt, tmp, entry, node) { + hash_del(&entry->node); + kfree(entry); + } +} + +/* + * Creates and populates the apicid_to_cpuid hash table. + * This mapping is used for fast ICR emulation on on hardware-isolated VMs. + * Returns 0 on success. + */ +static int mshv_create_apicid_to_cpuid_mapping(struct device *dev) +{ + int cpu, ret = 0; + + for_each_online_cpu(cpu) { + struct apicid_to_cpuid_entry *entry = kzalloc(sizeof(*entry), GFP_KERNEL); + + if (!entry) { + ret = -ENOMEM; + break; + } + + entry->apicid = cpuid_to_apicid[cpu]; + entry->cpuid = cpu; + + if (entry->apicid == BAD_APICID) { + dev_emerg(dev, "Bad APICID: %d !!\n", entry->apicid); + ret = -ENODEV; + break; + } + + hash_add(apicid_to_cpuid, &entry->node, entry->apicid); + } + + if (ret) + mshv_free_apicid_to_cpuid_mapping(); + + return ret; +} + +/* + * Attempts to handle an ICR write. Returns 0 if successful, other values + * indicate user-space should be invoked to gracefully handle the error. 
+ */ +static int mshv_cpu_mask_for_icr_write(u32 icr_lo, u32 dest, struct cpumask* local_mask) +{ + const u8 shorthand = (icr_lo >> 18) & 0b11; + const u32 self = smp_processor_id(); + int ret = 0; + + cpumask_clear(local_mask); + if (shorthand == 0b10 || dest == (u32)-1) { /* shorthand all or destination id == all */ + cpumask_copy(local_mask, cpu_online_mask); + } else if (shorthand == 0b11) { /* shorthand all but self */ + cpumask_copy(local_mask, cpu_online_mask); + cpumask_clear_cpu(self, local_mask); + } else if (shorthand == 0b01) { /* shorthand self */ + cpumask_set_cpu(self, local_mask); + } else if (icr_lo & BIT(11)) { /* logical */ + ret = mshv_get_logical_cpumask(dest, local_mask); + } else { /* physical */ + ret = mshv_set_cpumask_from_apicid(dest, local_mask); + } + + return ret; +} + +/* + * Attempts to handle an ICR write. Returns 0 if successful, other values + * indicate user-space should be invoked to gracefully handle the error. + */ +static int mshv_update_proxy_irr_for_icr_write(u32 icr_lo, struct cpumask *local_mask) +{ + const u8 vector = icr_lo; + const u64 bank = vector / 32; + const u32 mask = BIT(vector % 32); + const u32 self = smp_processor_id(); + + unsigned int cpu; + bool send_ipi; + + send_ipi = false; + for_each_cpu(cpu, local_mask) { + /* + * The kernel doesn't provide an atomic_or which operates on u32, + * so cast to atomic_t, which should have the same layout + */ + static_assert(sizeof(atomic_t) == sizeof(u32)); + atomic_or(mask, (atomic_t *) + (&(mshv_vtl_cpu_run(cpu)->proxy_irr[bank]))); + smp_store_release(&mshv_vtl_cpu_run(cpu)->scan_proxy_irr, 1); + send_ipi |= cpu != self; + } + + if (send_ipi) { + cpumask_clear_cpu(self, local_mask); + __apic_send_IPI_mask(local_mask, RESCHEDULE_VECTOR); + } + + return 0; +} + +/* + * Attempts to handle an ICR write. Returns 0 if successful, other values + * indicate user-space should be invoked to gracefully handle the error. + * Secure AVIC accelerates self-IPI only. + */ +static int mshv_snp_handle_simple_icr_write(u32 icr_lo, u32 dest) +{ + const u8 vector = icr_lo; + + struct cpumask local_mask; + unsigned int cpu; + int ret; + + ret = mshv_cpu_mask_for_icr_write(icr_lo, dest, &local_mask); + if (ret) + return ret; + ret = mshv_update_proxy_irr_for_icr_write(icr_lo, &local_mask); + if (ret) + return ret; + + // Probobaly shouldn't update the target VP's IRRs to inject the + // interrupt, there might be more state to account for. The target + // VP will go into the user mode anyway, not much to be saved? + + // for_each_cpu(cpu, &local_mask) { + // u64 irr_reg_off; + // unsigned long *irr_reg; + // void* irr; + + // /* + // * IRRs are banked into eight 32-bit registers each starting on the + // * 16-byte boundary (4 byte of an IRR + 12 byte stride). + // */ + // irr_reg_off = (vector >> 5) << 4; + // irr = mshv_snp_secure_avic_irr(cpu); + // irr_reg = (unsigned long*)((u8*)irr + irr_reg_off); + + // /* Inject the interrupt. 
*/ + // test_and_set_bit(vector & 0x1f, irr_reg); + // } + + return 0; +} + +#else + +static void mshv_free_apicid_to_cpuid_mapping(void) {} +static int mshv_create_apicid_to_cpuid_mapping(struct device *) { return 0; } +static bool mshv_tdx_try_handle_exit(struct mshv_vtl_run *) { return false; } +static bool mshv_snp_try_handle_exit(struct mshv_vtl_run *) { return false; } + +#endif /* defined(CONFIG_SEV_GUEST) || defined(CONFIG_INTEL_TDX_GUEST) */ + +/* + * Pull the interrupts in the `proxy_irr` field into the VAPIC page + * Returns true if an exit to user-space is required (sync tmr state) + */ +static bool __mshv_pull_proxy_irr(struct mshv_vtl_run *run, struct page *apic_page) +{ + u32 *apic_page_irr = (u32 *)((char *)page_address(apic_page) + APIC_IRR); + + if (!xchg(&run->scan_proxy_irr, 0) || !apic_page_irr) + return false; + + for (int i = 0; i < 8; i++) { + const u32 val = xchg(&run->proxy_irr[i], 0); + + if (!val) + continue; + + if (run->proxy_irr_exit_mask[i] & val) { + /* + * This vector was previously used for a level-triggered interrupt. + * An edge-triggered interrupt has now arrived, so we need to involve + * user-space to clear its copy of the tmr. + * Put the interrupt(s) back on the run page so it can do so. + * nb atomic_t cast: See comment in mshv_tdx_handle_simple_icr_write + */ + atomic_or(val, (atomic_t *)(&run->proxy_irr[i])); + WRITE_ONCE(run->scan_proxy_irr, 1); + return true; + } + + /* + * IRR is non-contiguous. + * Each bank is 4 bytes with 12 bytes of padding between banks. + */ + apic_page_irr[i * 4] |= val; + } + + return false; +} + +#endif /* defined(CONFIG_X86_64) */ static long __mshv_vtl_ioctl_check_extension(u32 arg) { @@ -320,7 +599,7 @@ static void mshv_vtl_configure_reg_page(struct mshv_vtl_per_cpu *per_cpu) } #ifdef CONFIG_X86_64 -static int mshv_configure_vmsa_page(u8 target_vtl, struct page** vmsa_page) +static int mshv_snp_configure_vmsa_page(u8 target_vtl, struct page** vmsa_page) { struct page *page; struct hv_register_assoc reg_assoc = {}; @@ -469,6 +748,7 @@ static void mshv_vtl_scan_proxy_interrupts(struct hv_per_cpu_context *per_cpu) } else { /* A malicious hypervisor might set a vector > 255. 
*/ vector = READ_ONCE(proxy->u.asserted_vector) & 0xff; + const u32 bank = vector / 32; const u32 masked_irr = BIT(vector % 32) & ~READ_ONCE(run->proxy_irr_blocked[bank]); @@ -626,16 +906,43 @@ static int mshv_vtl_alloc_context(unsigned int cpu) mshv_write_tdx_apic_page(page_to_phys(tdx_apic_page)); #endif } else if (hv_isolation_type_snp()) { -#ifdef CONFIG_X86_64 +#if defined(CONFIG_X86_64) && defined(CONFIG_SEV_GUEST) + struct page *snp_secure_avic_page; + u64 apic_id; int ret; - ret = mshv_configure_vmsa_page(0, &per_cpu->vmsa_page); + ret = mshv_snp_configure_vmsa_page(0, &per_cpu->vmsa_page); if (ret < 0) return ret; + + if (!cc_platform_has(CC_ATTR_SNP_SECURE_AVIC)) + goto synic; + + snp_secure_avic_page = alloc_page(GFP_KERNEL | __GFP_ZERO); + if (!snp_secure_avic_page) + return -ENOMEM; + + /* VMPL 2 for the VTL0 */ + ret = rmpadjust((unsigned long)page_address(snp_secure_avic_page), + RMP_PG_SIZE_4K, 2 | RMPADJUST_ENABLE_READ | RMPADJUST_ENABLE_WRITE); + if (ret) { + pr_err("failed to adjust RMP for the secure AVIC page: %d\n", ret); + free_page((u64)snp_secure_avic_page); + return -EINVAL; + } + + /* Some very basic initialization */ + // ret = sev_ghcb_msr_read(APIC_BASE_MSR + (APIC_ID >> 4), &apic_id); + // BUG_ON(ret != ES_OK); + // WRITE_ONCE(*((u32*)page_address(snp_secure_avic_page) + APIC_ID), lower_32_bits(apic_id)); + x2apic_savic_init_backing_page(page_address(snp_secure_avic_page)); // ??? + + per_cpu->snp_secure_avic_page = snp_secure_avic_page; #endif } else if (mshv_vsm_capabilities.intercept_page_available) mshv_vtl_configure_reg_page(per_cpu); +synic: mshv_vtl_synic_enable_regs(cpu); return 0; @@ -997,62 +1304,7 @@ static void mshv_vtl_idle(void) #define enter_mode(mode) ((mode) & MODE_MASK) #define reenter_mode(mode) (((mode) >> REENTER_SHIFT) & MODE_MASK) -/* - * Interrupt handling (particularly sending (via ICR writes) and receiving interrupts), - * is a hot path on TDX. By performing some of the common functionality entirely in-kernel - * we eliminate costly user<->kernel transitions. - */ -#ifndef CONFIG_INTEL_TDX_GUEST -static void mshv_tdx_free_apicid_to_cpuid_mapping(void) {} -static int mshv_tdx_create_apicid_to_cpuid_mapping(struct device *) { return 0; } -static bool mshv_tdx_try_handle_exit(struct mshv_vtl_run *) { return false; } -#else -static void mshv_tdx_free_apicid_to_cpuid_mapping(void) -{ - int bkt; - struct apicid_to_cpuid_entry *entry; - struct hlist_node *tmp; - - hash_for_each_safe(apicid_to_cpuid, bkt, tmp, entry, node) { - hash_del(&entry->node); - kfree(entry); - } -} - -/* - * Creates and populates the apicid_to_cpuid hash table. - * This mapping is used for fast ICR emulation on TDX. - * Returns 0 on success. 
- */ -static int mshv_tdx_create_apicid_to_cpuid_mapping(struct device *dev) -{ - int cpu, ret = 0; - - for_each_online_cpu(cpu) { - struct apicid_to_cpuid_entry *entry = kzalloc(sizeof(*entry), GFP_KERNEL); - - if (!entry) { - ret = -ENOMEM; - break; - } - - entry->apicid = cpuid_to_apicid[cpu]; - entry->cpuid = cpu; - - if (entry->apicid == BAD_APICID) { - dev_emerg(dev, "Bad APICID: %d !!\n", entry->apicid); - ret = -ENODEV; - break; - } - - hash_add(apicid_to_cpuid, &entry->node, entry->apicid); - } - - if (ret) - mshv_tdx_free_apicid_to_cpuid_mapping(); - - return ret; -} +#ifdef CONFIG_INTEL_TDX_GUEST static void mshv_tdx_advance_to_next_instruction(struct tdx_vp_context *context) { @@ -1090,28 +1342,6 @@ static bool mshv_tdx_is_simple_icr_write(const struct tdx_vp_context *context) return fixed && edge; } -/* - * Returns the cpumask described by dest, where dest is a logical destination. - * cpu_mask should have no CPUs set. - * Returns 0 on success - */ -static int mshv_tdx_get_logical_cpumask(u32 dest, struct cpumask *cpu_mask) -{ - int ret = 0; - - while ((u16)dest) { - const u16 i = fls((u16)dest) - 1; - const u32 physical_id = (dest >> 16 << 4) | i; - - ret = mshv_tdx_set_cpumask_from_apicid(physical_id, cpu_mask); - dest &= ~BIT(i); - if (ret) - break; - } - - return ret; -} - /* * Attempts to handle an ICR write. Returns 0 if successful, other values * indicate user-space should be invoked to gracefully handle the error. @@ -1120,101 +1350,21 @@ static int mshv_tdx_handle_simple_icr_write(struct tdx_vp_context *context) { const u32 icr_lo = context->l2_enter_guest_state.rax; const u32 dest = context->l2_enter_guest_state.rdx; - const u8 shorthand = (icr_lo >> 18) & 0b11; - const u8 vector = icr_lo; - const u64 bank = vector / 32; - const u32 mask = BIT(vector % 32); - const u32 self = smp_processor_id(); - - bool send_ipi = false; struct cpumask local_mask = {}; - unsigned int cpu = 0; int ret = 0; - if (shorthand == 0b10 || dest == (u32)-1) { /* shorthand all or destination id == all */ - cpumask_copy(&local_mask, cpu_online_mask); - } else if (shorthand == 0b11) { /* shorthand all but self */ - cpumask_copy(&local_mask, cpu_online_mask); - cpumask_clear_cpu(self, &local_mask); - } else if (shorthand == 0b01) { /* shorthand self */ - cpumask_set_cpu(self, &local_mask); - } else if (icr_lo & BIT(11)) { /* logical */ - ret = mshv_tdx_get_logical_cpumask(dest, &local_mask); - } else { /* physical */ - ret = mshv_tdx_set_cpumask_from_apicid(dest, &local_mask); - } - + ret = mshv_cpu_mask_for_icr_write(icr_lo, dest, &local_mask); + if (ret) + return ret; + ret = mshv_update_proxy_irr_for_icr_write(icr_lo, &local_mask); if (ret) return ret; - - for_each_cpu(cpu, &local_mask) { - /* - * The kernel doesn't provide an atomic_or which operates on u32, - * so cast to atomic_t, which should have the same layout - */ - static_assert(sizeof(atomic_t) == sizeof(u32)); - atomic_or(mask, (atomic_t *) - (&(mshv_vtl_cpu_run(cpu)->proxy_irr[bank]))); - smp_store_release(&mshv_vtl_cpu_run(cpu)->scan_proxy_irr, 1); - send_ipi |= cpu != self; - } - - if (send_ipi) { - cpumask_clear_cpu(self, &local_mask); - __apic_send_IPI_mask(&local_mask, RESCHEDULE_VECTOR); - } - mshv_tdx_advance_to_next_instruction(context); mshv_tdx_clear_exit_reason(context); return 0; } -static u32 *mshv_tdx_vapic_irr(void) -{ - return (u32 *)((char *)page_address(tdx_this_apic_page()) + APIC_IRR); -} - -/* - * Pull the interrupts in the `proxy_irr` field into the VAPIC page - * Returns true if an exit to user-space is 
required (sync tmr state) - */ -static bool mshv_tdx_pull_proxy_irr(struct mshv_vtl_run *run) -{ - u32 *apic_page_irr = mshv_tdx_vapic_irr(); - - if (!xchg(&run->scan_proxy_irr, 0)) - return false; - - for (int i = 0; i < 8; i++) { - const u32 val = xchg(&run->proxy_irr[i], 0); - - if (!val) - continue; - - if (run->proxy_irr_exit_mask[i] & val) { - /* - * This vector was previously used for a level-triggered interrupt. - * An edge-triggered interrupt has now arrived, so we need to involve - * user-space to clear its copy of the tmr. - * Put the interrupt(s) back on the run page so it can do so. - * nb atomic_t cast: See comment in mshv_tdx_handle_simple_icr_write - */ - atomic_or(val, (atomic_t *)(&run->proxy_irr[i])); - WRITE_ONCE(run->scan_proxy_irr, 1); - return true; - } - - /* - * IRR is non-contiguous. - * Each bank is 4 bytes with 12 bytes of padding between banks. - */ - apic_page_irr[i * 4] |= val; - } - - return false; -} - /* * Checks if exit reason is due: * - An interrupt for the L1 @@ -1334,6 +1484,179 @@ static bool mshv_tdx_try_handle_exit(struct mshv_vtl_run *run) } #endif /* CONFIG_INTEL_TDX_GUEST */ +#if defined(CONFIG_SEV_GUEST) + +static struct page *snp_this_savic_page(void) +{ + return *this_cpu_ptr(&mshv_vtl_per_cpu.snp_secure_avic_page); +} + +static struct sev_es_save_area *snp_this_vmsa(void) +{ + struct page *vmsa_page = *this_cpu_ptr(&mshv_vtl_per_cpu.vmsa_page); + return page_address(vmsa_page); +} + +/* + * Sets a benign guest error code so that there won't be another + * #VMEXIT for the just processed one and marks the VMSA as + * runnable. + */ +static void mshv_snp_clear_exit_code(struct sev_es_save_area *vmsa, bool int_shadow) +{ + if (int_shadow) + vmsa->vintr_ctrl |= V_INT_SHADOW_MASK; + else + vmsa->vintr_ctrl &= ~V_INT_SHADOW_MASK; + vmsa->guest_exit_code = SVM_EXIT_INTR; + vmsa->vintr_ctrl &= ~V_GUEST_BUSY_MASK; +} + +/* + * Try to handle the incomplete IPI SEV-SNP exit. + * + * Returns true if the exit was handled entirely in kernel, and the VMPL should be re-entered. + * Returns false if the exit must be handled by user-space. + */ +static bool mshv_snp_try_handle_incomplete_ipi(struct mshv_vtl_run *run, + struct sev_es_save_area *vmsa) +{ + u32 icr_lo = vmsa->guest_exit_info_1; + u32 dest = vmsa->guest_exit_info_1 >> 32; + + /* Route the INIT, SIPI, NMI to the user mode for now. */ + if ((icr_lo & APIC_DM_FIXED_MASK) != APIC_DM_FIXED) + return false; + /* Can handle only edge-triggered interrupts. */ + if (icr_lo & APIC_INT_LEVELTRIG) + return false; + + if (mshv_snp_handle_simple_icr_write(icr_lo, dest)) + return false; + + return true; +} + +/* + * Try to handle an SEV-SNP exit entirely in kernel, to avoid the overhead of a + * user<->kernel transition. + * + * Returns true if the exit was handled entirely in kernel, and the VMPL should be re-entered. + * Returns false if the exit must be handled by user-space. 
+ */ +static bool mshv_snp_try_handle_exit(struct mshv_vtl_run *run) +{ + const bool intr_inject = MSHV_VTL_OFFLOAD_FLAG_INTR_INJECT & run->offload_flags; + const bool x2apic = MSHV_VTL_OFFLOAD_FLAG_X2APIC & run->offload_flags; + struct sev_es_save_area *vmsa; + u8 *offload_flags; + + if (!intr_inject || !x2apic) + return false; + + vmsa = snp_this_vmsa(); + + switch (vmsa->guest_exit_code) + { + case SVM_EXIT_AVIC_INCOMPLETE_IPI: + if (mshv_snp_try_handle_incomplete_ipi(run, vmsa)) + goto handled; + break; + case SVM_EXIT_HLT: + run->flags |= MSHV_VTL_RUN_FLAG_HALTED; + run->offload_flags |= MSHV_VTL_OFFLOAD_FLAG_HALT_HLT; + goto handled; + case SVM_EXIT_IDLE_HLT: + run->flags |= MSHV_VTL_RUN_FLAG_HALTED; + run->offload_flags |= MSHV_VTL_OFFLOAD_FLAG_HALT_IDLE; + goto handled; + case SVM_EXIT_MSR: + if (vmsa->rcx == HV_X64_MSR_GUEST_IDLE && !(vmsa->guest_exit_info_1 & 1)) { + /* The guest indicates it's idle by reading this synthetic MSR. */ + vmsa->rax = 0; + vmsa->rdx = 0; + vmsa->rip = vmsa->guest_nrip; + + run->offload_flags |= MSHV_VTL_OFFLOAD_FLAG_HALT_IDLE; + run->flags |= MSHV_VTL_RUN_FLAG_HALTED; + + goto handled; + } + break; + default: + break; + } + + offload_flags = &run->offload_flags; + (*offload_flags) &= ~MSHV_VTL_OFFLOAD_FLAG_HALT_HLT; + (*offload_flags) &= ~MSHV_VTL_OFFLOAD_FLAG_HALT_IDLE; + if (!(*offload_flags & MSHV_VTL_OFFLOAD_FLAG_HALT_OTHER)) + run->flags &= ~MSHV_VTL_RUN_FLAG_HALTED; + + return false; + +handled: + + mshv_snp_clear_exit_code(vmsa, false); + return true; +} + +static bool mshv_snp_try_handle_intercept(struct mshv_vtl_run *run) +{ + struct hv_vp_assist_page *hvp = hv_vp_assist_page[smp_processor_id()]; + u32 msg_type = HVMSG_NONE; + struct hv_message *msg = NULL; + + switch (hvp->vtl_entry_reason) { + case MSHV_ENTRY_REASON_INTERRUPT: + if (!mshv_vsm_capabilities.intercept_page_available) + { + struct hv_per_cpu_context *mshv_cpu = this_cpu_ptr(hv_context.cpu_context); + void *synic_message_page = mshv_cpu->synic_message_page; + + if (likely(synic_message_page)) + msg = (struct hv_message *)synic_message_page + HV_SYNIC_INTERCEPTION_SINT_INDEX; + } + break; + + case MSHV_ENTRY_REASON_INTERCEPT: + WARN_ON(!mshv_vsm_capabilities.intercept_page_available); + msg = (struct hv_message *)hvp->intercept_message; + break; + + default: + panic("unknown entry reason: %d", hvp->vtl_entry_reason); + } + + if (!msg) + return true; + msg_type = READ_ONCE(msg->header.message_type); + + switch (msg_type) { + case HVMSG_NONE: + break; + case HVMSG_X64_EXCEPTION_INTERCEPT: + { + struct hv_x64_exception_intercept_message *expt_msg = + (struct hv_x64_exception_intercept_message*)msg->u.payload; + if (expt_msg->exception_vector != X86_TRAP_VC) + return false; + } + break; + case HVMSG_SYNIC_SINT_DELIVERABLE: + return false; + case HVMSG_X64_HALT: + run->flags |= MSHV_VTL_RUN_FLAG_HALTED; + run->offload_flags |= MSHV_VTL_OFFLOAD_FLAG_HALT_HLT; + break; + default: + return false; + } + + return true; +} +#endif /* CONFIG_SEV_GUEST */ + /* * Attempts to directly inject the interrupts in the proxy_irr field. * Returns true if an exit to user-space is required. 
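As a worked illustration of the destination decode performed by mshv_get_logical_cpumask() and mshv_cpu_mask_for_icr_write() above: an x2APIC logical destination carries the cluster ID in bits 31:16 and a member bitmask in bits 15:0, so each set bit i maps to the physical APIC ID (cluster << 4) | i. The function below is hypothetical and only demonstrates the arithmetic.

#include <linux/types.h>
#include <linux/bits.h>
#include <linux/bitops.h>
#include <linux/printk.h>

static void mshv_logical_dest_decode_example(void)
{
	u32 dest = (0x0002 << 16) | 0x0005;	/* cluster 2, members 0 and 2 */

	while ((u16)dest) {
		const unsigned int i = fls((u16)dest) - 1;
		const u32 apicid = (dest >> 16 << 4) | i;

		pr_info("logical bit %u -> physical APIC ID %#x\n", i, apicid);
		dest &= ~BIT(i);
	}
	/* Prints APIC IDs 0x22 and 0x20 for this destination. */
}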
@@ -1342,14 +1665,20 @@ static bool mshv_pull_proxy_irr(struct mshv_vtl_run *run) { bool ret = READ_ONCE(run->scan_proxy_irr); - if (!hv_isolation_type_tdx() || - !(run->offload_flags & MSHV_VTL_OFFLOAD_FLAG_INTR_INJECT)) + if (!(run->offload_flags & MSHV_VTL_OFFLOAD_FLAG_INTR_INJECT)) return ret; + if (hv_isolation_type_tdx()) { #ifdef CONFIG_INTEL_TDX_GUEST - ret = mshv_tdx_pull_proxy_irr(run); - mshv_tdx_update_rvi_halt(run); + ret = __mshv_pull_proxy_irr(run, tdx_this_apic_page()); + mshv_tdx_update_rvi_halt(run); +#endif + } else if (hv_isolation_type_snp()) { +#ifdef CONFIG_SEV_GUEST + ret = __mshv_pull_proxy_irr(run, snp_this_savic_page()); #endif + } + return ret; } @@ -1423,6 +1752,10 @@ static int mshv_vtl_ioctl_return_to_lower_vtl(void) continue; /* Exit handled entirely in kernel */ else goto done; + } else if (hv_isolation_type_snp()) { + if (mshv_snp_try_handle_intercept(mshv_vtl_this_run()) && + mshv_snp_try_handle_exit(mshv_vtl_this_run())) + continue; /* Exit handled entirely in kernel */ } hvp = hv_vp_assist_page[smp_processor_id()]; @@ -1921,7 +2254,7 @@ static void guest_vsm_vmsa_pfn_this_cpu(void *arg) cpu = get_cpu(); vmsa_guest_vsm_page = *this_cpu_ptr(&mshv_vtl_per_cpu.vmsa_guest_vsm_page); if (!vmsa_guest_vsm_page) { - if (mshv_configure_vmsa_page(1, per_cpu_ptr(&mshv_vtl_per_cpu.vmsa_guest_vsm_page, cpu))) + if (mshv_snp_configure_vmsa_page(1, per_cpu_ptr(&mshv_vtl_per_cpu.vmsa_guest_vsm_page, cpu))) *pfn = -ENOMEM; else vmsa_guest_vsm_page = *this_cpu_ptr(&mshv_vtl_per_cpu.vmsa_guest_vsm_page); @@ -1952,6 +2285,41 @@ static long mshv_vtl_ioctl_guest_vsm_vmsa_pfn(void __user *user_arg) return ret; } + +static void secure_avic_vtl0_this_cpu(void *arg) +{ + int cpu; + struct page *snp_secure_avic_page; + u64 *pfn = arg; + + cpu = get_cpu(); + snp_secure_avic_page = *this_cpu_ptr(&mshv_vtl_per_cpu.snp_secure_avic_page); + put_cpu(); + + *pfn = snp_secure_avic_page ? page_to_pfn(snp_secure_avic_page) : -ENOMEM; +} + +static long mshv_vtl_ioctl_secure_avic_vtl0_pfn(void __user *user_arg) +{ + u64 pfn; + u32 cpu_id; + long ret; + + ret = copy_from_user(&cpu_id, user_arg, sizeof(cpu_id)) ? -EFAULT : 0; + if (ret) + return ret; + + ret = smp_call_function_single(cpu_id, secure_avic_vtl0_this_cpu, &pfn, true); + if (ret) + return ret; + ret = (long)pfn; + if (ret < 0) + return ret; + + ret = copy_to_user(user_arg, &pfn, sizeof(pfn)) ? 
-EFAULT : 0; + + return ret; +} #endif static void ack_kick(void *cancel_cpu_run) @@ -2084,6 +2452,9 @@ mshv_vtl_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) case MSHV_VTL_GUEST_VSM_VMSA_PFN: ret = mshv_vtl_ioctl_guest_vsm_vmsa_pfn((void __user *)arg); break; + case MSHV_VTL_SECURE_AVIC_VTL0_PFN: + ret = mshv_vtl_ioctl_secure_avic_vtl0_pfn((void __user *)arg); + break; #endif case MSHV_VTL_KICK_CPU: @@ -2100,7 +2471,7 @@ mshv_vtl_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) static vm_fault_t mshv_vtl_fault(struct vm_fault *vmf) { - struct page *page; + struct page *page = NULL; int cpu = vmf->pgoff & MSHV_PG_OFF_CPU_MASK; int real_off = vmf->pgoff >> MSHV_REAL_OFF_SHIFT; @@ -2124,7 +2495,7 @@ static vm_fault_t mshv_vtl_fault(struct vm_fault *vmf) return VM_FAULT_SIGBUS; page_ptr_ptr = per_cpu_ptr(&mshv_vtl_per_cpu.vmsa_guest_vsm_page, cpu); if (!*page_ptr_ptr) { - if (mshv_configure_vmsa_page(1, page_ptr_ptr) < 0) + if (mshv_snp_configure_vmsa_page(1, page_ptr_ptr) < 0) return VM_FAULT_SIGBUS; } page = *page_ptr_ptr; @@ -2132,18 +2503,16 @@ static vm_fault_t mshv_vtl_fault(struct vm_fault *vmf) if (!hv_isolation_type_snp()) return VM_FAULT_SIGBUS; page = *per_cpu_ptr(&mshv_vtl_per_cpu.vmsa_page, cpu); -#ifdef CONFIG_INTEL_TDX_GUEST } else if (real_off == MSHV_APIC_PAGE_OFFSET) { - if (!hv_isolation_type_tdx()) - return VM_FAULT_SIGBUS; - - page = tdx_apic_page(cpu); -#endif + page = mshv_apic_page(cpu); #endif } else { return VM_FAULT_NOPAGE; } + if (!page) + return VM_FAULT_SIGBUS; + get_page(page); vmf->page = page; @@ -2774,7 +3143,7 @@ static int __init mshv_vtl_init(void) goto unset_func; } - ret = mshv_tdx_create_apicid_to_cpuid_mapping(dev); + ret = mshv_create_apicid_to_cpuid_mapping(dev); if (ret) goto unset_func; @@ -2843,7 +3212,7 @@ static int __init mshv_vtl_init(void) static void __exit mshv_vtl_exit(void) { mshv_setup_vtl_func(NULL, NULL, NULL); - mshv_tdx_free_apicid_to_cpuid_mapping(); + mshv_free_apicid_to_cpuid_mapping(); misc_deregister(&mshv_vtl_sint_dev); misc_deregister(&mshv_vtl_hvcall); misc_deregister(&mshv_vtl_low); diff --git a/include/uapi/linux/mshv.h b/include/uapi/linux/mshv.h index 7ba3a3f24989e..ebe3902770925 100644 --- a/include/uapi/linux/mshv.h +++ b/include/uapi/linux/mshv.h @@ -367,6 +367,7 @@ struct mshv_kick_cpus { #define MSHV_VTL_RMPQUERY _IOW(MSHV_IOCTL, 0x35, struct mshv_rmpquery) #define MSHV_VTL_INVLPGB _IOW(MSHV_IOCTL, 0x36, struct mshv_invlpgb) #define MSHV_VTL_TLBSYNC _IO(MSHV_IOCTL, 0x37) +#define MSHV_VTL_SECURE_AVIC_VTL0_PFN _IOWR(MSHV_IOCTL, 0x39, __u64) /* VMBus device IOCTLs */ From c6dbdb558099cdb097321525406c816324900456 Mon Sep 17 00:00:00 2001 From: Roman Kisel Date: Thu, 3 Jul 2025 18:12:13 -0700 Subject: [PATCH 26/26] don't rely on nrip --- drivers/hv/mshv_vtl_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hv/mshv_vtl_main.c b/drivers/hv/mshv_vtl_main.c index b587cce8fc82f..412c227bfd398 100644 --- a/drivers/hv/mshv_vtl_main.c +++ b/drivers/hv/mshv_vtl_main.c @@ -1575,7 +1575,7 @@ static bool mshv_snp_try_handle_exit(struct mshv_vtl_run *run) /* The guest indicates it's idle by reading this synthetic MSR. */ vmsa->rax = 0; vmsa->rdx = 0; - vmsa->rip = vmsa->guest_nrip; + vmsa->rip += 2; /* vmsa->guest_nrip might not be available although here it should be. */ run->offload_flags |= MSHV_VTL_OFFLOAD_FLAG_HALT_IDLE; run->flags |= MSHV_VTL_RUN_FLAG_HALTED;
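A closing note on the last hunk: advancing the VMSA RIP by a hard-coded 2 is correct only because the intercepted instruction is RDMSR (opcode 0f 32, always two bytes). A sketch of a more explicit form, with a hypothetical helper name:

#define RDMSR_INSN_LEN	2	/* RDMSR is the two-byte opcode 0f 32 */

/* Hypothetical helper, not part of the series: skip the intercepted RDMSR. */
static void mshv_snp_skip_rdmsr(struct sev_es_save_area *vmsa)
{
	if (vmsa->guest_nrip)
		vmsa->rip = vmsa->guest_nrip;	/* use next-RIP when the CPU reports it */
	else
		vmsa->rip += RDMSR_INSN_LEN;
}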