Re: [PATCH 07/13] KVM: SVM: Add VNMI support in get/set_nmi_mask
From: Sean Christopherson
Date: Thu Nov 17 2022 - 13:54:17 EST
On Thu, Nov 17, 2022, Maxim Levitsky wrote:
> From: Santosh Shukla <santosh.shukla@xxxxxxx>
>
> VMCB intr_ctrl bit12 (V_NMI_MASK) is set by the processor when handling
> NMI in guest and is cleared after the NMI is handled. Treat V_NMI_MASK
> as read-only in the hypervisor except for the SMM case where hypervisor
> before entring and after leaving SMM mode requires to set and unset
> V_NMI_MASK.
>
> Adding API(get_vnmi_vmcb) in order to return the correct vmcb for L1 or
> L2.
>
> Maxim:
> - made set_vnmi_mask/clear_vnmi_mask/is_vnmi_mask warn if called
> without vNMI enabled
> - clear IRET intercept in svm_set_nmi_mask even with vNMI
>
> Signed-off-by: Santosh Shukla <santosh.shukla@xxxxxxx>
> Signed-off-by: Maxim Levitsky <mlevitsk@xxxxxxxxxx>
> ---
> arch/x86/kvm/svm/svm.c | 18 ++++++++++++++-
> arch/x86/kvm/svm/svm.h | 52 ++++++++++++++++++++++++++++++++++++++++++
> 2 files changed, 69 insertions(+), 1 deletion(-)
>
> diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
> index 08a7b2a0a29f3a..c16f68f6c4f7d7 100644
> --- a/arch/x86/kvm/svm/svm.c
> +++ b/arch/x86/kvm/svm/svm.c
> @@ -3618,13 +3618,29 @@ static int svm_nmi_allowed(struct kvm_vcpu *vcpu, bool for_injection)
>
> static bool svm_get_nmi_mask(struct kvm_vcpu *vcpu)
> {
> - return !!(vcpu->arch.hflags & HF_NMI_MASK);
> + struct vcpu_svm *svm = to_svm(vcpu);
> +
> + if (is_vnmi_enabled(svm))
> + return is_vnmi_mask_set(svm);
> + else
> + return !!(vcpu->arch.hflags & HF_NMI_MASK);
> }
>
> static void svm_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
> {
> struct vcpu_svm *svm = to_svm(vcpu);
>
> + if (is_vnmi_enabled(svm)) {
> + if (masked)
> + set_vnmi_mask(svm);
I believe not setting INTERCEPT_IRET is correct, but only because the existing
code is unnecessary. And this all very subtly relies on KVM_REQ_EVENT being set
and/or KVM already being in kvm_check_and_inject_events().
When NMIs become unblocked, INTERCEPT_IRET can be cleared, but KVM should also
make KVM_REQ_EVENT pending. AFAICT, that doesn't happen when this is called via the
emulator. Ah, because em_iret() only handles RM for Intel's restricted guest
crap. I.e. it "works" only because it never happens. All other flows set
KVM_REQ_EVENT when toggling NMI blocking, e.g. the RSM path of kvm_smm_changed().
And when NMIs become blocked, there's no need to force INTERCEPT_IRET in this
code because kvm_check_and_inject_events() will request an NMI window and set the
intercept if necessary, and all paths that set NMI blocking are guaranteed to
reach kvm_check_and_inject_events() before entering the guest.
1. RSM => kvm_smm_changed() sets KVM_REQ_EVENT
2. enter_smm() is only called from within kvm_check_and_inject_events(),
before pending NMIs are processed (yay priority)
3. emulator_set_nmi_mask() never blocks NMIs, only does the half-baked IRET emulation
4. kvm_vcpu_ioctl_x86_set_vcpu_event() sets KVM_REQ_EVENT
So, can you add a prep patch to drop the forced INTERCEPT_IRET? That way the
logic for vNMI and !vNMI is the same.
> + else {
> + clear_vnmi_mask(svm);
This is the only code that sets/clears the vNMI mask, so rather than have set/clear
helpers, what about a single helper to do the dirty work?
> + if (!sev_es_guest(vcpu->kvm))
> + svm_clr_intercept(svm, INTERCEPT_IRET);
> + }
> + return;
> + }
> +
> if (masked) {
> vcpu->arch.hflags |= HF_NMI_MASK;
> if (!sev_es_guest(vcpu->kvm))
> diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
> index f5383104d00580..bf7f4851dee204 100644
> --- a/arch/x86/kvm/svm/svm.h
> +++ b/arch/x86/kvm/svm/svm.h
> @@ -35,6 +35,7 @@ extern u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly;
> extern bool npt_enabled;
> extern int vgif;
> extern bool intercept_smi;
> +extern bool vnmi;
>
> enum avic_modes {
> AVIC_MODE_NONE = 0,
> @@ -531,6 +532,57 @@ static inline bool is_x2apic_msrpm_offset(u32 offset)
> (msr < (APIC_BASE_MSR + 0x100));
> }
>
> +static inline struct vmcb *get_vnmi_vmcb(struct vcpu_svm *svm)
> +{
> + if (!vnmi)
> + return NULL;
> +
> + if (is_guest_mode(&svm->vcpu))
> + return svm->nested.vmcb02.ptr;
> + else
> + return svm->vmcb01.ptr;
> +}
> +
> +static inline bool is_vnmi_enabled(struct vcpu_svm *svm)
> +{
> + struct vmcb *vmcb = get_vnmi_vmcb(svm);
> +
> + if (vmcb)
> + return !!(vmcb->control.int_ctl & V_NMI_ENABLE);
> + else
> + return false;
Maybe just this?
return vmcb && (vmcb->control.int_ctl & V_NMI_ENABLE);
Or if an inner helper is added:
return vmcb && __is_vnmi_enabled(vmcb);
> +}
> +
> +static inline bool is_vnmi_mask_set(struct vcpu_svm *svm)
> +{
> + struct vmcb *vmcb = get_vnmi_vmcb(svm);
> +
> + if (!WARN_ON_ONCE(!vmcb))
Rather than WARN, add an inner __is_vnmi_enabled() that takes the vnmi_vmcb.
Actually, if you do that, the test/set/clear helpers can go away entirely.
> + return false;
> +
> + return !!(vmcb->control.int_ctl & V_NMI_MASK);
> +}
> +
> +static inline void set_vnmi_mask(struct vcpu_svm *svm)
> +{
> + struct vmcb *vmcb = get_vnmi_vmcb(svm);
> +
> + if (!WARN_ON_ONCE(!vmcb))
> + return;
> +
> + vmcb->control.int_ctl |= V_NMI_MASK;
> +}
> +
> +static inline void clear_vnmi_mask(struct vcpu_svm *svm)
> +{
> + struct vmcb *vmcb = get_vnmi_vmcb(svm);
> +
> + if (!WARN_ON_ONCE(!vmcb))
> + return;
> +
> + vmcb->control.int_ctl &= ~V_NMI_MASK;
> +}
These helpers can all go in svm.c. There are no users outside of svm.c, and
unless I'm misunderstanding how nested works, there should never be outside users.
E.g. with HF_NMI_MASK => svm->nmi_masked, the end result can be something like:
static bool __is_vnmi_enabled(struct *vmcb)
{
return !!(vmcb->control.int_ctl & V_NMI_ENABLE);
}
/*
 * Report whether vNMI is enabled for @svm.  Returns false when
 * get_vnmi_vmcb() yields no VMCB; otherwise defers to __is_vnmi_enabled()
 * on the VMCB it returned.
 */
static bool is_vnmi_enabled(struct vcpu_svm *svm)
{
	struct vmcb *vnmi_vmcb = get_vnmi_vmcb(svm);

	if (!vnmi_vmcb)
		return false;

	return __is_vnmi_enabled(vnmi_vmcb);
}
static bool svm_get_nmi_mask(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
struct vmcb *vmcb = get_vnmi_vmcb(svm);
if (vmcb && __is_vnmi_enabled(vmcb))
return !!(vmcb->control.int_ctl & V_NMI_MASK);
else
return !!(vcpu->arch.hflags & HF_NMI_MASK);
}
/*
 * Set (@set == true) or clear (@set == false) the V_NMI_MASK bit in the
 * int_ctl field of the control area of @vmcb.  No other bits are touched.
 */
static void svm_set_or_clear_vnmi_mask(struct vmcb *vmcb, bool set)
{
	if (!set) {
		vmcb->control.int_ctl &= ~V_NMI_MASK;
		return;
	}

	vmcb->control.int_ctl |= V_NMI_MASK;
}
/*
 * Mask (@masked == true) or unmask (@masked == false) NMIs for @vcpu.
 *
 * With vNMI enabled on the VMCB returned by get_vnmi_vmcb(), the V_NMI_MASK
 * bit in that VMCB is updated through svm_set_or_clear_vnmi_mask(), so the
 * bit manipulation lives in exactly one place.  Otherwise the
 * software-tracked svm->nmi_masked flag is updated.
 *
 * In both cases, unmasking also calls svm_disable_iret_interception().
 */
static void svm_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
{
	struct vcpu_svm *svm = to_svm(vcpu);
	struct vmcb *vmcb = get_vnmi_vmcb(svm);

	if (vmcb && __is_vnmi_enabled(vmcb))
		svm_set_or_clear_vnmi_mask(vmcb, masked);
	else
		svm->nmi_masked = masked;

	if (!masked)
		svm_disable_iret_interception(svm);
}