From 9b57e9d5010bbed7c0d9d445085840f7025e6f9a Mon Sep 17 00:00:00 2001 From: Halil Pasic Date: Tue, 19 Oct 2021 19:53:59 +0200 Subject: [PATCH 01/19] KVM: s390: clear kicked_mask before sleeping again The idea behind the kicked mask is that we should not re-kick a vcpu that is already in the "kick" process, i.e. that was kicked and is about to be dispatched if certain conditions are met. The problem with the current implementation is that it assumes the kicked vcpu is going to enter SIE shortly. But under certain circumstances, the vcpu we just kicked will be deemed non-runnable and will remain in wait state. This can happen if the interrupt(s) this vcpu got kicked to deal with were already cleared (because the interrupts got delivered to another vcpu). In this case kvm_arch_vcpu_runnable() would return false, and the vcpu would remain in kvm_vcpu_block(), but this time with its kicked_mask bit set. So next time around we wouldn't kick the vcpu from __airqs_kick_single_vcpu(), but would assume that we just kicked it. Let us make sure the kicked_mask is cleared before we give up on re-dispatching the vcpu. Fixes: 9f30f6216378 ("KVM: s390: add gib_alert_irq_handler()") Reported-by: Matthew Rosato Signed-off-by: Halil Pasic Reviewed-by: Christian Borntraeger Reviewed-by: Michael Mueller Reviewed-by: Claudio Imbrenda Link: https://lore.kernel.org/r/20211019175401.3757927-2-pasic@linux.ibm.com Signed-off-by: Christian Borntraeger --- arch/s390/kvm/kvm-s390.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 6a6dd5e1daf6..1c97493d21e1 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -3363,6 +3363,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) { + clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask); return kvm_s390_vcpu_has_irq(vcpu, 0); } From 0e9ff65f455dfd0a8aea5e7843678ab6fe097e21 Mon Sep 17 00:00:00 2001 From: Halil Pasic Date: Tue, 19 Oct 2021 19:54:00 +0200 Subject: [PATCH 02/19] KVM: s390: preserve deliverable_mask in __airqs_kick_single_vcpu Changing the deliverable mask in __airqs_kick_single_vcpu() is a bug. If one idle vcpu can't take the interrupts we want to deliver, we should look for another vcpu that can, instead of saying that we don't want to deliver these interrupts by clearing the bits from the deliverable_mask.
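To see why the destructive mask update loses interrupts, consider a minimal standalone C model of the selection loop. The two-vcpu setup and the mask values are made up for illustration; the real code derives the per-vcpu ISC mask from gcr[6] exactly as in the diff that follows.

#include <stdio.h>
#include <stdint.h>

/* Model of __airqs_kick_single_vcpu(): pick the first idle vcpu whose
 * ISC mask intersects the deliverable mask. Values are hypothetical. */
static int pick_vcpu_buggy(uint8_t deliverable, const uint8_t *isc, int n)
{
	for (int i = 0; i < n; i++) {
		deliverable &= isc[i];	/* bug: clobbers the mask for later vcpus */
		if (deliverable)
			return i;
	}
	return -1;
}

static int pick_vcpu_fixed(uint8_t deliverable, const uint8_t *isc, int n)
{
	for (int i = 0; i < n; i++) {
		if (deliverable & isc[i])	/* fix: test without modifying */
			return i;
	}
	return -1;
}

int main(void)
{
	uint8_t isc[] = { 0x0f, 0x80 };	/* vcpu 0 takes ISCs 4-7, vcpu 1 takes ISC 0 */
	uint8_t deliverable = 0x80;	/* only an interrupt for vcpu 1 is pending */

	printf("buggy: %d\n", pick_vcpu_buggy(deliverable, isc, 2));	/* -1 */
	printf("fixed: %d\n", pick_vcpu_fixed(deliverable, isc, 2));	/*  1 */
	return 0;
}

With the destructive &=, vcpu 0's non-matching ISC mask wipes the only pending bit before vcpu 1 is ever considered; testing the intersection without modifying deliverable_mask preserves it for the remaining vcpus.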
Fixes: 9f30f6216378 ("KVM: s390: add gib_alert_irq_handler()") Signed-off-by: Halil Pasic Reviewed-by: Christian Borntraeger Reviewed-by: Michael Mueller Reviewed-by: Claudio Imbrenda Link: https://lore.kernel.org/r/20211019175401.3757927-3-pasic@linux.ibm.com Signed-off-by: Christian Borntraeger --- arch/s390/kvm/interrupt.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 10722455fd02..2245f4b8d362 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -3053,13 +3053,14 @@ static void __airqs_kick_single_vcpu(struct kvm *kvm, u8 deliverable_mask) int vcpu_idx, online_vcpus = atomic_read(&kvm->online_vcpus); struct kvm_s390_gisa_interrupt *gi = &kvm->arch.gisa_int; struct kvm_vcpu *vcpu; + u8 vcpu_isc_mask; for_each_set_bit(vcpu_idx, kvm->arch.idle_mask, online_vcpus) { vcpu = kvm_get_vcpu(kvm, vcpu_idx); if (psw_ioint_disabled(vcpu)) continue; - deliverable_mask &= (u8)(vcpu->arch.sie_block->gcr[6] >> 24); - if (deliverable_mask) { + vcpu_isc_mask = (u8)(vcpu->arch.sie_block->gcr[6] >> 24); + if (deliverable_mask & vcpu_isc_mask) { /* lately kicked but not yet running */ if (test_and_set_bit(vcpu_idx, gi->kicked_mask)) return; From 2d8fb8f3914b40e3cc12f8cbb74daefd5245349d Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 9 Sep 2021 18:22:40 +0200 Subject: [PATCH 03/19] s390/gmap: validate VMA in __gmap_zap() We should not walk/touch page tables outside of VMA boundaries when holding only the mmap sem in read mode. Evil user space can modify the VMA layout just before this function runs and e.g., trigger races with page table removal code since commit dd2283f2605e ("mm: mmap: zap pages with read mmap_sem in munmap"). The mere presence of an entry in our guest_to_host radix tree does not imply that there is a VMA. Further, we should not allocate page tables (via get_locked_pte()) outside of VMA boundaries: if evil user space decides to map hugetlbfs to these ranges, bad things will happen because we suddenly have PTE or PMD page tables where we shouldn't have them. Similarly, we have to check if we suddenly find a hugetlbfs VMA before calling get_locked_pte(). Note that gmap_discard() is different: zap_page_range()->unmap_single_vma() makes sure to stay within VMA boundaries.
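This and the following patches rely on vma_lookup(), which, unlike find_vma(), only returns a VMA that actually contains the given address. As a sketch from memory (see <linux/mm.h>, where the helper was added in 5.14), its definition is essentially:

/*
 * find_vma() returns the first VMA that ends above addr, which may well
 * start above addr, too; vma_lookup() additionally rejects that case.
 */
static inline
struct vm_area_struct *vma_lookup(struct mm_struct *mm, unsigned long addr)
{
	struct vm_area_struct *vma = find_vma(mm, addr);

	if (vma && addr < vma->vm_start)
		vma = NULL;

	return vma;
}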
Fixes: b31288fa83b2 ("s390/kvm: support collaborative memory management") Signed-off-by: David Hildenbrand Reviewed-by: Claudio Imbrenda Acked-by: Heiko Carstens Link: https://lore.kernel.org/r/20210909162248.14969-2-david@redhat.com Signed-off-by: Christian Borntraeger --- arch/s390/mm/gmap.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index 4d3b33ce81c6..e0735c343775 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -672,6 +672,7 @@ EXPORT_SYMBOL_GPL(gmap_fault); */ void __gmap_zap(struct gmap *gmap, unsigned long gaddr) { + struct vm_area_struct *vma; unsigned long vmaddr; spinlock_t *ptl; pte_t *ptep; @@ -681,6 +682,11 @@ void __gmap_zap(struct gmap *gmap, unsigned long gaddr) gaddr >> PMD_SHIFT); if (vmaddr) { vmaddr |= gaddr & ~PMD_MASK; + + vma = vma_lookup(gmap->mm, vmaddr); + if (!vma || is_vm_hugetlb_page(vma)) + return; + /* Get pointer to the page table entry */ ptep = get_locked_pte(gmap->mm, vmaddr, &ptl); if (likely(ptep)) From b159f94c86b43cf7e73e654bc527255b1f4eafc4 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 9 Sep 2021 18:22:41 +0200 Subject: [PATCH 04/19] s390/gmap: don't unconditionally call pte_unmap_unlock() in __gmap_zap() ... otherwise we will try, via a garbage pointer, to unlock a spinlock that was never locked. By the time we reach this code path, we have usually already looked up a PGSTE successfully; however, evil user space could have manipulated the VMA layout in the meantime and triggered removal of the page table. Fixes: 1e133ab296f3 ("s390/mm: split arch/s390/mm/pgtable.c") Signed-off-by: David Hildenbrand Reviewed-by: Claudio Imbrenda Acked-by: Heiko Carstens Link: https://lore.kernel.org/r/20210909162248.14969-3-david@redhat.com Signed-off-by: Christian Borntraeger --- arch/s390/mm/gmap.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index e0735c343775..d63c0ccc5ccd 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -689,9 +689,10 @@ void __gmap_zap(struct gmap *gmap, unsigned long gaddr) /* Get pointer to the page table entry */ ptep = get_locked_pte(gmap->mm, vmaddr, &ptl); - if (likely(ptep)) + if (likely(ptep)) { ptep_zap_unused(gmap->mm, vmaddr, ptep, 0); - pte_unmap_unlock(ptep, ptl); + pte_unmap_unlock(ptep, ptl); + } } } EXPORT_SYMBOL_GPL(__gmap_zap); From fe3d10024073f06f04c74b9674bd71ccc1d787cf Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 9 Sep 2021 18:22:42 +0200 Subject: [PATCH 05/19] s390/mm: validate VMA in PGSTE manipulation functions We should not walk/touch page tables outside of VMA boundaries when holding only the mmap sem in read mode. Evil user space can modify the VMA layout just before this function runs and e.g., trigger races with page table removal code since commit dd2283f2605e ("mm: mmap: zap pages with read mmap_sem in munmap"). gfn_to_hva() will only translate using KVM memory regions, but won't validate the VMA. Further, we should not allocate page tables outside of VMA boundaries: if evil user space decides to map hugetlbfs to these ranges, bad things will happen because we suddenly have PTE or PMD page tables where we shouldn't have them. Similarly, we have to check if we suddenly find a hugetlbfs VMA before calling get_locked_pte().
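The guard added to each helper can be modeled in standalone C; the interval struct and lookup below stand in for the kernel's vma_lookup() and is_vm_hugetlb_page(), and all addresses are made up:

#include <errno.h>
#include <stdio.h>
#include <stddef.h>
#include <stdbool.h>

struct vma_model { unsigned long start, end; bool hugetlb; };

static const struct vma_model *lookup(const struct vma_model *v, size_t n,
				      unsigned long hva)
{
	for (size_t i = 0; i < n; i++)
		if (hva >= v[i].start && hva < v[i].end)
			return &v[i];
	return NULL;
}

/* no VMA, or a hugetlbfs VMA: never walk or allocate PTE tables */
static int pgste_guard(const struct vma_model *v, size_t n, unsigned long hva)
{
	const struct vma_model *vma = lookup(v, n, hva);

	if (!vma || vma->hugetlb)
		return -EFAULT;
	return 0;
}

int main(void)
{
	struct vma_model vmas[] = {
		{ 0x1000, 0x2000, false },	/* regular VMA */
		{ 0x2000, 0x4000, true },	/* hugetlbfs VMA */
	};

	printf("%d %d %d\n",
	       pgste_guard(vmas, 2, 0x1800),	/* 0 */
	       pgste_guard(vmas, 2, 0x3000),	/* -EFAULT: hugetlb */
	       pgste_guard(vmas, 2, 0x9000));	/* -EFAULT: no VMA */
	return 0;
}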
Fixes: 2d42f9477320 ("s390/kvm: Add PGSTE manipulation functions") Signed-off-by: David Hildenbrand Reviewed-by: Claudio Imbrenda Acked-by: Heiko Carstens Link: https://lore.kernel.org/r/20210909162248.14969-4-david@redhat.com Signed-off-by: Christian Borntraeger --- arch/s390/mm/pgtable.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 034721a68d8f..2717a406edeb 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -988,6 +988,7 @@ EXPORT_SYMBOL(get_guest_storage_key); int pgste_perform_essa(struct mm_struct *mm, unsigned long hva, int orc, unsigned long *oldpte, unsigned long *oldpgste) { + struct vm_area_struct *vma; unsigned long pgstev; spinlock_t *ptl; pgste_t pgste; @@ -997,6 +998,10 @@ int pgste_perform_essa(struct mm_struct *mm, unsigned long hva, int orc, WARN_ON_ONCE(orc > ESSA_MAX); if (unlikely(orc > ESSA_MAX)) return -EINVAL; + + vma = vma_lookup(mm, hva); + if (!vma || is_vm_hugetlb_page(vma)) + return -EFAULT; ptep = get_locked_pte(mm, hva, &ptl); if (unlikely(!ptep)) return -EFAULT; @@ -1089,10 +1094,14 @@ EXPORT_SYMBOL(pgste_perform_essa); int set_pgste_bits(struct mm_struct *mm, unsigned long hva, unsigned long bits, unsigned long value) { + struct vm_area_struct *vma; spinlock_t *ptl; pgste_t new; pte_t *ptep; + vma = vma_lookup(mm, hva); + if (!vma || is_vm_hugetlb_page(vma)) + return -EFAULT; ptep = get_locked_pte(mm, hva, &ptl); if (unlikely(!ptep)) return -EFAULT; @@ -1117,9 +1126,13 @@ EXPORT_SYMBOL(set_pgste_bits); */ int get_pgste(struct mm_struct *mm, unsigned long hva, unsigned long *pgstep) { + struct vm_area_struct *vma; spinlock_t *ptl; pte_t *ptep; + vma = vma_lookup(mm, hva); + if (!vma || is_vm_hugetlb_page(vma)) + return -EFAULT; ptep = get_locked_pte(mm, hva, &ptl); if (unlikely(!ptep)) return -EFAULT; From 949f5c1244ee6c36d2e81c588d1200eaa83a3df6 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 9 Sep 2021 18:22:43 +0200 Subject: [PATCH 06/19] s390/mm: fix VMA and page table handling code in storage key handling functions There are multiple things broken about our storage key handling functions: 1. We should not walk/touch page tables outside of VMA boundaries when holding only the mmap sem in read mode. Evil user space can modify the VMA layout just before this function runs and e.g., trigger races with page table removal code since commit dd2283f2605e ("mm: mmap: zap pages with read mmap_sem in munmap"). gfn_to_hva() will only translate using KVM memory regions, but won't validate the VMA. 2. We should not allocate page tables outside of VMA boundaries: if evil user space decides to map hugetlbfs to these ranges, bad things will happen because we suddenly have PTE or PMD page tables where we shouldn't have them. 3. We don't handle large PUDs that might suddenly appear inside our page table hierarchy. Don't manually allocate page tables, properly validate that we have a VMA and bail out on pud_large(). All callers of page table handling functions, except get_guest_storage_key(), call fixup_user_fault() in case they receive an -EFAULT and retry; this will allocate the necessary page tables if required. To keep get_guest_storage_key() working as expected and not requiring kvm_s390_get_skeys() to call fixup_user_fault(), distinguish between "there is simply no page table or huge page yet and the key is assumed to be 0" and "this is a fault to be reported".
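A standalone model of that return-code convention; the walk outcomes are stubbed, but the switch mirrors the pmd_lookup() contract introduced by the diff below:

#include <errno.h>
#include <stdio.h>

/* stubbed outcome of walking pgd->p4d->pud for one address */
enum walk { WALK_OK, WALK_EMPTY, WALK_BAD };

static int pmd_lookup_model(enum walk w)
{
	switch (w) {
	case WALK_OK:		return 0;	/* *pmdp would be valid */
	case WALK_EMPTY:	return -ENOENT;	/* nothing mapped yet */
	default:		return -EFAULT;	/* no VMA, large PUD, ... */
	}
}

static int get_key_model(enum walk w, unsigned char *key)
{
	*key = 0;			/* unmapped memory reads as key 0 */
	switch (pmd_lookup_model(w)) {
	case -ENOENT:
		return 0;		/* report key 0, no error */
	case 0:
		break;			/* continue the locked walk */
	default:
		return -EFAULT;		/* callers fixup_user_fault() and retry */
	}
	/* ... the real locked PMD/PTE walk happens here ... */
	return 0;
}

int main(void)
{
	unsigned char key;
	int rc = get_key_model(WALK_EMPTY, &key);

	printf("empty tables -> rc %d, key %d\n", rc, key);	/* rc 0, key 0 */
	return 0;
}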
Although commit 637ff9efe5ea ("s390/mm: Add huge pmd storage key handling") introduced most of the affected code, it was actually already broken before that, as get_locked_pte() was used without any VMA checks. Note: Ever since commit 637ff9efe5ea ("s390/mm: Add huge pmd storage key handling") we can no longer set a guest storage key (for example from QEMU during VM live migration) without actually resolving a fault. Although we would have created most page tables, we would choke on the !pmd_present(), requiring a call to fixup_user_fault(). I would have thought that this is problematic in combination with postcopy live migration ... but nobody noticed and this patch doesn't change the situation. So maybe it's just fine. Fixes: 9fcf93b5de06 ("KVM: S390: Create helper function get_guest_storage_key") Fixes: 24d5dd0208ed ("s390/kvm: Provide function for setting the guest storage key") Fixes: a7e19ab55ffd ("KVM: s390: handle missing storage-key facility") Signed-off-by: David Hildenbrand Reviewed-by: Claudio Imbrenda Acked-by: Heiko Carstens Link: https://lore.kernel.org/r/20210909162248.14969-5-david@redhat.com Signed-off-by: Christian Borntraeger --- arch/s390/mm/pgtable.c | 57 +++++++++++++++++++++++++++++------------- 1 file changed, 39 insertions(+), 18 deletions(-) diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 2717a406edeb..6ad634a27d5b 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -429,22 +429,36 @@ static inline pmd_t pmdp_flush_lazy(struct mm_struct *mm, } #ifdef CONFIG_PGSTE -static pmd_t *pmd_alloc_map(struct mm_struct *mm, unsigned long addr) +static int pmd_lookup(struct mm_struct *mm, unsigned long addr, pmd_t **pmdp) { + struct vm_area_struct *vma; pgd_t *pgd; p4d_t *p4d; pud_t *pud; - pmd_t *pmd; + + /* We need a valid VMA, otherwise this is clearly a fault. */ + vma = vma_lookup(mm, addr); + if (!vma) + return -EFAULT; pgd = pgd_offset(mm, addr); - p4d = p4d_alloc(mm, pgd, addr); - if (!p4d) - return NULL; - pud = pud_alloc(mm, p4d, addr); - if (!pud) - return NULL; - pmd = pmd_alloc(mm, pud, addr); - return pmd; + if (!pgd_present(*pgd)) + return -ENOENT; + + p4d = p4d_offset(pgd, addr); + if (!p4d_present(*p4d)) + return -ENOENT; + + pud = pud_offset(p4d, addr); + if (!pud_present(*pud)) + return -ENOENT; + + /* Large PUDs are not supported yet. */ + if (pud_large(*pud)) + return -EFAULT; + + *pmdp = pmd_offset(pud, addr); + return 0; } #endif @@ -778,8 +792,7 @@ int set_guest_storage_key(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp; pte_t *ptep; - pmdp = pmd_alloc_map(mm, addr); - if (unlikely(!pmdp)) + if (pmd_lookup(mm, addr, &pmdp)) return -EFAULT; ptl = pmd_lock(mm, pmdp); @@ -881,8 +894,7 @@ int reset_guest_reference_bit(struct mm_struct *mm, unsigned long addr) pte_t *ptep; int cc = 0; - pmdp = pmd_alloc_map(mm, addr); - if (unlikely(!pmdp)) + if (pmd_lookup(mm, addr, &pmdp)) return -EFAULT; ptl = pmd_lock(mm, pmdp); @@ -935,15 +947,24 @@ int get_guest_storage_key(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp; pte_t *ptep; - pmdp = pmd_alloc_map(mm, addr); - if (unlikely(!pmdp)) + /* + * If we don't have a PTE table and if there is no huge page mapped, + * the storage key is 0.
+ */ + *key = 0; + + switch (pmd_lookup(mm, addr, &pmdp)) { + case -ENOENT: + return 0; + case 0: + break; + default: return -EFAULT; + } ptl = pmd_lock(mm, pmdp); if (!pmd_present(*pmdp)) { - /* Not yet mapped memory has a zero key */ spin_unlock(ptl); - *key = 0; return 0; } From 46c22ffd2772201662350bc7b94b9ea9d3ee5ac2 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 9 Sep 2021 18:22:44 +0200 Subject: [PATCH 07/19] s390/uv: fully validate the VMA before calling follow_page() We should not walk/touch page tables outside of VMA boundaries when holding only the mmap sem in read mode. Evil user space can modify the VMA layout just before this function runs and e.g., trigger races with page table removal code since commit dd2283f2605e ("mm: mmap: zap pages with read mmap_sem in munmap"). find_vma() does not check if the address is >= the VMA start address; use vma_lookup() instead. Fixes: 214d9bbcd3a6 ("s390/mm: provide memory management functions for protected KVM guests") Signed-off-by: David Hildenbrand Reviewed-by: Claudio Imbrenda Acked-by: Heiko Carstens Reviewed-by: Liam R. Howlett Link: https://lore.kernel.org/r/20210909162248.14969-6-david@redhat.com Signed-off-by: Christian Borntraeger --- arch/s390/kernel/uv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c index 5a656c7b7a67..f95ccbd39692 100644 --- a/arch/s390/kernel/uv.c +++ b/arch/s390/kernel/uv.c @@ -212,7 +212,7 @@ int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb) uaddr = __gmap_translate(gmap, gaddr); if (IS_ERR_VALUE(uaddr)) goto out; - vma = find_vma(gmap->mm, uaddr); + vma = vma_lookup(gmap->mm, uaddr); if (!vma) goto out; /* From 8318c404cf8c2b5141746af739fdb62c030508ca Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 9 Sep 2021 18:22:46 +0200 Subject: [PATCH 08/19] s390/mm: no need for pte_alloc_map_lock() if we know the pmd is present pte_offset_map_lock() is sufficient.
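A condensed sketch of the resulting flow (kernel context assumed: mm, pmdp, addr, ptl and ptep as in the three functions touched below, so this is an excerpt rather than a standalone program):

	ptl = pmd_lock(mm, pmdp);
	if (!pmd_present(*pmdp)) {
		spin_unlock(ptl);
		return -EFAULT;	/* nothing mapped at this point in the series */
	}
	/* the pmd_large() case is handled here, still under the PMD lock */
	spin_unlock(ptl);

	/*
	 * The PMD was present and not large, so a PTE table must exist:
	 * pte_offset_map_lock() only maps and locks it and never allocates,
	 * which is why the pte_alloc_map_lock() NULL check can go away.
	 */
	ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl);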
Signed-off-by: David Hildenbrand Acked-by: Heiko Carstens Link: https://lore.kernel.org/r/20210909162248.14969-8-david@redhat.com Signed-off-by: Christian Borntraeger --- arch/s390/mm/pgtable.c | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 6ad634a27d5b..e74cc59dcd67 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -814,10 +814,7 @@ int set_guest_storage_key(struct mm_struct *mm, unsigned long addr, } spin_unlock(ptl); - ptep = pte_alloc_map_lock(mm, pmdp, addr, &ptl); - if (unlikely(!ptep)) - return -EFAULT; - + ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl); new = old = pgste_get_lock(ptep); pgste_val(new) &= ~(PGSTE_GR_BIT | PGSTE_GC_BIT | PGSTE_ACC_BITS | PGSTE_FP_BIT); @@ -912,10 +909,7 @@ int reset_guest_reference_bit(struct mm_struct *mm, unsigned long addr) } spin_unlock(ptl); - ptep = pte_alloc_map_lock(mm, pmdp, addr, &ptl); - if (unlikely(!ptep)) - return -EFAULT; - + ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl); new = old = pgste_get_lock(ptep); /* Reset guest reference bit only */ pgste_val(new) &= ~PGSTE_GR_BIT; @@ -977,10 +971,7 @@ int get_guest_storage_key(struct mm_struct *mm, unsigned long addr, } spin_unlock(ptl); - ptep = pte_alloc_map_lock(mm, pmdp, addr, &ptl); - if (unlikely(!ptep)) - return -EFAULT; - + ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl); pgste = pgste_get_lock(ptep); *key = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56; paddr = pte_val(*ptep) & PAGE_MASK; From 7cb70266b0e3996daf4bbef9758b6e27e9ec904d Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 9 Sep 2021 18:22:47 +0200 Subject: [PATCH 09/19] s390/mm: optimize set_guest_storage_key() We already optimize get_guest_storage_key() to assume that, if we don't have a PTE table and don't have a huge page mapped, the storage key is 0. Similarly, optimize set_guest_storage_key() to simply do nothing in case the key to set is 0. Signed-off-by: David Hildenbrand Reviewed-by: Claudio Imbrenda Acked-by: Heiko Carstens Link: https://lore.kernel.org/r/20210909162248.14969-9-david@redhat.com Signed-off-by: Christian Borntraeger --- arch/s390/mm/pgtable.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index e74cc59dcd67..1c9aeb361f8d 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -792,13 +792,23 @@ int set_guest_storage_key(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp; pte_t *ptep; - if (pmd_lookup(mm, addr, &pmdp)) + /* + * If we don't have a PTE table and if there is no huge page mapped, + * we can ignore attempts to set the key to 0, because it already is 0. + */ + switch (pmd_lookup(mm, addr, &pmdp)) { + case -ENOENT: + return key ? -EFAULT : 0; + case 0: + break; + default: return -EFAULT; + } ptl = pmd_lock(mm, pmdp); if (!pmd_present(*pmdp)) { spin_unlock(ptl); - return -EFAULT; + return key ? -EFAULT : 0; } if (pmd_large(*pmdp)) { From 14ea40e22c4193ed71cd93ec79c0c05216c3600a Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 9 Sep 2021 18:22:48 +0200 Subject: [PATCH 10/19] s390/mm: optimize reset_guest_reference_bit() We already optimize get_guest_storage_key() to assume that, if we don't have a PTE table and don't have a huge page mapped, the storage key is 0. Similarly, optimize reset_guest_reference_bit() to simply do nothing if there is no PTE table and no huge page mapped.
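A standalone model of the shortcut these two optimizations share; "mapped" stands in for the whole PMD/PTE lookup, and the key values are arbitrary:

#include <errno.h>
#include <stdio.h>
#include <stdbool.h>

static int set_key_model(bool mapped, unsigned char key)
{
	if (!mapped)
		return key ? -EFAULT : 0;	/* setting 0 on key-0 memory: no-op */
	/* ... the real PMD/PTE update happens here ... */
	return 0;
}

int main(void)
{
	printf("set 0 on unmapped: %d\n", set_key_model(false, 0));	/* 0 */
	printf("set 5 on unmapped: %d\n", set_key_model(false, 5));	/* -EFAULT */
	return 0;
}

Callers that get -EFAULT still proceed to fixup_user_fault() and retry, exactly as before; only the needless fault for a zero key is avoided.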
Signed-off-by: David Hildenbrand Reviewed-by: Claudio Imbrenda Acked-by: Heiko Carstens Link: https://lore.kernel.org/r/20210909162248.14969-10-david@redhat.com Signed-off-by: Christian Borntraeger --- arch/s390/mm/pgtable.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 1c9aeb361f8d..c16232cd0ec5 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -901,13 +901,23 @@ int reset_guest_reference_bit(struct mm_struct *mm, unsigned long addr) pte_t *ptep; int cc = 0; - if (pmd_lookup(mm, addr, &pmdp)) + /* + * If we don't have a PTE table and if there is no huge page mapped, + * the storage key is 0 and there is nothing for us to do. + */ + switch (pmd_lookup(mm, addr, &pmdp)) { + case -ENOENT: + return 0; + case 0: + break; + default: return -EFAULT; + } ptl = pmd_lock(mm, pmdp); if (!pmd_present(*pmdp)) { spin_unlock(ptl); - return -EFAULT; + return 0; } if (pmd_large(*pmdp)) { From 57c5df13eca4017ed28f9375dc1d246ec0f54217 Mon Sep 17 00:00:00 2001 From: Claudio Imbrenda Date: Mon, 20 Sep 2021 15:24:49 +0200 Subject: [PATCH 11/19] KVM: s390: pv: add macros for UVC CC values Add macros to describe the 4 possible CC values returned by the UVC instruction. Signed-off-by: Claudio Imbrenda Reviewed-by: Christian Borntraeger Reviewed-by: Janosch Frank Message-Id: <20210920132502.36111-2-imbrenda@linux.ibm.com> Signed-off-by: Janosch Frank Signed-off-by: Christian Borntraeger --- arch/s390/include/asm/uv.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/s390/include/asm/uv.h b/arch/s390/include/asm/uv.h index fe92a4caf5ec..9ab1914c5b95 100644 --- a/arch/s390/include/asm/uv.h +++ b/arch/s390/include/asm/uv.h @@ -18,6 +18,11 @@ #include #include +#define UVC_CC_OK 0 +#define UVC_CC_ERROR 1 +#define UVC_CC_BUSY 2 +#define UVC_CC_PARTIAL 3 + #define UVC_RC_EXECUTED 0x0001 #define UVC_RC_INV_CMD 0x0002 #define UVC_RC_INV_STATE 0x0003 From d4074324b07a94a1fca476d452dfbb3a4e7bf656 Mon Sep 17 00:00:00 2001 From: Claudio Imbrenda Date: Mon, 20 Sep 2021 15:24:50 +0200 Subject: [PATCH 12/19] KVM: s390: pv: avoid double free of sida page If kvm_s390_pv_destroy_cpu is called more than once, we risk calling free_page on a random page, since the sidad field is aliased with the gbea, which is not guaranteed to be zero. This can happen, for example, if userspace calls the KVM_PV_DISABLE IOCTL, and it fails, and then userspace calls the same IOCTL again. This scenario is only possible if KVM has some serious bug or if the hardware is broken. The solution is to simply return success immediately if the vCPU is already non-secure.
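The hazard can be illustrated with a standalone model; the union is only an illustration of the aliasing the message describes, not the real sie_block layout:

#include <stdio.h>

union sie_overlay {
	unsigned long sidad;	/* valid only while the vCPU is secure */
	unsigned long gbea;	/* reused as guest data otherwise */
};

int main(void)
{
	union sie_overlay s = { .sidad = 0x52000UL };	/* hypothetical sida */

	/* first destroy: handle exists, sida page freed, handle cleared */
	s.gbea = 0xdeadbeefUL;	/* the field is reused after that */
	/* a second destroy without the handle check would free_page() this: */
	printf("stale sidad: %#lx\n", s.sidad);
	return 0;
}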
Signed-off-by: Claudio Imbrenda Fixes: 19e122776886 ("KVM: S390: protvirt: Introduce instruction data area bounce buffer") Reviewed-by: Janosch Frank Reviewed-by: Christian Borntraeger Message-Id: <20210920132502.36111-3-imbrenda@linux.ibm.com> Signed-off-by: Janosch Frank Signed-off-by: Christian Borntraeger --- arch/s390/kvm/pv.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/arch/s390/kvm/pv.c b/arch/s390/kvm/pv.c index c8841f476e91..0a854115100b 100644 --- a/arch/s390/kvm/pv.c +++ b/arch/s390/kvm/pv.c @@ -16,18 +16,17 @@ int kvm_s390_pv_destroy_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc) { - int cc = 0; + int cc; - if (kvm_s390_pv_cpu_get_handle(vcpu)) { - cc = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu), - UVC_CMD_DESTROY_SEC_CPU, rc, rrc); + if (!kvm_s390_pv_cpu_get_handle(vcpu)) + return 0; + + cc = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu), UVC_CMD_DESTROY_SEC_CPU, rc, rrc); + + KVM_UV_EVENT(vcpu->kvm, 3, "PROTVIRT DESTROY VCPU %d: rc %x rrc %x", + vcpu->vcpu_id, *rc, *rrc); + WARN_ONCE(cc, "protvirt destroy cpu failed rc %x rrc %x", *rc, *rrc); - KVM_UV_EVENT(vcpu->kvm, 3, - "PROTVIRT DESTROY VCPU %d: rc %x rrc %x", - vcpu->vcpu_id, *rc, *rrc); - WARN_ONCE(cc, "protvirt destroy cpu failed rc %x rrc %x", - *rc, *rrc); - } /* Intended memory leak for something that should never happen. */ if (!cc) free_pages(vcpu->arch.pv.stor_base, From 1e2aa46de526a5adafe580bca4c25856bb06f09e Mon Sep 17 00:00:00 2001 From: Claudio Imbrenda Date: Mon, 20 Sep 2021 15:24:51 +0200 Subject: [PATCH 13/19] KVM: s390: pv: avoid stalls for kvm_s390_pv_init_vm When the system is heavily overcommitted, kvm_s390_pv_init_vm might generate stall notifications. Fix this by using uv_call_sched instead of just uv_call. This is ok because we are not holding spinlocks. Signed-off-by: Claudio Imbrenda Fixes: 214d9bbcd3a672 ("s390/mm: provide memory management functions for protected KVM guests") Reviewed-by: Christian Borntraeger Reviewed-by: Janosch Frank Message-Id: <20210920132502.36111-4-imbrenda@linux.ibm.com> Signed-off-by: Janosch Frank Signed-off-by: Christian Borntraeger --- arch/s390/kvm/pv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/kvm/pv.c b/arch/s390/kvm/pv.c index 0a854115100b..00d272d134c2 100644 --- a/arch/s390/kvm/pv.c +++ b/arch/s390/kvm/pv.c @@ -195,7 +195,7 @@ int kvm_s390_pv_init_vm(struct kvm *kvm, u16 *rc, u16 *rrc) uvcb.conf_base_stor_origin = (u64)kvm->arch.pv.stor_base; uvcb.conf_virt_stor_origin = (u64)kvm->arch.pv.stor_var; - cc = uv_call(0, (u64)&uvcb); + cc = uv_call_sched(0, (u64)&uvcb); *rc = uvcb.header.rc; *rrc = uvcb.header.rrc; KVM_UV_EVENT(kvm, 3, "PROTVIRT CREATE VM: handle %llx len %llx rc %x rrc %x", From f0a1a0615a6ff6d38af2c65a522698fb4bb85df6 Mon Sep 17 00:00:00 2001 From: Claudio Imbrenda Date: Mon, 20 Sep 2021 15:24:52 +0200 Subject: [PATCH 14/19] KVM: s390: pv: avoid stalls when making pages secure Improve make_secure_pte to avoid stalls when the system is heavily overcommitted. This was especially problematic in kvm_s390_pv_unpack, because of the loop over all pages that needed unpacking. Due to the locks being held, it was not possible to simply replace uv_call with uv_call_sched. A more complex approach was needed, in which uv_call is replaced with __uv_call, which does not loop. When the UVC needs to be executed again, -EAGAIN is returned, and the caller (or its caller) will try again.
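The new error mapping can be modeled standalone; the UVC_CC_* values mirror the macros added earlier in this series, and 0x10a is the "page not mapped" return code already special-cased by the old code:

#include <errno.h>
#include <stdio.h>

#define UVC_CC_OK	0
#define UVC_CC_ERROR	1
#define UVC_CC_BUSY	2
#define UVC_CC_PARTIAL	3

/* model of the make_secure_pte() tail: one __uv_call attempt, no loop */
static int make_secure_model(int cc, int uvcb_rc)
{
	if (cc == UVC_CC_OK)
		return 0;
	if (cc == UVC_CC_BUSY || cc == UVC_CC_PARTIAL)
		return -EAGAIN;		/* caller retries outside the locks */
	return uvcb_rc == 0x10a ? -ENXIO : -EINVAL;
}

int main(void)
{
	printf("busy  -> %d\n", make_secure_model(UVC_CC_BUSY, 0));	/* -EAGAIN */
	printf("0x10a -> %d\n", make_secure_model(UVC_CC_ERROR, 0x10a));	/* -ENXIO */
	return 0;
}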
When -EAGAIN is returned, the path is the same as when the page is in writeback (and the writeback check is also performed, which is harmless). Fixes: 214d9bbcd3a672 ("s390/mm: provide memory management functions for protected KVM guests") Signed-off-by: Claudio Imbrenda Reviewed-by: Janosch Frank Reviewed-by: Christian Borntraeger Link: https://lore.kernel.org/r/20210920132502.36111-5-imbrenda@linux.ibm.com Signed-off-by: Christian Borntraeger --- arch/s390/kernel/uv.c | 29 +++++++++++++++++++++++------ arch/s390/kvm/intercept.c | 5 +++++ 2 files changed, 28 insertions(+), 6 deletions(-) diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c index f95ccbd39692..09b80d371409 100644 --- a/arch/s390/kernel/uv.c +++ b/arch/s390/kernel/uv.c @@ -165,7 +165,7 @@ static int make_secure_pte(pte_t *ptep, unsigned long addr, { pte_t entry = READ_ONCE(*ptep); struct page *page; - int expected, rc = 0; + int expected, cc = 0; if (!pte_present(entry)) return -ENXIO; @@ -181,12 +181,25 @@ static int make_secure_pte(pte_t *ptep, unsigned long addr, if (!page_ref_freeze(page, expected)) return -EBUSY; set_bit(PG_arch_1, &page->flags); - rc = uv_call(0, (u64)uvcb); + /* + * If the UVC does not succeed or fail immediately, we don't want to + * loop for long, or we might get stall notifications. + * On the other hand, this is a complex scenario and we are holding a lot of + * locks, so we can't easily sleep and reschedule. We try only once, + * and if the UVC returned busy or partial completion, we return + * -EAGAIN and we let the callers deal with it. + */ + cc = __uv_call(0, (u64)uvcb); page_ref_unfreeze(page, expected); - /* Return -ENXIO if the page was not mapped, -EINVAL otherwise */ - if (rc) - rc = uvcb->rc == 0x10a ? -ENXIO : -EINVAL; - return rc; + /* + * Return -ENXIO if the page was not mapped, -EINVAL for other errors. + * If busy or partially completed, return -EAGAIN. + */ + if (cc == UVC_CC_OK) + return 0; + else if (cc == UVC_CC_BUSY || cc == UVC_CC_PARTIAL) + return -EAGAIN; + return uvcb->rc == 0x10a ? -ENXIO : -EINVAL; } /* @@ -239,6 +252,10 @@ int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb) mmap_read_unlock(gmap->mm); if (rc == -EAGAIN) { + /* + * If we are here because the UVC returned busy or partial + * completion, this is just a useless check, but it is safe. + */ wait_on_page_writeback(page); } else if (rc == -EBUSY) { /* diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index 2bd8f854f1b4..d07ff646d844 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c @@ -518,6 +518,11 @@ static int handle_pv_uvc(struct kvm_vcpu *vcpu) */ if (rc == -EINVAL) return 0; + /* + * If we got -EAGAIN here, we simply return it. It will eventually + * get propagated all the way to userspace, which should then try + * again. + */ return rc; } From 8eeba194a32e0f50329354a696baaa2e3d9accc5 Mon Sep 17 00:00:00 2001 From: Eric Farman Date: Fri, 8 Oct 2021 22:31:07 +0200 Subject: [PATCH 15/19] KVM: s390: Simplify SIGP Set Arch handling The Principles of Operation describe the various reasons that each individual SIGP order might be rejected, and the status bits that are set for each condition. For example, for the Set Architecture order, it states: "If it is not true that all other CPUs in the configuration are in the stopped or check-stop state, ... bit 54 (incorrect state) ... is set to one." However, it also states: "... if the CZAM facility is installed, ... bit 55 (invalid parameter) ... is set to one."
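The bit numbers quoted above count from the most significant bit of the 64-bit status register, so they map to mask values as this quick standalone check shows; the resulting 0x200 and 0x100 match the kernel's SIGP_STATUS_INCORRECT_STATE and SIGP_STATUS_INVALID_PARAMETER constants used in the diff below:

#include <stdio.h>

int main(void)
{
	printf("bit 54 -> %#lx\n", 1UL << (63 - 54));	/* 0x200, incorrect state */
	printf("bit 55 -> %#lx\n", 1UL << (63 - 55));	/* 0x100, invalid parameter */
	return 0;
}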
Since the Configuration-z/Architecture-Architectural Mode (CZAM) facility is unconditionally presented, there is no need to examine each VCPU to determine if it is started/stopped. The order can simply be rejected outright with the Invalid Parameter bit. Fixes: b697e435aeee ("KVM: s390: Support Configuration z/Architecture Mode") Signed-off-by: Eric Farman Reviewed-by: Thomas Huth Reviewed-by: Claudio Imbrenda Reviewed-by: David Hildenbrand Reviewed-by: Christian Borntraeger Link: https://lore.kernel.org/r/20211008203112.1979843-2-farman@linux.ibm.com Signed-off-by: Christian Borntraeger --- arch/s390/kvm/sigp.c | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c index 683036c1c92a..cf4de80bd541 100644 --- a/arch/s390/kvm/sigp.c +++ b/arch/s390/kvm/sigp.c @@ -151,22 +151,10 @@ static int __sigp_stop_and_store_status(struct kvm_vcpu *vcpu, static int __sigp_set_arch(struct kvm_vcpu *vcpu, u32 parameter, u64 *status_reg) { - unsigned int i; - struct kvm_vcpu *v; - bool all_stopped = true; - - kvm_for_each_vcpu(i, v, vcpu->kvm) { - if (v == vcpu) - continue; - if (!is_vcpu_stopped(v)) - all_stopped = false; - } - *status_reg &= 0xffffffff00000000UL; /* Reject set arch order, with czam we're always in z/Arch mode. */ - *status_reg |= (all_stopped ? SIGP_STATUS_INVALID_PARAMETER : - SIGP_STATUS_INCORRECT_STATE); + *status_reg |= SIGP_STATUS_INVALID_PARAMETER; return SIGP_CC_STATUS_STORED; } From 67cf68b6a5ccac8bc7dfef0a220b59af4c83fd2c Mon Sep 17 00:00:00 2001 From: Eric Farman Date: Fri, 8 Oct 2021 22:31:12 +0200 Subject: [PATCH 16/19] KVM: s390: Add a routine for setting userspace CPU state This capability exists, but we don't record anything when userspace enables it. Let's refactor that code so that a note can be made in the debug logs that it was enabled.
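The behaviour of the new helper can be sketched standalone: the debug note is emitted only on the 0 -> 1 transition, so repeated enables (e.g. one per PV command) stay quiet:

#include <stdio.h>

static int user_cpu_state_ctrl;	/* models kvm->arch.user_cpu_state_ctrl */

static void set_user_cpu_state_ctrl(void)
{
	if (user_cpu_state_ctrl)
		return;		/* already enabled: no duplicate log entry */
	printf("ENABLE: Userspace CPU state control\n");
	user_cpu_state_ctrl = 1;
}

int main(void)
{
	set_user_cpu_state_ctrl();	/* logs once */
	set_user_cpu_state_ctrl();	/* silent */
	return 0;
}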
Signed-off-by: Eric Farman Reviewed-by: Thomas Huth Reviewed-by: Claudio Imbrenda Reviewed-by: David Hildenbrand Link: https://lore.kernel.org/r/20211008203112.1979843-7-farman@linux.ibm.com Signed-off-by: Christian Borntraeger --- arch/s390/kvm/kvm-s390.c | 6 +++--- arch/s390/kvm/kvm-s390.h | 9 +++++++++ 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 1c97493d21e1..6482ea9139bb 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -2487,8 +2487,8 @@ long kvm_arch_vm_ioctl(struct file *filp, case KVM_S390_PV_COMMAND: { struct kvm_pv_cmd args; - /* protvirt means user sigp */ - kvm->arch.user_cpu_state_ctrl = 1; + /* protvirt means user cpu state */ + kvm_s390_set_user_cpu_state_ctrl(kvm); r = 0; if (!is_prot_virt_host()) { r = -EINVAL; @@ -3802,7 +3802,7 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, vcpu_load(vcpu); /* user space knows about this interface - let it control the state */ - vcpu->kvm->arch.user_cpu_state_ctrl = 1; + kvm_s390_set_user_cpu_state_ctrl(vcpu->kvm); switch (mp_state->mp_state) { case KVM_MP_STATE_STOPPED: diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index 52bc8fbaa60a..c07a050d757d 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -208,6 +208,15 @@ static inline int kvm_s390_user_cpu_state_ctrl(struct kvm *kvm) return kvm->arch.user_cpu_state_ctrl != 0; } +static inline void kvm_s390_set_user_cpu_state_ctrl(struct kvm *kvm) +{ + if (kvm->arch.user_cpu_state_ctrl) + return; + + VM_EVENT(kvm, 3, "%s", "ENABLE: Userspace CPU state control"); + kvm->arch.user_cpu_state_ctrl = 1; +} + /* implemented in pv.c */ int kvm_s390_pv_destroy_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc); int kvm_s390_pv_create_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc); From 85f517b29418158d3e6e90c3f0fc01b306d2f1a1 Mon Sep 17 00:00:00 2001 From: Janis Schoetterl-Glausch Date: Fri, 22 Oct 2021 17:26:48 +0200 Subject: [PATCH 17/19] KVM: s390: Fix handle_sske page fault handling If handle_sske cannot set the storage key, because there is no page table entry or no present large page entry, it calls fixup_user_fault. However, currently, if the call succeeds, handle_sske returns -EAGAIN, without having set the storage key. Instead, retry by continue'ing the loop without incrementing the address. The same issue in handle_pfmf was fixed by a11bdb1a6b78 ("KVM: s390: Fix pfmf and conditional skey emulation"). 
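A standalone model of the fixed loop; the toy backend makes the first attempt on the first page fail once, standing in for the fault that fixup_user_fault() resolves:

#include <errno.h>
#include <stdio.h>

static int cond_set_key(unsigned long addr, int *faulted_once)
{
	if (addr == 0x1000 && !*faulted_once) {
		*faulted_once = 1;
		return -EAGAIN;	/* fault fixed up, but key still not set */
	}
	return 0;
}

int main(void)
{
	unsigned long start = 0x1000, end = 0x3000;
	int faulted_once = 0;

	while (start < end) {
		int rc = cond_set_key(start, &faulted_once);

		if (rc == -EAGAIN)
			continue;	/* retry the same address */
		if (rc < 0)
			return rc;
		printf("key set at %#lx\n", start);
		start += 0x1000;	/* PAGE_SIZE stand-in */
	}
	return 0;
}

The old code fell through after a successful fixup_user_fault() and returned -EAGAIN to the caller without ever setting the key; the continue retries the same address instead.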
Fixes: bd096f644319 ("KVM: s390: Add skey emulation fault handling") Signed-off-by: Janis Schoetterl-Glausch Reviewed-by: Christian Borntraeger Reviewed-by: Claudio Imbrenda Link: https://lore.kernel.org/r/20211022152648.26536-1-scgl@linux.ibm.com Signed-off-by: Christian Borntraeger --- arch/s390/kvm/priv.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 53da4ceb16a3..417154b314a6 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -397,6 +397,8 @@ static int handle_sske(struct kvm_vcpu *vcpu) mmap_read_unlock(current->mm); if (rc == -EFAULT) return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + if (rc == -EAGAIN) + continue; if (rc < 0) return rc; start += PAGE_SIZE; From 380d97bd02fca7b9b41aec2d1c767874d602bc78 Mon Sep 17 00:00:00 2001 From: Claudio Imbrenda Date: Mon, 20 Sep 2021 15:24:54 +0200 Subject: [PATCH 18/19] KVM: s390: pv: properly handle page flags for protected guests Introduce variants of the convert and destroy page functions that also clear the PG_arch_1 bit used to mark them as secure pages. The PG_arch_1 flag is always allowed to overindicate; using the new functions introduced here allows us to reduce the extent of overindication and thus improve performance. These new functions can only be called on pages for which a reference is already being held. Signed-off-by: Claudio Imbrenda Reviewed-by: Janosch Frank Reviewed-by: Christian Borntraeger Link: https://lore.kernel.org/r/20210920132502.36111-7-imbrenda@linux.ibm.com Signed-off-by: Christian Borntraeger --- arch/s390/include/asm/pgtable.h | 9 ++++++--- arch/s390/include/asm/uv.h | 10 ++++++++-- arch/s390/kernel/uv.c | 34 ++++++++++++++++++++++++++++++++- arch/s390/mm/gmap.c | 4 +++- 4 files changed, 50 insertions(+), 7 deletions(-) diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index b61426c9ef17..e43416950245 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -1074,8 +1074,9 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, pte_t res; res = ptep_xchg_lazy(mm, addr, ptep, __pte(_PAGE_INVALID)); + /* At this point the reference through the mapping is still present */ if (mm_is_protected(mm) && pte_present(res)) - uv_convert_from_secure(pte_val(res) & PAGE_MASK); + uv_convert_owned_from_secure(pte_val(res) & PAGE_MASK); return res; } @@ -1091,8 +1092,9 @@ static inline pte_t ptep_clear_flush(struct vm_area_struct *vma, pte_t res; res = ptep_xchg_direct(vma->vm_mm, addr, ptep, __pte(_PAGE_INVALID)); + /* At this point the reference through the mapping is still present */ if (mm_is_protected(vma->vm_mm) && pte_present(res)) - uv_convert_from_secure(pte_val(res) & PAGE_MASK); + uv_convert_owned_from_secure(pte_val(res) & PAGE_MASK); return res; } @@ -1116,8 +1118,9 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, } else { res = ptep_xchg_lazy(mm, addr, ptep, __pte(_PAGE_INVALID)); } + /* At this point the reference through the mapping is still present */ if (mm_is_protected(mm) && pte_present(res)) - uv_convert_from_secure(pte_val(res) & PAGE_MASK); + uv_convert_owned_from_secure(pte_val(res) & PAGE_MASK); return res; } diff --git a/arch/s390/include/asm/uv.h b/arch/s390/include/asm/uv.h index 9ab1914c5b95..72d3e49c2860 100644 --- a/arch/s390/include/asm/uv.h +++ b/arch/s390/include/asm/uv.h @@ -356,8 +356,9 @@ static inline int is_prot_virt_host(void) } int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb); -int 
uv_destroy_page(unsigned long paddr); +int uv_destroy_owned_page(unsigned long paddr); int uv_convert_from_secure(unsigned long paddr); +int uv_convert_owned_from_secure(unsigned long paddr); int gmap_convert_to_secure(struct gmap *gmap, unsigned long gaddr); void setup_uv(void); @@ -365,7 +366,7 @@ void setup_uv(void); #define is_prot_virt_host() 0 static inline void setup_uv(void) {} -static inline int uv_destroy_page(unsigned long paddr) +static inline int uv_destroy_owned_page(unsigned long paddr) { return 0; } @@ -374,6 +375,11 @@ static inline int uv_convert_from_secure(unsigned long paddr) { return 0; } + +static inline int uv_convert_owned_from_secure(unsigned long paddr) +{ + return 0; +} #endif #endif /* _ASM_S390_UV_H */ diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c index 09b80d371409..8b0e62507d62 100644 --- a/arch/s390/kernel/uv.c +++ b/arch/s390/kernel/uv.c @@ -100,7 +100,7 @@ static int uv_pin_shared(unsigned long paddr) * * @paddr: Absolute host address of page to be destroyed */ -int uv_destroy_page(unsigned long paddr) +static int uv_destroy_page(unsigned long paddr) { struct uv_cb_cfs uvcb = { .header.cmd = UVC_CMD_DESTR_SEC_STOR, @@ -120,6 +120,22 @@ int uv_destroy_page(unsigned long paddr) return 0; } +/* + * The caller must already hold a reference to the page + */ +int uv_destroy_owned_page(unsigned long paddr) +{ + struct page *page = phys_to_page(paddr); + int rc; + + get_page(page); + rc = uv_destroy_page(paddr); + if (!rc) + clear_bit(PG_arch_1, &page->flags); + put_page(page); + return rc; +} + /* * Requests the Ultravisor to encrypt a guest page and make it * accessible to the host for paging (export). @@ -139,6 +155,22 @@ int uv_convert_from_secure(unsigned long paddr) return 0; } +/* + * The caller must already hold a reference to the page + */ +int uv_convert_owned_from_secure(unsigned long paddr) +{ + struct page *page = phys_to_page(paddr); + int rc; + + get_page(page); + rc = uv_convert_from_secure(paddr); + if (!rc) + clear_bit(PG_arch_1, &page->flags); + put_page(page); + return rc; +} + /* * Calculate the expected ref_count for a page that would otherwise have no * further pins. This was cribbed from similar functions in other places in diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index d63c0ccc5ccd..dfee0ebb2fac 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -2684,8 +2684,10 @@ static int __s390_reset_acc(pte_t *ptep, unsigned long addr, { pte_t pte = READ_ONCE(*ptep); + /* There is a reference through the mapping */ if (pte_present(pte)) - WARN_ON_ONCE(uv_destroy_page(pte_val(pte) & PAGE_MASK)); + WARN_ON_ONCE(uv_destroy_owned_page(pte_val(pte) & PAGE_MASK)); + return 0; } From 3fd8417f2c728d810a3b26d7e2008012ffb7fd01 Mon Sep 17 00:00:00 2001 From: Collin Walling Date: Tue, 26 Oct 2021 22:54:51 -0400 Subject: [PATCH 19/19] KVM: s390: add debug statement for diag 318 CPNC data The diag 318 data contains values that denote information regarding the guest's environment. Currently, it is unnecessarily difficult to observe this value (manually-inserted debug statements, gdb stepping, mem dumping, etc.). It's useful to observe this information to obtain an at-a-glance view of the guest's environment, so let's add a simple VCPU event that prints the CPNC to the s390dbf logs.
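For reference, a standalone sketch of where the CPNC sits in the diag 318 value: the 8-bit CPNC occupies the top byte and the 56-bit CPVC the rest, matching the cpnc/cpvc split of the kernel's diag318_info; the sample value is hypothetical:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t diag318_val = 0x0400000000001234ULL;	/* made-up value */
	unsigned int cpnc = (unsigned int)(diag318_val >> 56);

	printf("setting cpnc to %u\n", cpnc);	/* mirrors the new VCPU_EVENT */
	return 0;
}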
Signed-off-by: Collin Walling Acked-by: Christian Borntraeger Link: https://lore.kernel.org/r/20211027025451.290124-1-walling@linux.ibm.com [borntraeger@de.ibm.com]: change debug level to 3 Signed-off-by: Christian Borntraeger --- arch/s390/kvm/kvm-s390.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 6482ea9139bb..c6257f625929 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -4255,6 +4255,7 @@ static void sync_regs_fmt2(struct kvm_vcpu *vcpu) if (kvm_run->kvm_dirty_regs & KVM_SYNC_DIAG318) { vcpu->arch.diag318_info.val = kvm_run->s.regs.diag318; vcpu->arch.sie_block->cpnc = vcpu->arch.diag318_info.cpnc; + VCPU_EVENT(vcpu, 3, "setting cpnc to %d", vcpu->arch.diag318_info.cpnc); } /* * If userspace sets the riccb (e.g. after migration) to a valid state,