KVM: arm64: Refactor user_mem_abort()

Refactor user_mem_abort() to improve code clarity and simplify
assumptions within the function.

Key changes include:

* Immediately set force_pte to true at the beginning of the function if
  logging_active is true. This simplifies the flow and makes the
  condition for forcing a PTE more explicit.

* Remove the misleading comment stating that logging_active is
  guaranteed to never be true for VM_PFNMAP memslots, as this assertion
  is not entirely correct.

* Extract reusable code blocks into new helper functions:
  * prepare_mmu_memcache(): Encapsulates the logic for preparing and
    topping up the MMU page cache.
  * adjust_nested_fault_perms(): Isolates the adjustments to shadow S2
    permissions and the encoding of nested translation levels.

* Update min(a, (long)b) to min_t(long, a, b) for better type safety and
  consistency (see the short sketch after this list).

* Perform other minor tidying up of the code.
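
For context, a minimal sketch of the min() vs. min_t() difference. The
macro bodies and the clamp_pagesize() wrapper below are simplified,
hypothetical stand-ins for illustration only, not the kernel's actual
definitions or code:

	/* Simplified stand-ins; the real kernel macros also type-check. */
	#define min(a, b)          ((a) < (b) ? (a) : (b))
	#define min_t(type, a, b)  ((type)(a) < (type)(b) ? (type)(a) : (type)(b))

	long clamp_pagesize(long vma_pagesize, unsigned long max_map_size)
	{
		/* Before: the caller casts one operand by hand. */
		long old_way = min(vma_pagesize, (long)max_map_size);

		/* After: min_t() converts both operands to 'long' itself. */
		long new_way = min_t(long, vma_pagesize, max_map_size);

		return old_way == new_way ? new_way : -1;
	}

Both forms compute the same value here; min_t() simply moves the cast
into the macro and applies it to both operands, which is why it reads
as the more type-safe and consistent spelling.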

These changes primarily aim to simplify user_mem_abort() and make its
logic easier to understand and maintain, setting the stage for future
modifications.

Reviewed-by: Gavin Shan <gshan@redhat.com>
Reviewed-by: Marc Zyngier <maz@kernel.org>
Reviewed-by: Tao Chan <chentao@kylinos.cn>
Signed-off-by: Fuad Tabba <tabba@google.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-ID: <20250729225455.670324-18-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
@@ -1477,13 +1477,56 @@ static bool kvm_vma_is_cacheable(struct vm_area_struct *vma)
 	}
 }
 
+static int prepare_mmu_memcache(struct kvm_vcpu *vcpu, bool topup_memcache,
+				void **memcache)
+{
+	int min_pages;
+
+	if (!is_protected_kvm_enabled())
+		*memcache = &vcpu->arch.mmu_page_cache;
+	else
+		*memcache = &vcpu->arch.pkvm_memcache;
+
+	if (!topup_memcache)
+		return 0;
+
+	min_pages = kvm_mmu_cache_min_pages(vcpu->arch.hw_mmu);
+
+	if (!is_protected_kvm_enabled())
+		return kvm_mmu_topup_memory_cache(*memcache, min_pages);
+
+	return topup_hyp_memcache(*memcache, min_pages);
+}
+
+/*
+ * Potentially reduce shadow S2 permissions to match the guest's own S2. For
+ * exec faults, we'd only reach this point if the guest actually allowed it (see
+ * kvm_s2_handle_perm_fault).
+ *
+ * Also encode the level of the original translation in the SW bits of the leaf
+ * entry as a proxy for the span of that translation. This will be retrieved on
+ * TLB invalidation from the guest and used to limit the invalidation scope if a
+ * TTL hint or a range isn't provided.
+ */
+static void adjust_nested_fault_perms(struct kvm_s2_trans *nested,
+				      enum kvm_pgtable_prot *prot,
+				      bool *writable)
+{
+	*writable &= kvm_s2_trans_writable(nested);
+	if (!kvm_s2_trans_readable(nested))
+		*prot &= ~KVM_PGTABLE_PROT_R;
+
+	*prot |= kvm_encode_nested_level(nested);
+}
+
 static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 			  struct kvm_s2_trans *nested,
 			  struct kvm_memory_slot *memslot, unsigned long hva,
 			  bool fault_is_perm)
 {
 	int ret = 0;
-	bool write_fault, writable, force_pte = false;
+	bool topup_memcache;
+	bool write_fault, writable;
 	bool exec_fault, mte_allowed, is_vma_cacheable;
 	bool s2_force_noncacheable = false, vfio_allow_any_uc = false;
 	unsigned long mmu_seq;
@@ -1495,6 +1538,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	gfn_t gfn;
 	kvm_pfn_t pfn;
 	bool logging_active = memslot_is_logging(memslot);
+	bool force_pte = logging_active;
 	long vma_pagesize, fault_granule;
 	enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R;
 	struct kvm_pgtable *pgt;
@@ -1506,17 +1550,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	fault_granule = kvm_vcpu_trap_get_perm_fault_granule(vcpu);
 	write_fault = kvm_is_write_fault(vcpu);
 	exec_fault = kvm_vcpu_trap_is_exec_fault(vcpu);
-	VM_BUG_ON(write_fault && exec_fault);
-
-	if (fault_is_perm && !write_fault && !exec_fault) {
-		kvm_err("Unexpected L2 read permission error\n");
-		return -EFAULT;
-	}
-
-	if (!is_protected_kvm_enabled())
-		memcache = &vcpu->arch.mmu_page_cache;
-	else
-		memcache = &vcpu->arch.pkvm_memcache;
+	VM_WARN_ON_ONCE(write_fault && exec_fault);
 
 	/*
 	 * Permission faults just need to update the existing leaf entry,
@@ -1524,17 +1558,10 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	 * only exception to this is when dirty logging is enabled at runtime
 	 * and a write fault needs to collapse a block entry into a table.
 	 */
-	if (!fault_is_perm || (logging_active && write_fault)) {
-		int min_pages = kvm_mmu_cache_min_pages(vcpu->arch.hw_mmu);
-
-		if (!is_protected_kvm_enabled())
-			ret = kvm_mmu_topup_memory_cache(memcache, min_pages);
-		else
-			ret = topup_hyp_memcache(memcache, min_pages);
-
-		if (ret)
-			return ret;
-	}
+	topup_memcache = !fault_is_perm || (logging_active && write_fault);
+	ret = prepare_mmu_memcache(vcpu, topup_memcache, &memcache);
+	if (ret)
+		return ret;
 
 	/*
 	 * Let's check if we will get back a huge page backed by hugetlbfs, or
@@ -1548,16 +1575,10 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 		return -EFAULT;
 	}
 
-	/*
-	 * logging_active is guaranteed to never be true for VM_PFNMAP
-	 * memslots.
-	 */
-	if (logging_active) {
-		force_pte = true;
+	if (force_pte)
 		vma_shift = PAGE_SHIFT;
-	} else {
+	else
 		vma_shift = get_vma_page_shift(vma, hva);
-	}
 
 	switch (vma_shift) {
 #ifndef __PAGETABLE_PMD_FOLDED
@@ -1609,7 +1630,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 			max_map_size = PAGE_SIZE;
 
 		force_pte = (max_map_size == PAGE_SIZE);
-		vma_pagesize = min(vma_pagesize, (long)max_map_size);
+		vma_pagesize = min_t(long, vma_pagesize, max_map_size);
 	}
 
 	/*
@@ -1642,7 +1663,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	 * Rely on mmap_read_unlock() for an implicit smp_rmb(), which pairs
 	 * with the smp_wmb() in kvm_mmu_invalidate_end().
 	 */
-	mmu_seq = vcpu->kvm->mmu_invalidate_seq;
+	mmu_seq = kvm->mmu_invalidate_seq;
 	mmap_read_unlock(current->mm);
 
 	pfn = __kvm_faultin_pfn(memslot, gfn, write_fault ? FOLL_WRITE : 0,
@@ -1698,24 +1719,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	if (exec_fault && s2_force_noncacheable)
 		return -ENOEXEC;
 
-	/*
-	 * Potentially reduce shadow S2 permissions to match the guest's own
-	 * S2. For exec faults, we'd only reach this point if the guest
-	 * actually allowed it (see kvm_s2_handle_perm_fault).
-	 *
-	 * Also encode the level of the original translation in the SW bits
-	 * of the leaf entry as a proxy for the span of that translation.
-	 * This will be retrieved on TLB invalidation from the guest and
-	 * used to limit the invalidation scope if a TTL hint or a range
-	 * isn't provided.
-	 */
-	if (nested) {
-		writable &= kvm_s2_trans_writable(nested);
-		if (!kvm_s2_trans_readable(nested))
-			prot &= ~KVM_PGTABLE_PROT_R;
-
-		prot |= kvm_encode_nested_level(nested);
-	}
+	if (nested)
+		adjust_nested_fault_perms(nested, &prot, &writable);
 
 	kvm_fault_lock(kvm);
 	pgt = vcpu->arch.hw_mmu->pgt;
@@ -1981,6 +1986,9 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
 		goto out_unlock;
 	}
 
+	VM_WARN_ON_ONCE(kvm_vcpu_trap_is_permission_fault(vcpu) &&
+			!write_fault && !kvm_vcpu_trap_is_exec_fault(vcpu));
+
 	ret = user_mem_abort(vcpu, fault_ipa, nested, memslot, hva,
 			     esr_fsc_is_permission_fault(esr));
 	if (ret == 0)