Merge tag 'mm-stable-2022-12-13' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm

Pull MM updates from Andrew Morton:

 - More userfaultfd work from Peter Xu

 - Several convert-to-folios series from Sidhartha Kumar and Huang Ying

 - Some filemap cleanups from Vishal Moola

 - David Hildenbrand added the ability to selftest anon memory COW
   handling

 - Some cpuset simplifications from Liu Shixin

 - Addition of vmalloc tracing support by Uladzislau Rezki

 - Some pagecache folioifications and simplifications from Matthew
   Wilcox

 - A pagemap cleanup from Kefeng Wang: we have VM_ACCESS_FLAGS, so use
   it

 - Miguel Ojeda contributed some cleanups for our use of the
   __no_sanitize_thread__ gcc attribute.

   This series should have been in the non-MM tree, my bad

 - Naoya Horiguchi improved the interaction between memory poisoning and
   memory section removal for huge pages

 - DAMON cleanups and tuneups from SeongJae Park

 - Tony Luck fixed the handling of COW faults against poisoned pages

 - Peter Xu utilized the PTE marker code for handling swapin errors

 - Hugh Dickins reworked compound page mapcount handling, simplifying it
   and making it more efficient

 - Removal of the autonuma savedwrite infrastructure from Nadav Amit and
   David Hildenbrand

 - zram support for multiple compression streams from Sergey Senozhatsky

 - David Hildenbrand reworked the GUP code's R/O long-term pinning so
   that drivers no longer need to use the FOLL_FORCE workaround which
   didn't work very well anyway

 - Mel Gorman altered the page allocator so that local IRQs can remain
   enabled during per-cpu page allocations

 - Vishal Moola removed the try_to_release_page() wrapper

 - Stefan Roesch added some per-BDI sysfs tunables which are used to
   prevent network block devices from dirtying excessive amounts of
   pagecache

 - David Hildenbrand did some cleanup and repair work on KSM COW
   breaking

 - Nhat Pham and Johannes Weiner have implemented writeback in zswap's
   zsmalloc backend

 - Brian Foster has fixed a longstanding corner-case oddity in
   file[map]_write_and_wait_range()

 - sparse-vmemmap changes for MIPS, LoongArch and NIOS2 from Feiyang
   Chen

 - Shiyang Ruan has done some work on fsdax, to make its reflink mode
   work better under xfstests. Better, but still not perfect

 - Christoph Hellwig has removed the .writepage() method from several
   filesystems. They only need .writepages()

 - Yosry Ahmed wrote a series which fixes the memcg reclaim target
   beancounting

 - David Hildenbrand has fixed some of our MM selftests for 32-bit
   machines

 - Many singleton patches, as usual

* tag 'mm-stable-2022-12-13' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (313 commits)
  mm/hugetlb: set head flag before setting compound_order in __prep_compound_gigantic_folio
  mm: mmu_gather: allow more than one batch of delayed rmaps
  mm: fix typo in struct pglist_data code comment
  kmsan: fix memcpy tests
  mm: add cond_resched() in swapin_walk_pmd_entry()
  mm: do not show fs mm pc for VM_LOCKONFAULT pages
  selftests/vm: ksm_functional_tests: fixes for 32bit
  selftests/vm: cow: fix compile warning on 32bit
  selftests/vm: madv_populate: fix missing MADV_POPULATE_(READ|WRITE) definitions
  mm/gup_test: fix PIN_LONGTERM_TEST_READ with highmem
  mm,thp,rmap: fix races between updates of subpages_mapcount
  mm: memcg: fix swapcached stat accounting
  mm: add nodes= arg to memory.reclaim
  mm: disable top-tier fallback to reclaim on proactive reclaim
  selftests: cgroup: make sure reclaim target memcg is unprotected
  selftests: cgroup: refactor proactive reclaim code to reclaim_until()
  mm: memcg: fix stale protection of reclaim target memcg
  mm/mmap: properly unaccount memory on mas_preallocate() failure
  omfs: remove ->writepage
  jfs: remove ->writepage
  ...
Committed by Linus Torvalds, 2022-12-13 19:29:45 -08:00
237 changed files with 9295 additions and 5061 deletions


@@ -102,8 +102,18 @@ static inline unsigned long wb_stat_error(void)
#endif
}
/* BDI ratio is expressed as part per 1000000 for finer granularity. */
#define BDI_RATIO_SCALE 10000
u64 bdi_get_min_bytes(struct backing_dev_info *bdi);
u64 bdi_get_max_bytes(struct backing_dev_info *bdi);
int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio);
int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio);
int bdi_set_min_ratio_no_scale(struct backing_dev_info *bdi, unsigned int min_ratio);
int bdi_set_max_ratio_no_scale(struct backing_dev_info *bdi, unsigned int max_ratio);
int bdi_set_min_bytes(struct backing_dev_info *bdi, u64 min_bytes);
int bdi_set_max_bytes(struct backing_dev_info *bdi, u64 max_bytes);
int bdi_set_strict_limit(struct backing_dev_info *bdi, unsigned int strict_limit);
/*
* Flags in backing_dev_info::capability
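For illustration only (not part of the diff): an in-kernel user holding a struct backing_dev_info could combine the new byte-based limit with strict enforcement roughly as below; the function name and the 64 MiB figure are invented for this sketch.

/* Illustrative sketch only, not from the patch. */
static int example_limit_bdi_dirty(struct backing_dev_info *bdi)
{
        int err;

        /* Allow at most 64 MiB of dirty pagecache against this device... */
        err = bdi_set_max_bytes(bdi, 64ULL << 20);
        if (err)
                return err;

        /* ...and enforce it as a hard limit rather than a soft target. */
        return bdi_set_strict_limit(bdi, 1);
}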


@@ -82,26 +82,21 @@
#define __noscs __attribute__((__no_sanitize__("shadow-call-stack")))
#endif
#if __has_attribute(__no_sanitize_address__)
#define __no_sanitize_address __attribute__((no_sanitize_address))
#else
#define __no_sanitize_address
#endif
#define __no_sanitize_address __attribute__((__no_sanitize_address__))
#if defined(__SANITIZE_THREAD__) && __has_attribute(__no_sanitize_thread__)
#define __no_sanitize_thread __attribute__((no_sanitize_thread))
#if defined(__SANITIZE_THREAD__)
#define __no_sanitize_thread __attribute__((__no_sanitize_thread__))
#else
#define __no_sanitize_thread
#endif
#if __has_attribute(__no_sanitize_undefined__)
#define __no_sanitize_undefined __attribute__((no_sanitize_undefined))
#else
#define __no_sanitize_undefined
#endif
#define __no_sanitize_undefined __attribute__((__no_sanitize_undefined__))
/*
* Only supported since gcc >= 12
*/
#if defined(CONFIG_KCOV) && __has_attribute(__no_sanitize_coverage__)
#define __no_sanitize_coverage __attribute__((no_sanitize_coverage))
#define __no_sanitize_coverage __attribute__((__no_sanitize_coverage__))
#else
#define __no_sanitize_coverage
#endif
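For reference, a hedged illustration of how these per-function sanitizer opt-outs are applied; the function is hypothetical.

/* Illustrative sketch only. */
static __no_sanitize_address __no_sanitize_thread
void example_racy_peek(const int *p, int *out)
{
        /* Deliberately unsynchronized access that must not be instrumented. */
        *out = READ_ONCE(*p);
}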


@@ -357,6 +357,7 @@ struct damon_operations {
* @after_wmarks_check: Called after each schemes' watermarks check.
* @after_sampling: Called after each sampling.
* @after_aggregation: Called after each aggregation.
* @before_damos_apply: Called before applying DAMOS action.
* @before_terminate: Called before terminating the monitoring.
* @private: User private data.
*
@@ -385,6 +386,10 @@ struct damon_callback {
int (*after_wmarks_check)(struct damon_ctx *context);
int (*after_sampling)(struct damon_ctx *context);
int (*after_aggregation)(struct damon_ctx *context);
int (*before_damos_apply)(struct damon_ctx *context,
struct damon_target *target,
struct damon_region *region,
struct damos *scheme);
void (*before_terminate)(struct damon_ctx *context);
};
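A minimal sketch of the new hook; the callback name and the 2 MiB threshold are invented, and the skip-on-non-zero convention is an assumption based on the surrounding callbacks.

/* Illustrative sketch only. */
static int example_before_damos_apply(struct damon_ctx *ctx,
                                      struct damon_target *t,
                                      struct damon_region *r,
                                      struct damos *s)
{
        /* Assumed convention: non-zero return skips the action for this region. */
        if (r->ar.end - r->ar.start < SZ_2M)
                return 1;
        return 0;
}

/* installed as: ctx->callback.before_damos_apply = example_before_damos_apply; */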


@@ -205,6 +205,8 @@ static inline void dax_unlock_mapping_entry(struct address_space *mapping,
}
#endif
int dax_file_unshare(struct inode *inode, loff_t pos, loff_t len,
const struct iomap_ops *ops);
int dax_zero_range(struct inode *inode, loff_t pos, loff_t len, bool *did_zero,
const struct iomap_ops *ops);
int dax_truncate_page(struct inode *inode, loff_t pos, bool *did_zero,


@@ -319,6 +319,32 @@ static inline void copy_user_highpage(struct page *to, struct page *from,
#endif
#ifdef copy_mc_to_kernel
static inline int copy_mc_user_highpage(struct page *to, struct page *from,
unsigned long vaddr, struct vm_area_struct *vma)
{
unsigned long ret;
char *vfrom, *vto;
vfrom = kmap_local_page(from);
vto = kmap_local_page(to);
ret = copy_mc_to_kernel(vto, vfrom, PAGE_SIZE);
if (!ret)
kmsan_unpoison_memory(page_address(to), PAGE_SIZE);
kunmap_local(vto);
kunmap_local(vfrom);
return ret;
}
#else
static inline int copy_mc_user_highpage(struct page *to, struct page *from,
unsigned long vaddr, struct vm_area_struct *vma)
{
copy_user_highpage(to, from, vaddr, vma);
return 0;
}
#endif
#ifndef __HAVE_ARCH_COPY_HIGHPAGE
static inline void copy_highpage(struct page *to, struct page *from)


@@ -33,22 +33,9 @@ typedef struct { unsigned long pd; } hugepd_t;
/*
* For HugeTLB page, there are more metadata to save in the struct page. But
* the head struct page cannot meet our needs, so we have to abuse other tail
* struct page to store the metadata. In order to avoid conflicts caused by
* subsequent use of more tail struct pages, we gather these discrete indexes
* of tail struct page here.
* struct page to store the metadata.
*/
enum {
SUBPAGE_INDEX_SUBPOOL = 1, /* reuse page->private */
#ifdef CONFIG_CGROUP_HUGETLB
SUBPAGE_INDEX_CGROUP, /* reuse page->private */
SUBPAGE_INDEX_CGROUP_RSVD, /* reuse page->private */
__MAX_CGROUP_SUBPAGE_INDEX = SUBPAGE_INDEX_CGROUP_RSVD,
#endif
#ifdef CONFIG_MEMORY_FAILURE
SUBPAGE_INDEX_HWPOISON,
#endif
__NR_USED_SUBPAGE,
};
#define __NR_USED_SUBPAGE 3
struct hugepage_subpool {
spinlock_t lock;
@@ -149,6 +136,8 @@ int move_hugetlb_page_tables(struct vm_area_struct *vma,
unsigned long len);
int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *,
struct vm_area_struct *, struct vm_area_struct *);
struct page *hugetlb_follow_page_mask(struct vm_area_struct *vma,
unsigned long address, unsigned int flags);
long follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *,
struct page **, struct vm_area_struct **,
unsigned long *, unsigned long *, long, unsigned int,
@@ -181,10 +170,11 @@ bool hugetlb_reserve_pages(struct inode *inode, long from, long to,
long hugetlb_unreserve_pages(struct inode *inode, long start, long end,
long freed);
int isolate_hugetlb(struct page *page, struct list_head *list);
int get_hwpoison_huge_page(struct page *page, bool *hugetlb);
int get_huge_page_for_hwpoison(unsigned long pfn, int flags);
int get_hwpoison_huge_page(struct page *page, bool *hugetlb, bool unpoison);
int get_huge_page_for_hwpoison(unsigned long pfn, int flags,
bool *migratable_cleared);
void putback_active_hugepage(struct page *page);
void move_hugetlb_state(struct page *oldpage, struct page *newpage, int reason);
void move_hugetlb_state(struct folio *old_folio, struct folio *new_folio, int reason);
void free_huge_page(struct page *page);
void hugetlb_fix_reserve_counts(struct inode *inode);
extern struct mutex *hugetlb_fault_mutex_table;
@@ -209,17 +199,6 @@ int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep);
void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma,
unsigned long *start, unsigned long *end);
struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address,
int write);
struct page *follow_huge_pd(struct vm_area_struct *vma,
unsigned long address, hugepd_t hpd,
int flags, int pdshift);
struct page *follow_huge_pmd_pte(struct vm_area_struct *vma, unsigned long address,
int flags);
struct page *follow_huge_pud(struct mm_struct *mm, unsigned long address,
pud_t *pud, int flags);
struct page *follow_huge_pgd(struct mm_struct *mm, unsigned long address,
pgd_t *pgd, int flags);
void hugetlb_vma_lock_read(struct vm_area_struct *vma);
void hugetlb_vma_unlock_read(struct vm_area_struct *vma);
@@ -272,6 +251,12 @@ static inline void adjust_range_if_pmd_sharing_possible(
{
}
static inline struct page *hugetlb_follow_page_mask(struct vm_area_struct *vma,
unsigned long address, unsigned int flags)
{
BUILD_BUG(); /* should never be compiled in if !CONFIG_HUGETLB_PAGE*/
}
static inline long follow_hugetlb_page(struct mm_struct *mm,
struct vm_area_struct *vma, struct page **pages,
struct vm_area_struct **vmas, unsigned long *position,
@@ -282,12 +267,6 @@ static inline long follow_hugetlb_page(struct mm_struct *mm,
return 0;
}
static inline struct page *follow_huge_addr(struct mm_struct *mm,
unsigned long address, int write)
{
return ERR_PTR(-EINVAL);
}
static inline int copy_hugetlb_page_range(struct mm_struct *dst,
struct mm_struct *src,
struct vm_area_struct *dst_vma,
@@ -320,31 +299,6 @@ static inline void hugetlb_show_meminfo_node(int nid)
{
}
static inline struct page *follow_huge_pd(struct vm_area_struct *vma,
unsigned long address, hugepd_t hpd, int flags,
int pdshift)
{
return NULL;
}
static inline struct page *follow_huge_pmd_pte(struct vm_area_struct *vma,
unsigned long address, int flags)
{
return NULL;
}
static inline struct page *follow_huge_pud(struct mm_struct *mm,
unsigned long address, pud_t *pud, int flags)
{
return NULL;
}
static inline struct page *follow_huge_pgd(struct mm_struct *mm,
unsigned long address, pgd_t *pgd, int flags)
{
return NULL;
}
static inline int prepare_hugepage_range(struct file *file,
unsigned long addr, unsigned long len)
{
@@ -425,12 +379,13 @@ static inline int isolate_hugetlb(struct page *page, struct list_head *list)
return -EBUSY;
}
static inline int get_hwpoison_huge_page(struct page *page, bool *hugetlb)
static inline int get_hwpoison_huge_page(struct page *page, bool *hugetlb, bool unpoison)
{
return 0;
}
static inline int get_huge_page_for_hwpoison(unsigned long pfn, int flags)
static inline int get_huge_page_for_hwpoison(unsigned long pfn, int flags,
bool *migratable_cleared)
{
return 0;
}
@@ -439,8 +394,8 @@ static inline void putback_active_hugepage(struct page *page)
{
}
static inline void move_hugetlb_state(struct page *oldpage,
struct page *newpage, int reason)
static inline void move_hugetlb_state(struct folio *old_folio,
struct folio *new_folio, int reason)
{
}
@@ -623,26 +578,50 @@ enum hugetlb_page_flags {
*/
#ifdef CONFIG_HUGETLB_PAGE
#define TESTHPAGEFLAG(uname, flname) \
static __always_inline \
bool folio_test_hugetlb_##flname(struct folio *folio) \
{ void *private = &folio->private; \
return test_bit(HPG_##flname, private); \
} \
static inline int HPage##uname(struct page *page) \
{ return test_bit(HPG_##flname, &(page->private)); }
#define SETHPAGEFLAG(uname, flname) \
static __always_inline \
void folio_set_hugetlb_##flname(struct folio *folio) \
{ void *private = &folio->private; \
set_bit(HPG_##flname, private); \
} \
static inline void SetHPage##uname(struct page *page) \
{ set_bit(HPG_##flname, &(page->private)); }
#define CLEARHPAGEFLAG(uname, flname) \
static __always_inline \
void folio_clear_hugetlb_##flname(struct folio *folio) \
{ void *private = &folio->private; \
clear_bit(HPG_##flname, private); \
} \
static inline void ClearHPage##uname(struct page *page) \
{ clear_bit(HPG_##flname, &(page->private)); }
#else
#define TESTHPAGEFLAG(uname, flname) \
static inline bool \
folio_test_hugetlb_##flname(struct folio *folio) \
{ return 0; } \
static inline int HPage##uname(struct page *page) \
{ return 0; }
#define SETHPAGEFLAG(uname, flname) \
static inline void \
folio_set_hugetlb_##flname(struct folio *folio) \
{ } \
static inline void SetHPage##uname(struct page *page) \
{ }
#define CLEARHPAGEFLAG(uname, flname) \
static inline void \
folio_clear_hugetlb_##flname(struct folio *folio) \
{ } \
static inline void ClearHPage##uname(struct page *page) \
{ }
#endif
@@ -728,18 +707,29 @@ extern unsigned int default_hstate_idx;
#define default_hstate (hstates[default_hstate_idx])
static inline struct hugepage_subpool *hugetlb_folio_subpool(struct folio *folio)
{
return folio->_hugetlb_subpool;
}
/*
* hugetlb page subpool pointer located in hpage[1].private
* hugetlb page subpool pointer located in hpage[2].hugetlb_subpool
*/
static inline struct hugepage_subpool *hugetlb_page_subpool(struct page *hpage)
{
return (void *)page_private(hpage + SUBPAGE_INDEX_SUBPOOL);
return hugetlb_folio_subpool(page_folio(hpage));
}
static inline void hugetlb_set_folio_subpool(struct folio *folio,
struct hugepage_subpool *subpool)
{
folio->_hugetlb_subpool = subpool;
}
static inline void hugetlb_set_page_subpool(struct page *hpage,
struct hugepage_subpool *subpool)
{
set_page_private(hpage + SUBPAGE_INDEX_SUBPOOL, (unsigned long)subpool);
hugetlb_set_folio_subpool(page_folio(hpage), subpool);
}
static inline struct hstate *hstate_file(struct file *f)
@@ -823,10 +813,15 @@ static inline pte_t arch_make_huge_pte(pte_t entry, unsigned int shift,
}
#endif
static inline struct hstate *folio_hstate(struct folio *folio)
{
VM_BUG_ON_FOLIO(!folio_test_hugetlb(folio), folio);
return size_to_hstate(folio_size(folio));
}
static inline struct hstate *page_hstate(struct page *page)
{
VM_BUG_ON_PAGE(!PageHuge(page), page);
return size_to_hstate(page_size(page));
return folio_hstate(page_folio(page));
}
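Pulling the folio-based helpers together, a hedged sketch; the function is invented and assumes it is handed a hugetlb folio.

/* Illustrative sketch only. */
static void example_hugetlb_folio_info(struct folio *folio)
{
        struct hstate *h = folio_hstate(folio);
        struct hugepage_subpool *spool = hugetlb_folio_subpool(folio);

        if (folio_test_hugetlb_freed(folio))
                pr_debug("freed hugetlb folio: order %u, subpool %p\n",
                         huge_page_order(h), spool);
}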
static inline unsigned hstate_index_to_shift(unsigned index)
@@ -983,6 +978,11 @@ void hugetlb_unregister_node(struct node *node);
#else /* CONFIG_HUGETLB_PAGE */
struct hstate {};
static inline struct hugepage_subpool *hugetlb_folio_subpool(struct folio *folio)
{
return NULL;
}
static inline struct hugepage_subpool *hugetlb_page_subpool(struct page *hpage)
{
return NULL;
@@ -1035,6 +1035,11 @@ static inline struct hstate *hstate_vma(struct vm_area_struct *vma)
return NULL;
}
static inline struct hstate *folio_hstate(struct folio *folio)
{
return NULL;
}
static inline struct hstate *page_hstate(struct page *page)
{
return NULL;


@@ -24,12 +24,10 @@ struct file_region;
#ifdef CONFIG_CGROUP_HUGETLB
/*
* Minimum page order trackable by hugetlb cgroup.
* At least 4 pages are necessary for all the tracking information.
* The second tail page (hpage[SUBPAGE_INDEX_CGROUP]) is the fault
* usage cgroup. The third tail page (hpage[SUBPAGE_INDEX_CGROUP_RSVD])
* is the reservation usage cgroup.
* At least 3 pages are necessary for all the tracking information.
* The second tail page contains all of the hugetlb-specific fields.
*/
#define HUGETLB_CGROUP_MIN_ORDER order_base_2(__MAX_CGROUP_SUBPAGE_INDEX + 1)
#define HUGETLB_CGROUP_MIN_ORDER order_base_2(__NR_USED_SUBPAGE)
enum hugetlb_memory_event {
HUGETLB_MAX,
@@ -67,54 +65,50 @@ struct hugetlb_cgroup {
};
static inline struct hugetlb_cgroup *
__hugetlb_cgroup_from_page(struct page *page, bool rsvd)
__hugetlb_cgroup_from_folio(struct folio *folio, bool rsvd)
{
VM_BUG_ON_PAGE(!PageHuge(page), page);
if (compound_order(page) < HUGETLB_CGROUP_MIN_ORDER)
VM_BUG_ON_FOLIO(!folio_test_hugetlb(folio), folio);
if (folio_order(folio) < HUGETLB_CGROUP_MIN_ORDER)
return NULL;
if (rsvd)
return (void *)page_private(page + SUBPAGE_INDEX_CGROUP_RSVD);
return folio->_hugetlb_cgroup_rsvd;
else
return (void *)page_private(page + SUBPAGE_INDEX_CGROUP);
return folio->_hugetlb_cgroup;
}
static inline struct hugetlb_cgroup *hugetlb_cgroup_from_page(struct page *page)
static inline struct hugetlb_cgroup *hugetlb_cgroup_from_folio(struct folio *folio)
{
return __hugetlb_cgroup_from_page(page, false);
return __hugetlb_cgroup_from_folio(folio, false);
}
static inline struct hugetlb_cgroup *
hugetlb_cgroup_from_page_rsvd(struct page *page)
hugetlb_cgroup_from_folio_rsvd(struct folio *folio)
{
return __hugetlb_cgroup_from_page(page, true);
return __hugetlb_cgroup_from_folio(folio, true);
}
static inline void __set_hugetlb_cgroup(struct page *page,
static inline void __set_hugetlb_cgroup(struct folio *folio,
struct hugetlb_cgroup *h_cg, bool rsvd)
{
VM_BUG_ON_PAGE(!PageHuge(page), page);
if (compound_order(page) < HUGETLB_CGROUP_MIN_ORDER)
VM_BUG_ON_FOLIO(!folio_test_hugetlb(folio), folio);
if (folio_order(folio) < HUGETLB_CGROUP_MIN_ORDER)
return;
if (rsvd)
set_page_private(page + SUBPAGE_INDEX_CGROUP_RSVD,
(unsigned long)h_cg);
folio->_hugetlb_cgroup_rsvd = h_cg;
else
set_page_private(page + SUBPAGE_INDEX_CGROUP,
(unsigned long)h_cg);
folio->_hugetlb_cgroup = h_cg;
}
static inline void set_hugetlb_cgroup(struct page *page,
static inline void set_hugetlb_cgroup(struct folio *folio,
struct hugetlb_cgroup *h_cg)
{
__set_hugetlb_cgroup(page, h_cg, false);
__set_hugetlb_cgroup(folio, h_cg, false);
}
static inline void set_hugetlb_cgroup_rsvd(struct page *page,
static inline void set_hugetlb_cgroup_rsvd(struct folio *folio,
struct hugetlb_cgroup *h_cg)
{
__set_hugetlb_cgroup(page, h_cg, true);
__set_hugetlb_cgroup(folio, h_cg, true);
}
static inline bool hugetlb_cgroup_disabled(void)
@@ -151,10 +145,10 @@ extern void hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages,
extern void hugetlb_cgroup_commit_charge_rsvd(int idx, unsigned long nr_pages,
struct hugetlb_cgroup *h_cg,
struct page *page);
extern void hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages,
struct page *page);
extern void hugetlb_cgroup_uncharge_page_rsvd(int idx, unsigned long nr_pages,
struct page *page);
extern void hugetlb_cgroup_uncharge_folio(int idx, unsigned long nr_pages,
struct folio *folio);
extern void hugetlb_cgroup_uncharge_folio_rsvd(int idx, unsigned long nr_pages,
struct folio *folio);
extern void hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages,
struct hugetlb_cgroup *h_cg);
@@ -170,8 +164,8 @@ extern void hugetlb_cgroup_uncharge_file_region(struct resv_map *resv,
bool region_del);
extern void hugetlb_cgroup_file_init(void) __init;
extern void hugetlb_cgroup_migrate(struct page *oldhpage,
struct page *newhpage);
extern void hugetlb_cgroup_migrate(struct folio *old_folio,
struct folio *new_folio);
#else
static inline void hugetlb_cgroup_uncharge_file_region(struct resv_map *resv,
@@ -181,29 +175,23 @@ static inline void hugetlb_cgroup_uncharge_file_region(struct resv_map *resv,
{
}
static inline struct hugetlb_cgroup *hugetlb_cgroup_from_page(struct page *page)
static inline struct hugetlb_cgroup *hugetlb_cgroup_from_folio(struct folio *folio)
{
return NULL;
}
static inline struct hugetlb_cgroup *
hugetlb_cgroup_from_page_resv(struct page *page)
hugetlb_cgroup_from_folio_rsvd(struct folio *folio)
{
return NULL;
}
static inline struct hugetlb_cgroup *
hugetlb_cgroup_from_page_rsvd(struct page *page)
{
return NULL;
}
static inline void set_hugetlb_cgroup(struct page *page,
static inline void set_hugetlb_cgroup(struct folio *folio,
struct hugetlb_cgroup *h_cg)
{
}
static inline void set_hugetlb_cgroup_rsvd(struct page *page,
static inline void set_hugetlb_cgroup_rsvd(struct folio *folio,
struct hugetlb_cgroup *h_cg)
{
}
@@ -253,14 +241,14 @@ hugetlb_cgroup_commit_charge_rsvd(int idx, unsigned long nr_pages,
{
}
static inline void hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages,
struct page *page)
static inline void hugetlb_cgroup_uncharge_folio(int idx, unsigned long nr_pages,
struct folio *folio)
{
}
static inline void hugetlb_cgroup_uncharge_page_rsvd(int idx,
static inline void hugetlb_cgroup_uncharge_folio_rsvd(int idx,
unsigned long nr_pages,
struct page *page)
struct folio *folio)
{
}
static inline void hugetlb_cgroup_uncharge_cgroup(int idx,
@@ -285,8 +273,8 @@ static inline void hugetlb_cgroup_file_init(void)
{
}
static inline void hugetlb_cgroup_migrate(struct page *oldhpage,
struct page *newhpage)
static inline void hugetlb_cgroup_migrate(struct folio *old_folio,
struct folio *new_folio)
{
}
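A trivial sketch of the folio-based lookup; the helper is invented.

/* Illustrative sketch only. */
static bool example_folio_charged(struct folio *folio)
{
        return hugetlb_cgroup_from_folio(folio) ||
               hugetlb_cgroup_from_folio_rsvd(folio);
}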


@@ -15,6 +15,7 @@ extern void __khugepaged_exit(struct mm_struct *mm);
extern void khugepaged_enter_vma(struct vm_area_struct *vma,
unsigned long vm_flags);
extern void khugepaged_min_free_kbytes_update(void);
extern bool current_is_khugepaged(void);
#ifdef CONFIG_SHMEM
extern int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr,
bool install_pmd);
@@ -57,6 +58,11 @@ static inline int collapse_pte_mapped_thp(struct mm_struct *mm,
static inline void khugepaged_min_free_kbytes_update(void)
{
}
static inline bool current_is_khugepaged(void)
{
return false;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif /* _LINUX_KHUGEPAGED_H */


@@ -615,28 +615,32 @@ static inline void mem_cgroup_protection(struct mem_cgroup *root,
void mem_cgroup_calculate_protection(struct mem_cgroup *root,
struct mem_cgroup *memcg);
static inline bool mem_cgroup_supports_protection(struct mem_cgroup *memcg)
static inline bool mem_cgroup_unprotected(struct mem_cgroup *target,
struct mem_cgroup *memcg)
{
/*
* The root memcg doesn't account charges, and doesn't support
* protection.
* protection. The target memcg's protection is ignored, see
* mem_cgroup_calculate_protection() and mem_cgroup_protection()
*/
return !mem_cgroup_disabled() && !mem_cgroup_is_root(memcg);
return mem_cgroup_disabled() || mem_cgroup_is_root(memcg) ||
memcg == target;
}
static inline bool mem_cgroup_below_low(struct mem_cgroup *memcg)
static inline bool mem_cgroup_below_low(struct mem_cgroup *target,
struct mem_cgroup *memcg)
{
if (!mem_cgroup_supports_protection(memcg))
if (mem_cgroup_unprotected(target, memcg))
return false;
return READ_ONCE(memcg->memory.elow) >=
page_counter_read(&memcg->memory);
}
static inline bool mem_cgroup_below_min(struct mem_cgroup *memcg)
static inline bool mem_cgroup_below_min(struct mem_cgroup *target,
struct mem_cgroup *memcg)
{
if (!mem_cgroup_supports_protection(memcg))
if (mem_cgroup_unprotected(target, memcg))
return false;
return READ_ONCE(memcg->memory.emin) >=
@@ -1209,12 +1213,19 @@ static inline void mem_cgroup_calculate_protection(struct mem_cgroup *root,
{
}
static inline bool mem_cgroup_below_low(struct mem_cgroup *memcg)
static inline bool mem_cgroup_unprotected(struct mem_cgroup *target,
struct mem_cgroup *memcg)
{
return true;
}
static inline bool mem_cgroup_below_low(struct mem_cgroup *target,
struct mem_cgroup *memcg)
{
return false;
}
static inline bool mem_cgroup_below_min(struct mem_cgroup *memcg)
static inline bool mem_cgroup_below_min(struct mem_cgroup *target,
struct mem_cgroup *memcg)
{
return false;
}
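A sketch of how a reclaim path is expected to consult the target-aware helpers; the wrapper is invented, with 'target' being the memcg that reclaim was invoked against.

/* Illustrative sketch only. */
static bool example_skip_for_protection(struct mem_cgroup *target,
                                        struct mem_cgroup *memcg)
{
        if (mem_cgroup_below_min(target, memcg))
                return true;    /* hard protection: skip */
        if (mem_cgroup_below_low(target, memcg))
                return true;    /* soft protection: normally skip */
        return false;
}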


@@ -18,7 +18,6 @@
* the same memory tier.
*/
#define MEMTIER_ADISTANCE_DRAM ((4 * MEMTIER_CHUNK_SIZE) + (MEMTIER_CHUNK_SIZE >> 1))
#define MEMTIER_HOTPLUG_PRIO 100
struct memory_tier;
struct memory_dev_type {


@@ -19,7 +19,6 @@
#include <linux/node.h>
#include <linux/compiler.h>
#include <linux/mutex.h>
#include <linux/notifier.h>
#define MIN_MEMORY_BLOCK_SIZE (1UL << SECTION_SIZE_BITS)
@@ -85,6 +84,9 @@ struct memory_block {
unsigned long nr_vmemmap_pages;
struct memory_group *group; /* group (if any) for this block */
struct list_head group_next; /* next block inside memory group */
#if defined(CONFIG_MEMORY_FAILURE) && defined(CONFIG_MEMORY_HOTPLUG)
atomic_long_t nr_hwpoison;
#endif
};
int arch_get_memory_phys_device(unsigned long start_pfn);
@@ -113,8 +115,13 @@ struct mem_section;
* Priorities for the hotplug memory callback routines (stored in decreasing
* order in the callback chain)
*/
#define SLAB_CALLBACK_PRI 1
#define IPC_CALLBACK_PRI 10
#define DEFAULT_CALLBACK_PRI 0
#define SLAB_CALLBACK_PRI 1
#define HMAT_CALLBACK_PRI 2
#define MM_COMPUTE_BATCH_PRI 10
#define CPUSET_CALLBACK_PRI 10
#define MEMTIER_HOTPLUG_PRI 100
#define KSM_CALLBACK_PRI 100
#ifndef CONFIG_MEMORY_HOTPLUG
static inline void memory_dev_init(void)
@@ -136,9 +143,6 @@ static inline int hotplug_memory_notifier(notifier_fn_t fn, int pri)
{
return 0;
}
/* These aren't inline functions due to a GCC bug. */
#define register_hotmemory_notifier(nb) ({ (void)(nb); 0; })
#define unregister_hotmemory_notifier(nb) ({ (void)(nb); })
#else /* CONFIG_MEMORY_HOTPLUG */
extern int register_memory_notifier(struct notifier_block *nb);
extern void unregister_memory_notifier(struct notifier_block *nb);
@@ -166,8 +170,6 @@ int walk_dynamic_memory_groups(int nid, walk_memory_groups_func_t func,
{ .notifier_call = fn, .priority = pri };\
register_memory_notifier(&fn##_mem_nb); \
})
#define register_hotmemory_notifier(nb) register_memory_notifier(nb)
#define unregister_hotmemory_notifier(nb) unregister_memory_notifier(nb)
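For illustration, how a subsystem consumes one of the priorities collected above via the hotplug notifier helper; the callback and init function are invented.

/* Illustrative sketch only. */
static int example_mem_notify(struct notifier_block *nb,
                              unsigned long action, void *arg)
{
        return NOTIFY_OK;
}

static int __init example_register(void)
{
        return hotplug_memory_notifier(example_mem_notify, DEFAULT_CALLBACK_PRI);
}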
#ifdef CONFIG_NUMA
void memory_block_add_nid(struct memory_block *mem, int nid,


@@ -74,6 +74,7 @@ static inline void totalram_pages_add(long count)
extern void * high_memory;
extern int page_cluster;
extern const int page_cluster_max;
#ifdef CONFIG_SYSCTL
extern int sysctl_legacy_va_layout;
@@ -549,7 +550,7 @@ struct vm_operations_struct {
/*
* Called by mprotect() to make driver-specific permission
* checks before mprotect() is finalised. The VMA must not
* be modified. Returns 0 if eprotect() can proceed.
* be modified. Returns 0 if mprotect() can proceed.
*/
int (*mprotect)(struct vm_area_struct *vma, unsigned long start,
unsigned long end, unsigned long newflags);
@@ -699,8 +700,10 @@ static inline unsigned long vma_iter_addr(struct vma_iterator *vmi)
* paths in userfault.
*/
bool vma_is_shmem(struct vm_area_struct *vma);
bool vma_is_anon_shmem(struct vm_area_struct *vma);
#else
static inline bool vma_is_shmem(struct vm_area_struct *vma) { return false; }
static inline bool vma_is_anon_shmem(struct vm_area_struct *vma) { return false; }
#endif
int vma_is_stack_for_current(struct vm_area_struct *vma);
@@ -817,8 +820,8 @@ static inline int is_vmalloc_or_module_addr(const void *x)
/*
* How many times the entire folio is mapped as a single unit (eg by a
* PMD or PUD entry). This is probably not what you want, except for
* debugging purposes; look at folio_mapcount() or page_mapcount()
* instead.
* debugging purposes - it does not include PTE-mapped sub-pages; look
* at folio_mapcount() or page_mapcount() or total_mapcount() instead.
*/
static inline int folio_entire_mapcount(struct folio *folio)
{
@@ -828,12 +831,29 @@ static inline int folio_entire_mapcount(struct folio *folio)
/*
* Mapcount of compound page as a whole, does not include mapped sub-pages.
*
* Must be called only for compound pages.
* Must be called only on head of compound page.
*/
static inline int compound_mapcount(struct page *page)
static inline int head_compound_mapcount(struct page *head)
{
return folio_entire_mapcount(page_folio(page));
return atomic_read(compound_mapcount_ptr(head)) + 1;
}
/*
* If a 16GB hugetlb page were mapped by PTEs of all of its 4kB sub-pages,
* its subpages_mapcount would be 0x400000: choose the COMPOUND_MAPPED bit
* above that range, instead of 2*(PMD_SIZE/PAGE_SIZE). Hugetlb currently
* leaves subpages_mapcount at 0, but avoid surprise if it participates later.
*/
#define COMPOUND_MAPPED 0x800000
#define SUBPAGES_MAPPED (COMPOUND_MAPPED - 1)
/*
* Number of sub-pages mapped by PTE, does not include compound mapcount.
* Must be called only on head of compound page.
*/
static inline int head_subpages_mapcount(struct page *head)
{
return atomic_read(subpages_mapcount_ptr(head)) & SUBPAGES_MAPPED;
}
/*
@@ -846,11 +866,9 @@ static inline void page_mapcount_reset(struct page *page)
atomic_set(&(page)->_mapcount, -1);
}
int __page_mapcount(struct page *page);
/*
* Mapcount of 0-order page; when compound sub-page, includes
* compound_mapcount().
* compound_mapcount of compound_head of page.
*
* Result is undefined for pages which cannot be mapped into userspace.
* For example SLAB or special types of pages. See function page_has_type().
@@ -858,25 +876,75 @@ int __page_mapcount(struct page *page);
*/
static inline int page_mapcount(struct page *page)
{
if (unlikely(PageCompound(page)))
return __page_mapcount(page);
return atomic_read(&page->_mapcount) + 1;
int mapcount = atomic_read(&page->_mapcount) + 1;
if (likely(!PageCompound(page)))
return mapcount;
page = compound_head(page);
return head_compound_mapcount(page) + mapcount;
}
int folio_mapcount(struct folio *folio);
int total_compound_mapcount(struct page *head);
/**
* folio_mapcount() - Calculate the number of mappings of this folio.
* @folio: The folio.
*
* A large folio tracks both how many times the entire folio is mapped,
* and how many times each individual page in the folio is mapped.
* This function calculates the total number of times the folio is
* mapped.
*
* Return: The number of times this folio is mapped.
*/
static inline int folio_mapcount(struct folio *folio)
{
if (likely(!folio_test_large(folio)))
return atomic_read(&folio->_mapcount) + 1;
return total_compound_mapcount(&folio->page);
}
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static inline int total_mapcount(struct page *page)
{
return folio_mapcount(page_folio(page));
if (likely(!PageCompound(page)))
return atomic_read(&page->_mapcount) + 1;
return total_compound_mapcount(compound_head(page));
}
#else
static inline int total_mapcount(struct page *page)
static inline bool folio_large_is_mapped(struct folio *folio)
{
return page_mapcount(page);
/*
* Reading folio_mapcount_ptr() below could be omitted if hugetlb
* participated in incrementing subpages_mapcount when compound mapped.
*/
return atomic_read(folio_subpages_mapcount_ptr(folio)) > 0 ||
atomic_read(folio_mapcount_ptr(folio)) >= 0;
}
/**
* folio_mapped - Is this folio mapped into userspace?
* @folio: The folio.
*
* Return: True if any page in this folio is referenced by user page tables.
*/
static inline bool folio_mapped(struct folio *folio)
{
if (likely(!folio_test_large(folio)))
return atomic_read(&folio->_mapcount) >= 0;
return folio_large_is_mapped(folio);
}
/*
* Return true if this page is mapped into pagetables.
* For compound page it returns true if any sub-page of compound page is mapped,
* even if this particular sub-page is not itself mapped by any PTE or PMD.
*/
static inline bool page_mapped(struct page *page)
{
if (likely(!PageCompound(page)))
return atomic_read(&page->_mapcount) >= 0;
return folio_large_is_mapped(page_folio(page));
}
#endif
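To make the distinction between the counters concrete, a hedged sketch; the function is invented.

/* Illustrative sketch only: the three views of "how mapped" after this rework. */
static void example_mapcounts(struct folio *folio, struct page *subpage)
{
        int entire = folio_entire_mapcount(folio); /* PMD/PUD mappings of the whole folio */
        int total  = folio_mapcount(folio);        /* entire plus all PTE-mapped subpages */
        int one    = page_mapcount(subpage);       /* this subpage's own view */

        pr_debug("entire=%d total=%d page=%d\n", entire, total, one);
}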
static inline struct page *virt_to_head_page(const void *x)
{
@@ -929,6 +997,13 @@ static inline void set_compound_page_dtor(struct page *page,
page[1].compound_dtor = compound_dtor;
}
static inline void folio_set_compound_dtor(struct folio *folio,
enum compound_dtor_id compound_dtor)
{
VM_BUG_ON_FOLIO(compound_dtor >= NR_COMPOUND_DTORS, folio);
folio->_folio_dtor = compound_dtor;
}
void destroy_large_folio(struct folio *folio);
static inline int head_compound_pincount(struct page *head)
@@ -944,6 +1019,22 @@ static inline void set_compound_order(struct page *page, unsigned int order)
#endif
}
/*
* folio_set_compound_order is generally passed a non-zero order to
* initialize a large folio. However, hugetlb code abuses this by
* passing in zero when 'dissolving' a large folio.
*/
static inline void folio_set_compound_order(struct folio *folio,
unsigned int order)
{
VM_BUG_ON_FOLIO(!folio_test_large(folio), folio);
folio->_folio_order = order;
#ifdef CONFIG_64BIT
folio->_folio_nr_pages = order ? 1U << order : 0;
#endif
}
/* Returns the number of pages in this potentially compound page. */
static inline unsigned long compound_nr(struct page *page)
{
@@ -1179,7 +1270,24 @@ static inline void folio_put_refs(struct folio *folio, int refs)
__folio_put(folio);
}
void release_pages(struct page **pages, int nr);
/**
* release_pages - release an array of pages or folios
*
* This just releases a simple array of multiple pages, and
* accepts various different forms of said page array: either
* a regular old boring array of pages, an array of folios, or
* an array of encoded page pointers.
*
* The transparent union syntax for this kind of "any of these
* argument types" is all kinds of ugly, so look away.
*/
typedef union {
struct page **pages;
struct folio **folios;
struct encoded_page **encoded_pages;
} release_pages_arg __attribute__ ((__transparent_union__));
void release_pages(release_pages_arg, int nr);
/**
* folios_put - Decrement the reference count on an array of folios.
@@ -1195,7 +1303,7 @@ void release_pages(struct page **pages, int nr);
*/
static inline void folios_put(struct folio **folios, unsigned int nr)
{
release_pages((struct page **)folios, nr);
release_pages(folios, nr);
}
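The practical effect of the transparent union is that all three pointer types are accepted without casts; a sketch, with an invented function.

/* Illustrative sketch only. */
static void example_release(struct page **pages, struct folio **folios,
                            struct encoded_page **encoded, int nr)
{
        release_pages(pages, nr);
        release_pages(folios, nr);
        release_pages(encoded, nr);
}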
static inline void put_page(struct page *page)
@@ -1799,9 +1907,6 @@ static inline pgoff_t page_index(struct page *page)
return page->index;
}
bool page_mapped(struct page *page);
bool folio_mapped(struct folio *folio);
/*
* Return true only if the page has been allocated with
* ALLOC_NO_WATERMARKS and the low watermark was not
@@ -2025,6 +2130,22 @@ extern unsigned long move_page_tables(struct vm_area_struct *vma,
#define MM_CP_UFFD_WP_ALL (MM_CP_UFFD_WP | \
MM_CP_UFFD_WP_RESOLVE)
int vma_wants_writenotify(struct vm_area_struct *vma, pgprot_t vm_page_prot);
static inline bool vma_wants_manual_pte_write_upgrade(struct vm_area_struct *vma)
{
/*
* We want to check manually if we can change individual PTEs writable
* if we can't do that automatically for all PTEs in a mapping. For
* private mappings, that's always the case when we have write
* permissions as we properly have to handle COW.
*/
if (vma->vm_flags & VM_SHARED)
return vma_wants_writenotify(vma, vma->vm_page_prot);
return !!(vma->vm_flags & VM_WRITE);
}
bool can_change_pte_writable(struct vm_area_struct *vma, unsigned long addr,
pte_t pte);
extern unsigned long change_protection(struct mmu_gather *tlb,
struct vm_area_struct *vma, unsigned long start,
unsigned long end, pgprot_t newprot,
@@ -2051,40 +2172,30 @@ static inline bool get_user_page_fast_only(unsigned long addr,
*/
static inline unsigned long get_mm_counter(struct mm_struct *mm, int member)
{
long val = atomic_long_read(&mm->rss_stat.count[member]);
#ifdef SPLIT_RSS_COUNTING
/*
* counter is updated in asynchronous manner and may go to minus.
* But it's never be expected number for users.
*/
if (val < 0)
val = 0;
#endif
return (unsigned long)val;
return percpu_counter_read_positive(&mm->rss_stat[member]);
}
void mm_trace_rss_stat(struct mm_struct *mm, int member, long count);
void mm_trace_rss_stat(struct mm_struct *mm, int member);
static inline void add_mm_counter(struct mm_struct *mm, int member, long value)
{
long count = atomic_long_add_return(value, &mm->rss_stat.count[member]);
percpu_counter_add(&mm->rss_stat[member], value);
mm_trace_rss_stat(mm, member, count);
mm_trace_rss_stat(mm, member);
}
static inline void inc_mm_counter(struct mm_struct *mm, int member)
{
long count = atomic_long_inc_return(&mm->rss_stat.count[member]);
percpu_counter_inc(&mm->rss_stat[member]);
mm_trace_rss_stat(mm, member, count);
mm_trace_rss_stat(mm, member);
}
static inline void dec_mm_counter(struct mm_struct *mm, int member)
{
long count = atomic_long_dec_return(&mm->rss_stat.count[member]);
percpu_counter_dec(&mm->rss_stat[member]);
mm_trace_rss_stat(mm, member, count);
mm_trace_rss_stat(mm, member);
}
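Callers are unchanged by the switch to per-CPU counters; a usage sketch with an invented function.

/* Illustrative sketch only. */
static void example_rss(struct mm_struct *mm)
{
        inc_mm_counter(mm, MM_ANONPAGES);
        pr_debug("anon rss: %lu pages\n", get_mm_counter(mm, MM_ANONPAGES));
        dec_mm_counter(mm, MM_ANONPAGES);
}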
/* Optimized variant when page is already known not to be PageAnon */
@@ -2174,8 +2285,6 @@ static inline int pte_devmap(pte_t pte)
}
#endif
int vma_wants_writenotify(struct vm_area_struct *vma, pgprot_t vm_page_prot);
extern pte_t *__get_locked_pte(struct mm_struct *mm, unsigned long addr,
spinlock_t **ptl);
static inline pte_t *get_locked_pte(struct mm_struct *mm, unsigned long addr,
@@ -2424,7 +2533,7 @@ static inline void pgtable_pte_page_dtor(struct page *page)
#if USE_SPLIT_PMD_PTLOCKS
static struct page *pmd_to_page(pmd_t *pmd)
static inline struct page *pmd_pgtable_page(pmd_t *pmd)
{
unsigned long mask = ~(PTRS_PER_PMD * sizeof(pmd_t) - 1);
return virt_to_page((void *)((unsigned long) pmd & mask));
@@ -2432,7 +2541,7 @@ static struct page *pmd_to_page(pmd_t *pmd)
static inline spinlock_t *pmd_lockptr(struct mm_struct *mm, pmd_t *pmd)
{
return ptlock_ptr(pmd_to_page(pmd));
return ptlock_ptr(pmd_pgtable_page(pmd));
}
static inline bool pmd_ptlock_init(struct page *page)
@@ -2451,7 +2560,7 @@ static inline void pmd_ptlock_free(struct page *page)
ptlock_free(page);
}
#define pmd_huge_pte(mm, pmd) (pmd_to_page(pmd)->pmd_huge_pte)
#define pmd_huge_pte(mm, pmd) (pmd_pgtable_page(pmd)->pmd_huge_pte)
#else
@@ -2971,7 +3080,6 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
* and return without waiting upon it */
#define FOLL_NOFAULT 0x80 /* do not fault in pages */
#define FOLL_HWPOISON 0x100 /* check page is hwpoisoned */
#define FOLL_MIGRATION 0x400 /* wait for page to replace migration entry */
#define FOLL_TRIED 0x800 /* a retry, previous pass started an IO */
#define FOLL_REMOTE 0x2000 /* we are working on non-current tsk/mm */
#define FOLL_ANON 0x8000 /* don't do file mappings */
@@ -3064,8 +3172,12 @@ static inline int vm_fault_to_errno(vm_fault_t vm_fault, int foll_flags)
* Must be called with the (sub)page that's actually referenced via the
* page table entry, which might not necessarily be the head page for a
* PTE-mapped THP.
*
* If the vma is NULL, we're coming from the GUP-fast path and might have
* to fallback to the slow path just to lookup the vma.
*/
static inline bool gup_must_unshare(unsigned int flags, struct page *page)
static inline bool gup_must_unshare(struct vm_area_struct *vma,
unsigned int flags, struct page *page)
{
/*
* FOLL_WRITE is implicitly handled correctly as the page table entry
@@ -3078,8 +3190,25 @@ static inline bool gup_must_unshare(unsigned int flags, struct page *page)
* Note: PageAnon(page) is stable until the page is actually getting
* freed.
*/
if (!PageAnon(page))
return false;
if (!PageAnon(page)) {
/*
* We only care about R/O long-term pining: R/O short-term
* pinning does not have the semantics to observe successive
* changes through the process page tables.
*/
if (!(flags & FOLL_LONGTERM))
return false;
/* We really need the vma ... */
if (!vma)
return true;
/*
* ... because we only care about writable private ("COW")
* mappings where we have to break COW early.
*/
return is_cow_mapping(vma->vm_flags);
}
/* Paired with a memory barrier in page_try_share_anon_rmap(). */
if (IS_ENABLED(CONFIG_HAVE_FAST_GUP))
@@ -3255,6 +3384,8 @@ void *sparse_buffer_alloc(unsigned long size);
struct page * __populate_section_memmap(unsigned long pfn,
unsigned long nr_pages, int nid, struct vmem_altmap *altmap,
struct dev_pagemap *pgmap);
void pmd_init(void *addr);
void pud_init(void *addr);
pgd_t *vmemmap_pgd_populate(unsigned long addr, int node);
p4d_t *vmemmap_p4d_populate(pgd_t *pgd, unsigned long addr, int node);
pud_t *vmemmap_pud_populate(p4d_t *p4d, unsigned long addr, int node);
@@ -3266,8 +3397,14 @@ struct vmem_altmap;
void *vmemmap_alloc_block_buf(unsigned long size, int node,
struct vmem_altmap *altmap);
void vmemmap_verify(pte_t *, int, unsigned long, unsigned long);
void vmemmap_set_pmd(pmd_t *pmd, void *p, int node,
unsigned long addr, unsigned long next);
int vmemmap_check_pmd(pmd_t *pmd, int node,
unsigned long addr, unsigned long next);
int vmemmap_populate_basepages(unsigned long start, unsigned long end,
int node, struct vmem_altmap *altmap);
int vmemmap_populate_hugepages(unsigned long start, unsigned long end,
int node, struct vmem_altmap *altmap);
int vmemmap_populate(unsigned long start, unsigned long end, int node,
struct vmem_altmap *altmap);
void vmemmap_populate_print_last(void);
@@ -3290,7 +3427,6 @@ enum mf_flags {
int mf_dax_kill_procs(struct address_space *mapping, pgoff_t index,
unsigned long count, int mf_flags);
extern int memory_failure(unsigned long pfn, int flags);
extern void memory_failure_queue(unsigned long pfn, int flags);
extern void memory_failure_queue_kick(int cpu);
extern int unpoison_memory(unsigned long pfn);
extern int sysctl_memory_failure_early_kill;
@@ -3299,12 +3435,42 @@ extern void shake_page(struct page *p);
extern atomic_long_t num_poisoned_pages __read_mostly;
extern int soft_offline_page(unsigned long pfn, int flags);
#ifdef CONFIG_MEMORY_FAILURE
extern int __get_huge_page_for_hwpoison(unsigned long pfn, int flags);
extern void memory_failure_queue(unsigned long pfn, int flags);
extern int __get_huge_page_for_hwpoison(unsigned long pfn, int flags,
bool *migratable_cleared);
void num_poisoned_pages_inc(unsigned long pfn);
void num_poisoned_pages_sub(unsigned long pfn, long i);
#else
static inline int __get_huge_page_for_hwpoison(unsigned long pfn, int flags)
static inline void memory_failure_queue(unsigned long pfn, int flags)
{
}
static inline int __get_huge_page_for_hwpoison(unsigned long pfn, int flags,
bool *migratable_cleared)
{
return 0;
}
static inline void num_poisoned_pages_inc(unsigned long pfn)
{
}
static inline void num_poisoned_pages_sub(unsigned long pfn, long i)
{
}
#endif
#if defined(CONFIG_MEMORY_FAILURE) && defined(CONFIG_MEMORY_HOTPLUG)
extern void memblk_nr_poison_inc(unsigned long pfn);
extern void memblk_nr_poison_sub(unsigned long pfn, long i);
#else
static inline void memblk_nr_poison_inc(unsigned long pfn)
{
}
static inline void memblk_nr_poison_sub(unsigned long pfn, long i)
{
}
#endif
#ifndef arch_memory_failure


@@ -18,6 +18,7 @@
#include <linux/page-flags-layout.h>
#include <linux/workqueue.h>
#include <linux/seqlock.h>
#include <linux/percpu_counter.h>
#include <asm/mmu.h>
@@ -67,7 +68,7 @@ struct mem_cgroup;
#ifdef CONFIG_HAVE_ALIGNED_STRUCT_PAGE
#define _struct_page_alignment __aligned(2 * sizeof(unsigned long))
#else
#define _struct_page_alignment
#define _struct_page_alignment __aligned(sizeof(unsigned long))
#endif
struct page {
@@ -103,7 +104,10 @@ struct page {
};
/* See page-flags.h for PAGE_MAPPING_FLAGS */
struct address_space *mapping;
pgoff_t index; /* Our offset within mapping. */
union {
pgoff_t index; /* Our offset within mapping. */
unsigned long share; /* share count for fsdax */
};
/**
* @private: Mapping-private opaque data.
* Usually used for buffer_heads if PagePrivate.
@@ -141,17 +145,26 @@ struct page {
unsigned char compound_dtor;
unsigned char compound_order;
atomic_t compound_mapcount;
atomic_t subpages_mapcount;
atomic_t compound_pincount;
#ifdef CONFIG_64BIT
unsigned int compound_nr; /* 1 << compound_order */
#endif
};
struct { /* Second tail page of compound page */
struct { /* Second tail page of transparent huge page */
unsigned long _compound_pad_1; /* compound_head */
unsigned long _compound_pad_2;
/* For both global and memcg */
struct list_head deferred_list;
};
struct { /* Second tail page of hugetlb page */
unsigned long _hugetlb_pad_1; /* compound_head */
void *hugetlb_subpool;
void *hugetlb_cgroup;
void *hugetlb_cgroup_rsvd;
void *hugetlb_hwpoison;
/* No more space on 32-bit: use third tail if more */
};
struct { /* Page table pages */
unsigned long _pt_pad_1; /* compound_head */
pgtable_t pmd_huge_pte; /* protected by page->ptl */
@@ -241,6 +254,38 @@ struct page {
#endif
} _struct_page_alignment;
/*
* struct encoded_page - a nonexistent type marking this pointer
*
* An 'encoded_page' pointer is a pointer to a regular 'struct page', but
* with the low bits of the pointer indicating extra context-dependent
* information. Not super-common, but happens in mmu_gather and mlock
* handling, and this acts as a type system check on that use.
*
* We only really have two guaranteed bits in general, although you could
* play with 'struct page' alignment (see CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
* for more.
*
* Use the supplied helper functions to encode/decode the pointer and bits.
*/
struct encoded_page;
#define ENCODE_PAGE_BITS 3ul
static __always_inline struct encoded_page *encode_page(struct page *page, unsigned long flags)
{
BUILD_BUG_ON(flags > ENCODE_PAGE_BITS);
return (struct encoded_page *)(flags | (unsigned long)page);
}
static inline unsigned long encoded_page_flags(struct encoded_page *page)
{
return ENCODE_PAGE_BITS & (unsigned long)page;
}
static inline struct page *encoded_page_ptr(struct encoded_page *page)
{
return (struct page *)(~ENCODE_PAGE_BITS & (unsigned long)page);
}
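A round-trip sketch of the helpers above; the function is invented and the flag value 1 is an arbitrary choice within ENCODE_PAGE_BITS.

/* Illustrative sketch only. */
static void example_encode(struct page *page)
{
        struct encoded_page *ep = encode_page(page, 1);

        VM_BUG_ON(encoded_page_ptr(ep) != page);
        VM_BUG_ON(encoded_page_flags(ep) != 1);
}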
/**
* struct folio - Represents a contiguous set of bytes.
* @flags: Identical to the page flags.
@@ -258,12 +303,19 @@ struct page {
* to find how many references there are to this folio.
* @memcg_data: Memory Control Group data.
* @_flags_1: For large folios, additional page flags.
* @__head: Points to the folio. Do not use.
* @_head_1: Points to the folio. Do not use.
* @_folio_dtor: Which destructor to use for this folio.
* @_folio_order: Do not use directly, call folio_order().
* @_total_mapcount: Do not use directly, call folio_entire_mapcount().
* @_compound_mapcount: Do not use directly, call folio_entire_mapcount().
* @_subpages_mapcount: Do not use directly, call folio_mapcount().
* @_pincount: Do not use directly, call folio_maybe_dma_pinned().
* @_folio_nr_pages: Do not use directly, call folio_nr_pages().
* @_flags_2: For alignment. Do not use.
* @_head_2: Points to the folio. Do not use.
* @_hugetlb_subpool: Do not use directly, use accessor in hugetlb.h.
* @_hugetlb_cgroup: Do not use directly, use accessor in hugetlb_cgroup.h.
* @_hugetlb_cgroup_rsvd: Do not use directly, use accessor in hugetlb_cgroup.h.
* @_hugetlb_hwpoison: Do not use directly, call raw_hwp_list_head().
*
* A folio is a physically, virtually and logically contiguous set
* of bytes. It is a power-of-two in size, and it is aligned to that
@@ -302,15 +354,32 @@ struct folio {
};
struct page page;
};
unsigned long _flags_1;
unsigned long __head;
unsigned char _folio_dtor;
unsigned char _folio_order;
atomic_t _total_mapcount;
atomic_t _pincount;
union {
struct {
unsigned long _flags_1;
unsigned long _head_1;
unsigned char _folio_dtor;
unsigned char _folio_order;
atomic_t _compound_mapcount;
atomic_t _subpages_mapcount;
atomic_t _pincount;
#ifdef CONFIG_64BIT
unsigned int _folio_nr_pages;
unsigned int _folio_nr_pages;
#endif
};
struct page __page_1;
};
union {
struct {
unsigned long _flags_2;
unsigned long _head_2;
void *_hugetlb_subpool;
void *_hugetlb_cgroup;
void *_hugetlb_cgroup_rsvd;
void *_hugetlb_hwpoison;
};
struct page __page_2;
};
};
#define FOLIO_MATCH(pg, fl) \
@@ -331,15 +400,26 @@ FOLIO_MATCH(memcg_data, memcg_data);
static_assert(offsetof(struct folio, fl) == \
offsetof(struct page, pg) + sizeof(struct page))
FOLIO_MATCH(flags, _flags_1);
FOLIO_MATCH(compound_head, __head);
FOLIO_MATCH(compound_head, _head_1);
FOLIO_MATCH(compound_dtor, _folio_dtor);
FOLIO_MATCH(compound_order, _folio_order);
FOLIO_MATCH(compound_mapcount, _total_mapcount);
FOLIO_MATCH(compound_mapcount, _compound_mapcount);
FOLIO_MATCH(subpages_mapcount, _subpages_mapcount);
FOLIO_MATCH(compound_pincount, _pincount);
#ifdef CONFIG_64BIT
FOLIO_MATCH(compound_nr, _folio_nr_pages);
#endif
#undef FOLIO_MATCH
#define FOLIO_MATCH(pg, fl) \
static_assert(offsetof(struct folio, fl) == \
offsetof(struct page, pg) + 2 * sizeof(struct page))
FOLIO_MATCH(flags, _flags_2);
FOLIO_MATCH(compound_head, _head_2);
FOLIO_MATCH(hugetlb_subpool, _hugetlb_subpool);
FOLIO_MATCH(hugetlb_cgroup, _hugetlb_cgroup);
FOLIO_MATCH(hugetlb_cgroup_rsvd, _hugetlb_cgroup_rsvd);
FOLIO_MATCH(hugetlb_hwpoison, _hugetlb_hwpoison);
#undef FOLIO_MATCH
static inline atomic_t *folio_mapcount_ptr(struct folio *folio)
{
@@ -347,11 +427,22 @@ static inline atomic_t *folio_mapcount_ptr(struct folio *folio)
return &tail->compound_mapcount;
}
static inline atomic_t *folio_subpages_mapcount_ptr(struct folio *folio)
{
struct page *tail = &folio->page + 1;
return &tail->subpages_mapcount;
}
static inline atomic_t *compound_mapcount_ptr(struct page *page)
{
return &page[1].compound_mapcount;
}
static inline atomic_t *subpages_mapcount_ptr(struct page *page)
{
return &page[1].subpages_mapcount;
}
static inline atomic_t *compound_pincount_ptr(struct page *page)
{
return &page[1].compound_pincount;
@@ -461,21 +552,11 @@ struct vm_area_struct {
* For areas with an address space and backing store,
* linkage into the address_space->i_mmap interval tree.
*
* For private anonymous mappings, a pointer to a null terminated string
* containing the name given to the vma, or NULL if unnamed.
*/
union {
struct {
struct rb_node rb;
unsigned long rb_subtree_last;
} shared;
/*
* Serialized by mmap_sem. Never use directly because it is
* valid only when vm_file is NULL. Use anon_vma_name instead.
*/
struct anon_vma_name *anon_name;
};
struct {
struct rb_node rb;
unsigned long rb_subtree_last;
} shared;
/*
* A file's MAP_PRIVATE vma can be in both i_mmap tree and anon_vma
@@ -496,6 +577,14 @@ struct vm_area_struct {
struct file * vm_file; /* File we map to (can be NULL). */
void * vm_private_data; /* was vm_pte (shared mem) */
#ifdef CONFIG_ANON_VMA_NAME
/*
* For private and shared anonymous mappings, a pointer to a null
* terminated string containing the name given to the vma, or NULL if
* unnamed. Serialized by mmap_sem. Use anon_vma_name to access.
*/
struct anon_vma_name *anon_name;
#endif
#ifdef CONFIG_SWAP
atomic_long_t swap_readahead_info;
#endif
@@ -612,11 +701,7 @@ struct mm_struct {
unsigned long saved_auxv[AT_VECTOR_SIZE]; /* for /proc/PID/auxv */
/*
* Special counters, in some configurations protected by the
* page_table_lock, in other configurations by being atomic.
*/
struct mm_rss_stat rss_stat;
struct percpu_counter rss_stat[NR_MM_COUNTERS];
struct linux_binfmt *binfmt;
@@ -847,7 +932,6 @@ typedef __bitwise unsigned int vm_fault_t;
* @VM_FAULT_OOM: Out Of Memory
* @VM_FAULT_SIGBUS: Bad access
* @VM_FAULT_MAJOR: Page read from storage
* @VM_FAULT_WRITE: Special case for get_user_pages
* @VM_FAULT_HWPOISON: Hit poisoned small page
* @VM_FAULT_HWPOISON_LARGE: Hit poisoned large page. Index encoded
* in upper bits
@@ -868,7 +952,6 @@ enum vm_fault_reason {
VM_FAULT_OOM = (__force vm_fault_t)0x000001,
VM_FAULT_SIGBUS = (__force vm_fault_t)0x000002,
VM_FAULT_MAJOR = (__force vm_fault_t)0x000004,
VM_FAULT_WRITE = (__force vm_fault_t)0x000008,
VM_FAULT_HWPOISON = (__force vm_fault_t)0x000010,
VM_FAULT_HWPOISON_LARGE = (__force vm_fault_t)0x000020,
VM_FAULT_SIGSEGV = (__force vm_fault_t)0x000040,
@@ -894,7 +977,6 @@ enum vm_fault_reason {
{ VM_FAULT_OOM, "OOM" }, \
{ VM_FAULT_SIGBUS, "SIGBUS" }, \
{ VM_FAULT_MAJOR, "MAJOR" }, \
{ VM_FAULT_WRITE, "WRITE" }, \
{ VM_FAULT_HWPOISON, "HWPOISON" }, \
{ VM_FAULT_HWPOISON_LARGE, "HWPOISON_LARGE" }, \
{ VM_FAULT_SIGSEGV, "SIGSEGV" }, \
@@ -957,9 +1039,9 @@ typedef struct {
* @FAULT_FLAG_REMOTE: The fault is not for current task/mm.
* @FAULT_FLAG_INSTRUCTION: The fault was during an instruction fetch.
* @FAULT_FLAG_INTERRUPTIBLE: The fault can be interrupted by non-fatal signals.
* @FAULT_FLAG_UNSHARE: The fault is an unsharing request to unshare (and mark
* exclusive) a possibly shared anonymous page that is
* mapped R/O.
* @FAULT_FLAG_UNSHARE: The fault is an unsharing request to break COW in a
* COW mapping, making sure that an exclusive anon page is
* mapped after the fault.
* @FAULT_FLAG_ORIG_PTE_VALID: whether the fault has vmf->orig_pte cached.
* We should only access orig_pte if this flag set.
*
@@ -984,7 +1066,7 @@ typedef struct {
*
* The combination FAULT_FLAG_WRITE|FAULT_FLAG_UNSHARE is illegal.
* FAULT_FLAG_UNSHARE is ignored and treated like an ordinary read fault when
* no existing R/O-mapped anonymous page is encountered.
* applied to mappings that are not COW mappings.
*/
enum fault_flag {
FAULT_FLAG_WRITE = 1 << 0,


@@ -36,19 +36,6 @@ enum {
NR_MM_COUNTERS
};
#if USE_SPLIT_PTE_PTLOCKS && defined(CONFIG_MMU)
#define SPLIT_RSS_COUNTING
/* per-thread cached information, */
struct task_rss_stat {
int events; /* for synchronization threshold */
int count[NR_MM_COUNTERS];
};
#endif /* USE_SPLIT_PTE_PTLOCKS */
struct mm_rss_stat {
atomic_long_t count[NR_MM_COUNTERS];
};
struct page_frag {
struct page *page;
#if (BITS_PER_LONG > 32) || (PAGE_SIZE >= 65536)

View File

@@ -1224,7 +1224,7 @@ typedef struct pglist_data {
/* start time in ms of current promote threshold adjustment period */
unsigned int nbp_th_start;
/*
* number of promote candidate pages at stat time of current promote
* number of promote candidate pages at start time of current promote
* threshold adjustment period
*/
unsigned long nbp_th_nr_cand;


@@ -176,9 +176,6 @@ enum pageflags {
/* SLOB */
PG_slob_free = PG_private,
/* Compound pages. Stored in first tail page's flags */
PG_double_map = PG_workingset,
#ifdef CONFIG_MEMORY_FAILURE
/*
* Compound pages. Stored in first tail page's flags.
@@ -641,7 +638,7 @@ PAGEFLAG_FALSE(VmemmapSelfHosted, vmemmap_self_hosted)
* Different with flags above, this flag is used only for fsdax mode. It
* indicates that this page->mapping is now under reflink case.
*/
#define PAGE_MAPPING_DAX_COW 0x1
#define PAGE_MAPPING_DAX_SHARED ((void *)0x1)
static __always_inline bool folio_mapping_flags(struct folio *folio)
{
@@ -874,29 +871,11 @@ static inline int PageTransTail(struct page *page)
{
return PageTail(page);
}
/*
* PageDoubleMap indicates that the compound page is mapped with PTEs as well
* as PMDs.
*
* This is required for optimization of rmap operations for THP: we can postpone
* per small page mapcount accounting (and its overhead from atomic operations)
* until the first PMD split.
*
* For the page PageDoubleMap means ->_mapcount in all sub-pages is offset up
* by one. This reference will go away with last compound_mapcount.
*
* See also __split_huge_pmd_locked() and page_remove_anon_compound_rmap().
*/
PAGEFLAG(DoubleMap, double_map, PF_SECOND)
TESTSCFLAG(DoubleMap, double_map, PF_SECOND)
#else
TESTPAGEFLAG_FALSE(TransHuge, transhuge)
TESTPAGEFLAG_FALSE(TransCompound, transcompound)
TESTPAGEFLAG_FALSE(TransCompoundMap, transcompoundmap)
TESTPAGEFLAG_FALSE(TransTail, transtail)
PAGEFLAG_FALSE(DoubleMap, double_map)
TESTSCFLAG_FALSE(DoubleMap, double_map)
#endif
#if defined(CONFIG_MEMORY_FAILURE) && defined(CONFIG_TRANSPARENT_HUGEPAGE)


@@ -504,9 +504,8 @@ pgoff_t page_cache_prev_miss(struct address_space *mapping,
#define FGP_NOFS 0x00000010
#define FGP_NOWAIT 0x00000020
#define FGP_FOR_MMAP 0x00000040
-#define FGP_HEAD 0x00000080
-#define FGP_ENTRY 0x00000100
-#define FGP_STABLE 0x00000200
+#define FGP_ENTRY 0x00000080
+#define FGP_STABLE 0x00000100
struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index,
int fgp_flags, gfp_t gfp);
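
With FGP_HEAD gone and the remaining flags renumbered, a lookup still reads as in this minimal sketch (error handling trimmed; FGP_CREAT allocates on a miss, FGP_STABLE waits for writeback to finish):

        struct folio *folio;

        folio = __filemap_get_folio(mapping, index,
                                    FGP_LOCK | FGP_CREAT | FGP_STABLE,
                                    mapping_gfp_mask(mapping));
        if (!folio)
                return -ENOMEM;
        /* ... operate on the locked, stable folio ... */
        folio_unlock(folio);
        folio_put(folio);
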
@@ -1102,12 +1101,10 @@ int add_to_page_cache_lru(struct page *page, struct address_space *mapping,
int filemap_add_folio(struct address_space *mapping, struct folio *folio,
pgoff_t index, gfp_t gfp);
void filemap_remove_folio(struct folio *folio);
-void delete_from_page_cache(struct page *page);
void __filemap_remove_folio(struct folio *folio, void *shadow);
-void replace_page_cache_page(struct page *old, struct page *new);
+void replace_page_cache_folio(struct folio *old, struct folio *new);
void delete_from_page_cache_batch(struct address_space *mapping,
struct folio_batch *fbatch);
-int try_to_release_page(struct page *page, gfp_t gfp);
bool filemap_release_folio(struct folio *folio, gfp_t gfp);
loff_t mapping_seek_hole_data(struct address_space *, loff_t start, loff_t end,
int whence);


@@ -27,6 +27,8 @@ struct mm_walk;
* "do page table walk over the current vma", returning
* a negative value means "abort current page table walk
* right now" and returning 1 means "skip the current vma"
+* Note that this callback is not called when the caller
+* passes in a single VMA as for walk_page_vma().
* @pre_vma: if set, called before starting walk on a non-null vma.
* @post_vma: if set, called after a walk on a non-null vma, provided
* that @pre_vma and the vma walk succeeded.
@@ -99,6 +101,9 @@ int walk_page_range_novma(struct mm_struct *mm, unsigned long start,
unsigned long end, const struct mm_walk_ops *ops,
pgd_t *pgd,
void *private);
+int walk_page_range_vma(struct vm_area_struct *vma, unsigned long start,
+unsigned long end, const struct mm_walk_ops *ops,
+void *private);
int walk_page_vma(struct vm_area_struct *vma, const struct mm_walk_ops *ops,
void *private);
int walk_page_mapping(struct address_space *mapping, pgoff_t first_index,
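
A minimal sketch of the new walk_page_range_vma() entry point: count the present PTEs in one VMA. The callback and function names are illustrative; the caller must hold mmap_lock.

static int count_present_pte(pte_t *pte, unsigned long addr,
                             unsigned long next, struct mm_walk *walk)
{
        unsigned long *nr = walk->private;

        if (pte_present(ptep_get(pte)))
                (*nr)++;
        return 0;
}

static const struct mm_walk_ops count_ops = {
        .pte_entry = count_present_pte,
};

static unsigned long nr_present_in_vma(struct vm_area_struct *vma)
{
        unsigned long nr = 0;

        walk_page_range_vma(vma, vma->vm_start, vma->vm_end, &count_ops, &nr);
        return nr;
}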


@@ -13,7 +13,6 @@
#include <linux/threads.h>
#include <linux/percpu.h>
#include <linux/types.h>
-#include <linux/gfp.h>
/* percpu_counter batch for local add or sub */
#define PERCPU_COUNTER_LOCAL_BATCH INT_MAX
@@ -46,6 +45,7 @@ void percpu_counter_set(struct percpu_counter *fbc, s64 amount);
void percpu_counter_add_batch(struct percpu_counter *fbc, s64 amount,
s32 batch);
s64 __percpu_counter_sum(struct percpu_counter *fbc);
+s64 percpu_counter_sum_all(struct percpu_counter *fbc);
int __percpu_counter_compare(struct percpu_counter *fbc, s64 rhs, s32 batch);
void percpu_counter_sync(struct percpu_counter *fbc);
@@ -194,6 +194,11 @@ static inline s64 percpu_counter_sum(struct percpu_counter *fbc)
return percpu_counter_read(fbc);
}
+static inline s64 percpu_counter_sum_all(struct percpu_counter *fbc)
+{
+return percpu_counter_read(fbc);
+}
static inline bool percpu_counter_initialized(struct percpu_counter *fbc)
{
return true;
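
percpu_counter_sum_all() differs from percpu_counter_sum() in that it also folds in counts parked on CPUs that are not currently online; on !SMP both collapse to a plain read, as above. A trivial usage sketch (error handling omitted):

        struct percpu_counter nr_widgets;

        percpu_counter_init(&nr_widgets, 0, GFP_KERNEL);
        percpu_counter_add(&nr_widgets, 42);
        pr_info("online sum=%lld, all-cpu sum=%lld\n",
                percpu_counter_sum(&nr_widgets),
                percpu_counter_sum_all(&nr_widgets));
        percpu_counter_destroy(&nr_widgets);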


@@ -425,9 +425,7 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
unsigned long address, pte_t *ptep,
int full)
{
-pte_t pte;
-pte = ptep_get_and_clear(mm, address, ptep);
-return pte;
+return ptep_get_and_clear(mm, address, ptep);
}
#endif
@@ -503,30 +501,6 @@ static inline pte_t pte_sw_mkyoung(pte_t pte)
#define pte_sw_mkyoung pte_sw_mkyoung
#endif
-#ifndef pte_savedwrite
-#define pte_savedwrite pte_write
-#endif
-#ifndef pte_mk_savedwrite
-#define pte_mk_savedwrite pte_mkwrite
-#endif
-#ifndef pte_clear_savedwrite
-#define pte_clear_savedwrite pte_wrprotect
-#endif
-#ifndef pmd_savedwrite
-#define pmd_savedwrite pmd_write
-#endif
-#ifndef pmd_mk_savedwrite
-#define pmd_mk_savedwrite pmd_mkwrite
-#endif
-#ifndef pmd_clear_savedwrite
-#define pmd_clear_savedwrite pmd_wrprotect
-#endif
#ifndef __HAVE_ARCH_PMDP_SET_WRPROTECT
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static inline void pmdp_set_wrprotect(struct mm_struct *mm,


@@ -870,9 +870,6 @@ struct task_struct {
struct mm_struct *mm;
struct mm_struct *active_mm;
-#ifdef SPLIT_RSS_COUNTING
-struct task_rss_stat rss_stat;
-#endif
int exit_state;
int exit_code;
int exit_signal;


@@ -2,6 +2,9 @@
#ifndef _LINUX_SHRINKER_H
#define _LINUX_SHRINKER_H
+#include <linux/atomic.h>
+#include <linux/types.h>
/*
* This struct is used to pass information from page reclaim to the shrinkers.
* We consolidate the values for easier extension later.


@@ -55,22 +55,14 @@ static inline int current_is_kswapd(void)
* actions on faults.
*/
-#define SWP_SWAPIN_ERROR_NUM 1
-#define SWP_SWAPIN_ERROR (MAX_SWAPFILES + SWP_HWPOISON_NUM + \
-SWP_MIGRATION_NUM + SWP_DEVICE_NUM + \
-SWP_PTE_MARKER_NUM)
/*
-* PTE markers are used to persist information onto PTEs that are mapped with
-* file-backed memories. As its name "PTE" hints, it should only be applied to
-* the leaves of pgtables.
+* PTE markers are used to persist information onto PTEs that otherwise
+* should be a none pte. As its name "PTE" hints, it should only be
+* applied to the leaves of pgtables.
*/
-#ifdef CONFIG_PTE_MARKER
#define SWP_PTE_MARKER_NUM 1
#define SWP_PTE_MARKER (MAX_SWAPFILES + SWP_HWPOISON_NUM + \
SWP_MIGRATION_NUM + SWP_DEVICE_NUM)
-#else
-#define SWP_PTE_MARKER_NUM 0
-#endif
/*
* Unaddressable device memory support. See include/linux/hmm.h and
@@ -125,7 +117,7 @@ static inline int current_is_kswapd(void)
#define MAX_SWAPFILES \
((1 << MAX_SWAPFILES_SHIFT) - SWP_DEVICE_NUM - \
SWP_MIGRATION_NUM - SWP_HWPOISON_NUM - \
-SWP_PTE_MARKER_NUM - SWP_SWAPIN_ERROR_NUM)
+SWP_PTE_MARKER_NUM)
/*
* Magic header for a swap area. The first part of the union is
@@ -384,11 +376,11 @@ extern unsigned long totalreserve_pages;
/* linux/mm/swap.c */
-void lru_note_cost(struct lruvec *lruvec, bool file, unsigned int nr_pages);
-void lru_note_cost_folio(struct folio *);
+void lru_note_cost(struct lruvec *lruvec, bool file,
+unsigned int nr_io, unsigned int nr_rotated);
+void lru_note_cost_refault(struct folio *);
void folio_add_lru(struct folio *);
void folio_add_lru_vma(struct folio *, struct vm_area_struct *);
void lru_cache_add(struct page *);
void mark_page_accessed(struct page *);
void folio_mark_accessed(struct folio *);
@@ -426,7 +418,8 @@ extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
unsigned long nr_pages,
gfp_t gfp_mask,
-unsigned int reclaim_options);
+unsigned int reclaim_options,
+nodemask_t *nodemask);
extern unsigned long mem_cgroup_shrink_node(struct mem_cgroup *mem,
gfp_t gfp_mask, bool noswap,
pg_data_t *pgdat,
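
The new nodemask parameter backs the nodes= argument of memory.reclaim: proactive reclaim can be restricted to specific nodes, while passing NULL keeps the old behaviour of considering all nodes. A simplified sketch (the memcg, page count and reclaim_options value are illustrative):

        nodemask_t allowed = NODE_MASK_NONE;
        unsigned long reclaimed;

        node_set(1, allowed);           /* reclaim only from node 1 */
        reclaimed = try_to_free_mem_cgroup_pages(memcg, 256, GFP_KERNEL,
                                                 0 /* reclaim_options */,
                                                 &allowed);
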
@@ -470,7 +463,7 @@ static inline unsigned long total_swapcache_pages(void)
extern void free_swap_cache(struct page *page);
extern void free_page_and_swap_cache(struct page *);
-extern void free_pages_and_swap_cache(struct page **, int);
+extern void free_pages_and_swap_cache(struct encoded_page **, int);
/* linux/mm/swapfile.c */
extern atomic_long_t nr_swap_pages;
extern long total_swap_pages;


@@ -164,16 +164,6 @@ static inline void *swp_to_radix_entry(swp_entry_t entry)
return xa_mk_value(entry.val);
}
-static inline swp_entry_t make_swapin_error_entry(struct page *page)
-{
-return swp_entry(SWP_SWAPIN_ERROR, page_to_pfn(page));
-}
-static inline int is_swapin_error_entry(swp_entry_t entry)
-{
-return swp_type(entry) == SWP_SWAPIN_ERROR;
-}
#if IS_ENABLED(CONFIG_DEVICE_PRIVATE)
static inline swp_entry_t make_readable_device_private_entry(pgoff_t offset)
{
@@ -411,10 +401,9 @@ static inline bool is_migration_entry_dirty(swp_entry_t entry)
typedef unsigned long pte_marker;
-#define PTE_MARKER_UFFD_WP BIT(0)
-#define PTE_MARKER_MASK (PTE_MARKER_UFFD_WP)
-#ifdef CONFIG_PTE_MARKER
+#define PTE_MARKER_UFFD_WP BIT(0)
+#define PTE_MARKER_SWAPIN_ERROR BIT(1)
+#define PTE_MARKER_MASK (BIT(2) - 1)
static inline swp_entry_t make_pte_marker_entry(pte_marker marker)
{
@@ -436,37 +425,22 @@ static inline bool is_pte_marker(pte_t pte)
return is_swap_pte(pte) && is_pte_marker_entry(pte_to_swp_entry(pte));
}
-#else /* CONFIG_PTE_MARKER */
-static inline swp_entry_t make_pte_marker_entry(pte_marker marker)
-{
-/* This should never be called if !CONFIG_PTE_MARKER */
-WARN_ON_ONCE(1);
-return swp_entry(0, 0);
-}
-static inline bool is_pte_marker_entry(swp_entry_t entry)
-{
-return false;
-}
-static inline pte_marker pte_marker_get(swp_entry_t entry)
-{
-return 0;
-}
-static inline bool is_pte_marker(pte_t pte)
-{
-return false;
-}
-#endif /* CONFIG_PTE_MARKER */
static inline pte_t make_pte_marker(pte_marker marker)
{
return swp_entry_to_pte(make_pte_marker_entry(marker));
}
+static inline swp_entry_t make_swapin_error_entry(void)
+{
+return make_pte_marker_entry(PTE_MARKER_SWAPIN_ERROR);
+}
+static inline int is_swapin_error_entry(swp_entry_t entry)
+{
+return is_pte_marker_entry(entry) &&
+(pte_marker_get(entry) & PTE_MARKER_SWAPIN_ERROR);
+}
/*
* This is a special version to check pte_none() just to cover the case when
* the pte is a pte marker. It existed because in many cases the pte marker
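
Swap-in errors are now remembered with a PTE marker instead of the old SWP_SWAPIN_ERROR swap type, so a later access fails cleanly rather than reading corrupt data; pte_none_mostly() still treats such marker PTEs as empty for callers that only care about emptiness. A rough fragment (variable names illustrative):

        /* When reading the page back from swap fails: */
        pte_t marker = make_pte_marker(PTE_MARKER_SWAPIN_ERROR);

        set_pte_at(mm, addr, ptep, marker);

        /* Later, in the fault path: */
        if (is_swapin_error_entry(pte_to_swp_entry(vmf->orig_pte)))
                return VM_FAULT_SIGBUS;
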
@@ -479,9 +453,6 @@ static inline pte_t make_pte_marker(pte_marker marker)
* memory, kernel-only memory (including when the system is during-boot),
* non-ram based generic file-system. It's fine to be used even there, but the
* extra pte marker check will be pure overhead.
-*
-* For systems configured with !CONFIG_PTE_MARKER this will be automatically
-* optimized to pte_none().
*/
static inline int pte_none_mostly(pte_t pte)
{
@@ -583,8 +554,6 @@ static inline int is_pmd_migration_entry(pmd_t pmd)
#ifdef CONFIG_MEMORY_FAILURE
-extern atomic_long_t num_poisoned_pages __read_mostly;
/*
* Support for hardware poisoned pages
*/
@@ -599,17 +568,7 @@ static inline int is_hwpoison_entry(swp_entry_t entry)
return swp_type(entry) == SWP_HWPOISON;
}
-static inline void num_poisoned_pages_inc(void)
-{
-atomic_long_inc(&num_poisoned_pages);
-}
-static inline void num_poisoned_pages_sub(long i)
-{
-atomic_long_sub(i, &num_poisoned_pages);
-}
-#else /* CONFIG_MEMORY_FAILURE */
+#else
static inline swp_entry_t make_hwpoison_entry(struct page *page)
{
@@ -620,15 +579,7 @@ static inline int is_hwpoison_entry(swp_entry_t swp)
{
return 0;
}
-static inline void num_poisoned_pages_inc(void)
-{
-}
-static inline void num_poisoned_pages_sub(long i)
-{
-}
-#endif /* CONFIG_MEMORY_FAILURE */
+#endif
static inline int non_swap_entry(swp_entry_t entry)
{
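
For reference, the hwpoison entries kept in this header are used roughly as below; only the num_poisoned_pages_* bookkeeping is no longer exposed here. The fragment is illustrative:

        /* Unmap path: replace the poisoned page's PTE with a poison entry. */
        swp_entry_t entry = make_hwpoison_entry(page);

        set_pte_at(mm, addr, ptep, swp_entry_to_pte(entry));

        /* Fault path: report the poison to the faulting task. */
        if (is_hwpoison_entry(pte_to_swp_entry(pte)))
                return VM_FAULT_HWPOISON;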


@@ -40,10 +40,13 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
PGREUSE,
PGSTEAL_KSWAPD,
PGSTEAL_DIRECT,
+PGSTEAL_KHUGEPAGED,
PGDEMOTE_KSWAPD,
PGDEMOTE_DIRECT,
+PGDEMOTE_KHUGEPAGED,
PGSCAN_KSWAPD,
PGSCAN_DIRECT,
+PGSCAN_KHUGEPAGED,
PGSCAN_DIRECT_THROTTLE,
PGSCAN_ANON,
PGSCAN_FILE,
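
The three *_KHUGEPAGED counters let reclaim and demotion done on behalf of khugepaged show up separately from kswapd and direct reclaim in /proc/vmstat. A sketch of how a reclaim path might account them; current_is_khugepaged() is an assumed helper, not part of this diff:

        if (current_is_kswapd())
                count_vm_events(PGSTEAL_KSWAPD, nr_reclaimed);
        else if (current_is_khugepaged())       /* assumed helper */
                count_vm_events(PGSTEAL_KHUGEPAGED, nr_reclaimed);
        else
                count_vm_events(PGSTEAL_DIRECT, nr_reclaimed);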


@@ -55,5 +55,7 @@ void zs_unmap_object(struct zs_pool *pool, unsigned long handle);
unsigned long zs_get_total_pages(struct zs_pool *pool);
unsigned long zs_compact(struct zs_pool *pool);
+unsigned int zs_lookup_class_index(struct zs_pool *pool, unsigned int size);
void zs_pool_stats(struct zs_pool *pool, struct zs_pool_stats *stats);
#endif
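
zs_lookup_class_index() maps an object size to its zsmalloc size class. With zram's new multi-stream recompression, a caller can use it to check whether a better-compressed copy would actually land in a smaller class before storing it. Sketch (variable names illustrative):

        unsigned int old_class = zs_lookup_class_index(pool, old_comp_len);
        unsigned int new_class = zs_lookup_class_index(pool, new_comp_len);

        if (new_class >= old_class)
                /* same or larger class: recompression saves no memory */
                return 0;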