Merge tag 'slab-for-6.18' of git://git.kernel.org/pub/scm/linux/kernel/git/vbabka/slab

Pull slab updates from Vlastimil Babka:

 - A new layer for caching objects for allocation and free via percpu
   arrays called sheaves.

   The aim is to combine the good parts of SLAB (lower-overhead and
   simpler percpu caching, compared to SLUB) without the past issues
   with arrays for freeing remote NUMA node objects and their flushing.

   It also allows more efficient kfree_rcu(), and cheaper object
   preallocations for cases where the exact number of objects is
   unknown, but an upper bound is.

   Currently VMAs and maple nodes are using this new caching, with a
   plan to enable it for all caches and remove the complex SLUB fastpath
   based on cpu (partial) slabs and this_cpu_cmpxchg_double().
   (Vlastimil Babka, with Liam Howlett and Pedro Falcato for the maple
   tree changes)

 - Re-entrant kmalloc_nolock(), which allows opportunistic allocations
   from NMI and tracing/kprobe contexts.

   Building on prior page allocator and memcg changes, it will result in
   removing BPF-specific caches on top of slab. (Alexei Starovoitov)

 - Various fixes and cleanups. (Kuan-Wei Chiu, Matthew Wilcox, Suren
   Baghdasaryan, Ye Liu)

* tag 'slab-for-6.18' of git://git.kernel.org/pub/scm/linux/kernel/git/vbabka/slab: (40 commits)
  slab: Introduce kmalloc_nolock() and kfree_nolock().
  slab: Reuse first bit for OBJEXTS_ALLOC_FAIL
  slab: Make slub local_(try)lock more precise for LOCKDEP
  mm: Introduce alloc_frozen_pages_nolock()
  mm: Allow GFP_ACCOUNT to be used in alloc_pages_nolock().
  locking/local_lock: Introduce local_lock_is_locked().
  maple_tree: Convert forking to use the sheaf interface
  maple_tree: Add single node allocation support to maple state
  maple_tree: Prefilled sheaf conversion and testing
  tools/testing: Add support for prefilled slab sheafs
  maple_tree: Replace mt_free_one() with kfree()
  maple_tree: Use kfree_rcu in ma_free_rcu
  testing/radix-tree/maple: Hack around kfree_rcu not existing
  tools/testing: include maple-shim.c in maple.c
  maple_tree: use percpu sheaves for maple_node_cache
  mm, vma: use percpu sheaves for vm_area_struct cache
  tools/testing: Add support for changes to slab for sheaves
  slab: allow NUMA restricted allocations to use percpu sheaves
  tools/testing/vma: Implement vm_refcnt reset
  slab: skip percpu sheaves for remote object freeing
  ...
This commit is contained in:
Linus Torvalds
2025-10-02 15:58:05 -07:00
27 changed files with 2909 additions and 1592 deletions

View File

@@ -354,7 +354,7 @@ static inline struct page *alloc_page_vma_noprof(gfp_t gfp,
}
#define alloc_page_vma(...) alloc_hooks(alloc_page_vma_noprof(__VA_ARGS__))
struct page *alloc_pages_nolock_noprof(int nid, unsigned int order);
struct page *alloc_pages_nolock_noprof(gfp_t gfp_flags, int nid, unsigned int order);
#define alloc_pages_nolock(...) alloc_hooks(alloc_pages_nolock_noprof(__VA_ARGS__))
extern unsigned long get_free_pages_noprof(gfp_t gfp_mask, unsigned int order);

View File

@@ -200,7 +200,7 @@ static __always_inline bool kasan_slab_pre_free(struct kmem_cache *s,
}
bool __kasan_slab_free(struct kmem_cache *s, void *object, bool init,
bool still_accessible);
bool still_accessible, bool no_quarantine);
/**
* kasan_slab_free - Poison, initialize, and quarantine a slab object.
* @object: Object to be freed.
@@ -226,11 +226,13 @@ bool __kasan_slab_free(struct kmem_cache *s, void *object, bool init,
* @Return true if KASAN took ownership of the object; false otherwise.
*/
static __always_inline bool kasan_slab_free(struct kmem_cache *s,
void *object, bool init,
bool still_accessible)
void *object, bool init,
bool still_accessible,
bool no_quarantine)
{
if (kasan_enabled())
return __kasan_slab_free(s, object, init, still_accessible);
return __kasan_slab_free(s, object, init, still_accessible,
no_quarantine);
return false;
}
@@ -427,7 +429,8 @@ static inline bool kasan_slab_pre_free(struct kmem_cache *s, void *object)
}
static inline bool kasan_slab_free(struct kmem_cache *s, void *object,
bool init, bool still_accessible)
bool init, bool still_accessible,
bool no_quarantine)
{
return false;
}

View File

@@ -66,6 +66,8 @@
*/
#define local_trylock(lock) __local_trylock(this_cpu_ptr(lock))
#define local_lock_is_locked(lock) __local_lock_is_locked(lock)
/**
* local_trylock_irqsave - Try to acquire a per CPU local lock, save and disable
* interrupts if acquired

View File

@@ -17,7 +17,10 @@ typedef struct {
/* local_trylock() and local_trylock_irqsave() only work with local_trylock_t */
typedef struct {
local_lock_t llock;
#ifdef CONFIG_DEBUG_LOCK_ALLOC
struct lockdep_map dep_map;
struct task_struct *owner;
#endif
u8 acquired;
} local_trylock_t;
@@ -31,7 +34,7 @@ typedef struct {
.owner = NULL,
# define LOCAL_TRYLOCK_DEBUG_INIT(lockname) \
.llock = { LOCAL_LOCK_DEBUG_INIT((lockname).llock) },
LOCAL_LOCK_DEBUG_INIT(lockname)
static inline void local_lock_acquire(local_lock_t *l)
{
@@ -81,7 +84,7 @@ do { \
local_lock_debug_init(lock); \
} while (0)
#define __local_trylock_init(lock) __local_lock_init(lock.llock)
#define __local_trylock_init(lock) __local_lock_init((local_lock_t *)lock)
#define __spinlock_nested_bh_init(lock) \
do { \
@@ -162,6 +165,9 @@ do { \
!!tl; \
})
/* preemption or migration must be disabled before calling __local_lock_is_locked */
#define __local_lock_is_locked(lock) READ_ONCE(this_cpu_ptr(lock)->acquired)
#define __local_lock_release(lock) \
do { \
local_trylock_t *tl; \
@@ -282,4 +288,8 @@ do { \
__local_trylock(lock); \
})
/* migration must be disabled before calling __local_lock_is_locked */
#define __local_lock_is_locked(__lock) \
(rt_mutex_owner(&this_cpu_ptr(__lock)->lock) == current)
#endif /* CONFIG_PREEMPT_RT */

View File

@@ -442,7 +442,9 @@ struct ma_state {
struct maple_enode *node; /* The node containing this entry */
unsigned long min; /* The minimum index of this node - implied pivot min */
unsigned long max; /* The maximum index of this node - implied pivot max */
struct maple_alloc *alloc; /* Allocated nodes for this operation */
struct slab_sheaf *sheaf; /* Allocated nodes for this operation */
struct maple_node *alloc; /* A single allocated node for fast path writes */
unsigned long node_request; /* The number of nodes to allocate for this operation */
enum maple_status status; /* The status of the state (active, start, none, etc) */
unsigned char depth; /* depth of tree descent during write */
unsigned char offset;
@@ -490,7 +492,9 @@ struct ma_wr_state {
.status = ma_start, \
.min = 0, \
.max = ULONG_MAX, \
.sheaf = NULL, \
.alloc = NULL, \
.node_request = 0, \
.mas_flags = 0, \
.store_type = wr_invalid, \
}

View File

@@ -341,17 +341,25 @@ enum page_memcg_data_flags {
__NR_MEMCG_DATA_FLAGS = (1UL << 2),
};
#define __OBJEXTS_ALLOC_FAIL MEMCG_DATA_OBJEXTS
#define __FIRST_OBJEXT_FLAG __NR_MEMCG_DATA_FLAGS
#else /* CONFIG_MEMCG */
#define __OBJEXTS_ALLOC_FAIL (1UL << 0)
#define __FIRST_OBJEXT_FLAG (1UL << 0)
#endif /* CONFIG_MEMCG */
enum objext_flags {
/* slabobj_ext vector failed to allocate */
OBJEXTS_ALLOC_FAIL = __FIRST_OBJEXT_FLAG,
/*
* Use bit 0 with zero other bits to signal that slabobj_ext vector
* failed to allocate. The same bit 0 with valid upper bits means
* MEMCG_DATA_OBJEXTS.
*/
OBJEXTS_ALLOC_FAIL = __OBJEXTS_ALLOC_FAIL,
/* slabobj_ext vector allocated with kmalloc_nolock() */
OBJEXTS_NOSPIN_ALLOC = __FIRST_OBJEXT_FLAG,
/* the next bit after the last actual flag */
__NR_OBJEXTS_FLAGS = (__FIRST_OBJEXT_FLAG << 1),
};

View File

@@ -44,6 +44,16 @@ static inline bool rt_mutex_base_is_locked(struct rt_mutex_base *lock)
return READ_ONCE(lock->owner) != NULL;
}
#ifdef CONFIG_RT_MUTEXES
#define RT_MUTEX_HAS_WAITERS 1UL
/*
* rt_mutex_owner() - Read the owner task pointer stored in an rt_mutex base.
* @lock: The rt_mutex base to inspect
*
* lock->owner is loaded with READ_ONCE() and the RT_MUTEX_HAS_WAITERS bit is
* masked off before the value is converted back to a task pointer.
*
* Return: The stored owner value with the RT_MUTEX_HAS_WAITERS bit cleared.
*/
static inline struct task_struct *rt_mutex_owner(struct rt_mutex_base *lock)
{
unsigned long owner = (unsigned long) READ_ONCE(lock->owner);
return (struct task_struct *) (owner & ~RT_MUTEX_HAS_WAITERS);
}
#endif
extern void rt_mutex_base_init(struct rt_mutex_base *rtb);
/**

View File

@@ -335,6 +335,37 @@ struct kmem_cache_args {
* %NULL means no constructor.
*/
void (*ctor)(void *);
/**
* @sheaf_capacity: Enable sheaves of given capacity for the cache.
*
* With a non-zero value, allocations from the cache go through caching
* arrays called sheaves. Each cpu has a main sheaf that's always
* present, and a spare sheaf that may not be present. When both become
* empty, there's an attempt to replace an empty sheaf with a full sheaf
* from the per-node barn.
*
* When no full sheaf is available, and gfp flags allow blocking, a
* sheaf is allocated and filled from slab(s) using bulk allocation.
* Otherwise the allocation falls back to the normal operation
* allocating a single object from a slab.
*
* Analogously, when freeing and both percpu sheaves are full, the barn
* may replace one of them with an empty sheaf, unless it's over capacity. In
* that case a sheaf is bulk freed to slab pages.
*
* The sheaves do not enforce NUMA placement of objects, so allocations
* via kmem_cache_alloc_node() with a node specified other than
* NUMA_NO_NODE will bypass them.
*
* Bulk allocation and free operations also try to use the cpu sheaves
* and barn, but fall back to using slab pages directly.
*
* When slub_debug is enabled for the cache, the sheaf_capacity argument
* is ignored.
*
* %0 means no sheaves will be created.
*/
unsigned int sheaf_capacity;
};
struct kmem_cache *__kmem_cache_create_args(const char *name,
@@ -470,6 +501,7 @@ void * __must_check krealloc_noprof(const void *objp, size_t new_size,
#define krealloc(...) alloc_hooks(krealloc_noprof(__VA_ARGS__))
void kfree(const void *objp);
void kfree_nolock(const void *objp);
void kfree_sensitive(const void *objp);
size_t __ksize(const void *objp);
@@ -798,6 +830,22 @@ void *kmem_cache_alloc_node_noprof(struct kmem_cache *s, gfp_t flags,
int node) __assume_slab_alignment __malloc;
#define kmem_cache_alloc_node(...) alloc_hooks(kmem_cache_alloc_node_noprof(__VA_ARGS__))
struct slab_sheaf *
kmem_cache_prefill_sheaf(struct kmem_cache *s, gfp_t gfp, unsigned int size);
int kmem_cache_refill_sheaf(struct kmem_cache *s, gfp_t gfp,
struct slab_sheaf **sheafp, unsigned int size);
void kmem_cache_return_sheaf(struct kmem_cache *s, gfp_t gfp,
struct slab_sheaf *sheaf);
void *kmem_cache_alloc_from_sheaf_noprof(struct kmem_cache *cachep, gfp_t gfp,
struct slab_sheaf *sheaf) __assume_slab_alignment __malloc;
#define kmem_cache_alloc_from_sheaf(...) \
alloc_hooks(kmem_cache_alloc_from_sheaf_noprof(__VA_ARGS__))
unsigned int kmem_cache_sheaf_size(struct slab_sheaf *sheaf);
/*
* These macros allow declaring a kmem_buckets * parameter alongside size, which
* can be compiled out with CONFIG_SLAB_BUCKETS=n so that a large number of call
@@ -910,6 +958,9 @@ static __always_inline __alloc_size(1) void *kmalloc_noprof(size_t size, gfp_t f
}
#define kmalloc(...) alloc_hooks(kmalloc_noprof(__VA_ARGS__))
void *kmalloc_nolock_noprof(size_t size, gfp_t gfp_flags, int node);
#define kmalloc_nolock(...) alloc_hooks(kmalloc_nolock_noprof(__VA_ARGS__))
#define kmem_buckets_alloc(_b, _size, _flags) \
alloc_hooks(__kmalloc_node_noprof(PASS_BUCKET_PARAMS(_size, _b), _flags, NUMA_NO_NODE))

View File

@@ -83,7 +83,7 @@ static struct bpf_stream_page *bpf_stream_page_replace(void)
struct bpf_stream_page *stream_page, *old_stream_page;
struct page *page;
page = alloc_pages_nolock(NUMA_NO_NODE, 0);
page = alloc_pages_nolock(/* Don't account */ 0, NUMA_NO_NODE, 0);
if (!page)
return NULL;
stream_page = page_address(page);

View File

@@ -583,7 +583,7 @@ static bool can_alloc_pages(void)
static struct page *__bpf_alloc_page(int nid)
{
if (!can_alloc_pages())
return alloc_pages_nolock(nid, 0);
return alloc_pages_nolock(__GFP_ACCOUNT, nid, 0);
return alloc_pages_node(nid,
GFP_KERNEL | __GFP_ZERO | __GFP_ACCOUNT

View File

@@ -153,15 +153,6 @@ static inline struct rt_mutex_waiter *task_top_pi_waiter(struct task_struct *p)
pi_tree.entry);
}
#define RT_MUTEX_HAS_WAITERS 1UL
static inline struct task_struct *rt_mutex_owner(struct rt_mutex_base *lock)
{
unsigned long owner = (unsigned long) READ_ONCE(lock->owner);
return (struct task_struct *) (owner & ~RT_MUTEX_HAS_WAITERS);
}
/*
* Constants for rt mutex functions which have a selectable deadlock
* detection.

View File

@@ -83,13 +83,9 @@
/*
* Maple state flags
* * MA_STATE_BULK - Bulk insert mode
* * MA_STATE_REBALANCE - Indicate a rebalance during bulk insert
* * MA_STATE_PREALLOC - Preallocated nodes, WARN_ON allocation
*/
#define MA_STATE_BULK 1
#define MA_STATE_REBALANCE 2
#define MA_STATE_PREALLOC 4
#define MA_STATE_PREALLOC 1
#define ma_parent_ptr(x) ((struct maple_pnode *)(x))
#define mas_tree_parent(x) ((unsigned long)(x->tree) | MA_ROOT_PARENT)
@@ -176,26 +172,25 @@ static inline struct maple_node *mt_alloc_one(gfp_t gfp)
return kmem_cache_alloc(maple_node_cache, gfp);
}
static inline int mt_alloc_bulk(gfp_t gfp, size_t size, void **nodes)
{
return kmem_cache_alloc_bulk(maple_node_cache, gfp, size, nodes);
}
static inline void mt_free_one(struct maple_node *node)
{
kmem_cache_free(maple_node_cache, node);
}
static inline void mt_free_bulk(size_t size, void __rcu **nodes)
{
kmem_cache_free_bulk(maple_node_cache, size, (void **)nodes);
}
static void mt_free_rcu(struct rcu_head *head)
static void mt_return_sheaf(struct slab_sheaf *sheaf)
{
struct maple_node *node = container_of(head, struct maple_node, rcu);
kmem_cache_return_sheaf(maple_node_cache, GFP_NOWAIT, sheaf);
}
kmem_cache_free(maple_node_cache, node);
/*
* mt_get_sheaf() - Request a prefilled sheaf from maple_node_cache.
* @gfp: The gfp flags handed to kmem_cache_prefill_sheaf()
* @count: The size argument handed to kmem_cache_prefill_sheaf()
*
* Return: The value returned by kmem_cache_prefill_sheaf().
*/
static struct slab_sheaf *mt_get_sheaf(gfp_t gfp, int count)
{
return kmem_cache_prefill_sheaf(maple_node_cache, gfp, count);
}
/*
* mt_refill_sheaf() - Refill an existing sheaf from maple_node_cache.
* @gfp: The gfp flags handed to kmem_cache_refill_sheaf()
* @sheaf: Pointer to the sheaf pointer, passed through unchanged
* @size: The size argument handed to kmem_cache_refill_sheaf()
*
* Return: The value returned by kmem_cache_refill_sheaf().
*/
static int mt_refill_sheaf(gfp_t gfp, struct slab_sheaf **sheaf,
unsigned int size)
{
return kmem_cache_refill_sheaf(maple_node_cache, gfp, sheaf, size);
}
/*
@@ -208,7 +203,7 @@ static void mt_free_rcu(struct rcu_head *head)
static void ma_free_rcu(struct maple_node *node)
{
WARN_ON(node->parent != ma_parent_ptr(node));
call_rcu(&node->rcu, mt_free_rcu);
kfree_rcu(node, rcu);
}
static void mt_set_height(struct maple_tree *mt, unsigned char height)
@@ -590,67 +585,6 @@ static __always_inline bool mte_dead_node(const struct maple_enode *enode)
return ma_dead_node(node);
}
/*
* mas_allocated() - Get the number of nodes allocated in a maple state.
* @mas: The maple state
*
* The ma_state alloc member is overloaded to hold a pointer to the first
* allocated node or to the number of requested nodes to allocate. If bit 0 is
* set, then the alloc contains the number of requested nodes. If there is an
* allocated node, then the total allocated nodes is in that node.
*
* Return: The total number of nodes allocated
*/
static inline unsigned long mas_allocated(const struct ma_state *mas)
{
if (!mas->alloc || ((unsigned long)mas->alloc & 0x1))
return 0;
return mas->alloc->total;
}
/*
* mas_set_alloc_req() - Set the requested number of allocations.
* @mas: the maple state
* @count: the number of allocations.
*
* The requested number of allocations is either in the first allocated node,
* located in @mas->alloc->request_count, or directly in @mas->alloc if there is
* no allocated node. Set the request either in the node or do the necessary
* encoding to store in @mas->alloc directly.
*/
static inline void mas_set_alloc_req(struct ma_state *mas, unsigned long count)
{
if (!mas->alloc || ((unsigned long)mas->alloc & 0x1)) {
if (!count)
mas->alloc = NULL;
else
mas->alloc = (struct maple_alloc *)(((count) << 1U) | 1U);
return;
}
mas->alloc->request_count = count;
}
/*
* mas_alloc_req() - get the requested number of allocations.
* @mas: The maple state
*
* The alloc count is either stored directly in @mas, or in
* @mas->alloc->request_count if there is at least one node allocated. Decode
* the request count if it's stored directly in @mas->alloc.
*
* Return: The allocation request count.
*/
static inline unsigned int mas_alloc_req(const struct ma_state *mas)
{
if ((unsigned long)mas->alloc & 0x1)
return (unsigned long)(mas->alloc) >> 1;
else if (mas->alloc)
return mas->alloc->request_count;
return 0;
}
/*
* ma_pivots() - Get a pointer to the maple node pivots.
* @node: the maple node
@@ -1031,24 +965,6 @@ static inline void mas_descend(struct ma_state *mas)
mas->node = mas_slot(mas, slots, mas->offset);
}
/*
* mte_set_gap() - Set a maple node gap.
* @mn: The encoded maple node
* @gap: The offset of the gap to set
* @val: The gap value
*/
static inline void mte_set_gap(const struct maple_enode *mn,
unsigned char gap, unsigned long val)
{
switch (mte_node_type(mn)) {
default:
break;
case maple_arange_64:
mte_to_node(mn)->ma64.gap[gap] = val;
break;
}
}
/*
* mas_ascend() - Walk up a level of the tree.
* @mas: The maple state
@@ -1152,79 +1068,24 @@ static int mas_ascend(struct ma_state *mas)
*
* Return: A pointer to a maple node.
*/
static inline struct maple_node *mas_pop_node(struct ma_state *mas)
static __always_inline struct maple_node *mas_pop_node(struct ma_state *mas)
{
struct maple_alloc *ret, *node = mas->alloc;
unsigned long total = mas_allocated(mas);
unsigned int req = mas_alloc_req(mas);
struct maple_node *ret;
/* nothing or a request pending. */
if (WARN_ON(!total))
if (mas->alloc) {
ret = mas->alloc;
mas->alloc = NULL;
goto out;
}
if (WARN_ON_ONCE(!mas->sheaf))
return NULL;
if (total == 1) {
/* single allocation in this ma_state */
mas->alloc = NULL;
ret = node;
goto single_node;
}
if (node->node_count == 1) {
/* Single allocation in this node. */
mas->alloc = node->slot[0];
mas->alloc->total = node->total - 1;
ret = node;
goto new_head;
}
node->total--;
ret = node->slot[--node->node_count];
node->slot[node->node_count] = NULL;
single_node:
new_head:
if (req) {
req++;
mas_set_alloc_req(mas, req);
}
ret = kmem_cache_alloc_from_sheaf(maple_node_cache, GFP_NOWAIT, mas->sheaf);
out:
memset(ret, 0, sizeof(*ret));
return (struct maple_node *)ret;
}
/*
* mas_push_node() - Push a node back on the maple state allocation.
* @mas: The maple state
* @used: The used maple node
*
* Stores the maple node back into @mas->alloc for reuse. Updates allocated and
* requested node count as necessary.
*/
static inline void mas_push_node(struct ma_state *mas, struct maple_node *used)
{
struct maple_alloc *reuse = (struct maple_alloc *)used;
struct maple_alloc *head = mas->alloc;
unsigned long count;
unsigned int requested = mas_alloc_req(mas);
count = mas_allocated(mas);
reuse->request_count = 0;
reuse->node_count = 0;
if (count) {
if (head->node_count < MAPLE_ALLOC_SLOTS) {
head->slot[head->node_count++] = reuse;
head->total++;
goto done;
}
reuse->slot[0] = head;
reuse->node_count = 1;
}
reuse->total = count + 1;
mas->alloc = reuse;
done:
if (requested > 1)
mas_set_alloc_req(mas, requested - 1);
return ret;
}
/*
@@ -1234,75 +1095,70 @@ static inline void mas_push_node(struct ma_state *mas, struct maple_node *used)
*/
static inline void mas_alloc_nodes(struct ma_state *mas, gfp_t gfp)
{
struct maple_alloc *node;
unsigned long allocated = mas_allocated(mas);
unsigned int requested = mas_alloc_req(mas);
unsigned int count;
void **slots = NULL;
unsigned int max_req = 0;
if (!requested)
if (!mas->node_request)
return;
mas_set_alloc_req(mas, 0);
if (mas->mas_flags & MA_STATE_PREALLOC) {
if (allocated)
if (mas->node_request == 1) {
if (mas->sheaf)
goto use_sheaf;
if (mas->alloc)
return;
WARN_ON(!allocated);
mas->alloc = mt_alloc_one(gfp);
if (!mas->alloc)
goto error;
mas->node_request = 0;
return;
}
if (!allocated || mas->alloc->node_count == MAPLE_ALLOC_SLOTS) {
node = (struct maple_alloc *)mt_alloc_one(gfp);
if (!node)
goto nomem_one;
use_sheaf:
if (unlikely(mas->alloc)) {
kfree(mas->alloc);
mas->alloc = NULL;
}
if (allocated) {
node->slot[0] = mas->alloc;
node->node_count = 1;
} else {
node->node_count = 0;
if (mas->sheaf) {
unsigned long refill;
refill = mas->node_request;
if (kmem_cache_sheaf_size(mas->sheaf) >= refill) {
mas->node_request = 0;
return;
}
mas->alloc = node;
node->total = ++allocated;
node->request_count = 0;
requested--;
if (mt_refill_sheaf(gfp, &mas->sheaf, refill))
goto error;
mas->node_request = 0;
return;
}
node = mas->alloc;
while (requested) {
max_req = MAPLE_ALLOC_SLOTS - node->node_count;
slots = (void **)&node->slot[node->node_count];
max_req = min(requested, max_req);
count = mt_alloc_bulk(gfp, max_req, slots);
if (!count)
goto nomem_bulk;
if (node->node_count == 0) {
node->slot[0]->node_count = 0;
node->slot[0]->request_count = 0;
}
node->node_count += count;
allocated += count;
/* find a non-full node*/
do {
node = node->slot[0];
} while (unlikely(node->node_count == MAPLE_ALLOC_SLOTS));
requested -= count;
mas->sheaf = mt_get_sheaf(gfp, mas->node_request);
if (likely(mas->sheaf)) {
mas->node_request = 0;
return;
}
mas->alloc->total = allocated;
return;
nomem_bulk:
/* Clean up potential freed allocations on bulk failure */
memset(slots, 0, max_req * sizeof(unsigned long));
mas->alloc->total = allocated;
nomem_one:
mas_set_alloc_req(mas, requested);
error:
mas_set_err(mas, -ENOMEM);
}
/*
* mas_empty_nodes() - Release every node reserved in a maple state.
* @mas: The maple state
*
* Clears the outstanding node request, hands the sheaf (if any) back through
* mt_return_sheaf(), and frees the single allocated node (if any) with
* kfree(). Both pointers are reset to NULL once released.
*/
static inline void mas_empty_nodes(struct ma_state *mas)
{
mas->node_request = 0;
if (mas->sheaf) {
mt_return_sheaf(mas->sheaf);
mas->sheaf = NULL;
}
if (mas->alloc) {
kfree(mas->alloc);
mas->alloc = NULL;
}
}
/*
* mas_free() - Free an encoded maple node
* @mas: The maple state
@@ -1313,42 +1169,7 @@ static inline void mas_alloc_nodes(struct ma_state *mas, gfp_t gfp)
*/
static inline void mas_free(struct ma_state *mas, struct maple_enode *used)
{
struct maple_node *tmp = mte_to_node(used);
if (mt_in_rcu(mas->tree))
ma_free_rcu(tmp);
else
mas_push_node(mas, tmp);
}
/*
* mas_node_count_gfp() - Check if enough nodes are allocated and request more
* if there is not enough nodes.
* @mas: The maple state
* @count: The number of nodes needed
* @gfp: the gfp flags
*/
static void mas_node_count_gfp(struct ma_state *mas, int count, gfp_t gfp)
{
unsigned long allocated = mas_allocated(mas);
if (allocated < count) {
mas_set_alloc_req(mas, count - allocated);
mas_alloc_nodes(mas, gfp);
}
}
/*
* mas_node_count() - Check if enough nodes are allocated and request more if
* there is not enough nodes.
* @mas: The maple state
* @count: The number of nodes needed
*
* Note: Uses GFP_NOWAIT | __GFP_NOWARN for gfp flags.
*/
static void mas_node_count(struct ma_state *mas, int count)
{
return mas_node_count_gfp(mas, count, GFP_NOWAIT | __GFP_NOWARN);
ma_free_rcu(mte_to_node(used));
}
/*
@@ -1878,21 +1699,7 @@ static inline int mab_calc_split(struct ma_state *mas,
* end on a NULL entry, with the exception of the left-most leaf. The
* limitation means that the split of a node must be checked for this condition
* and be able to put more data in one direction or the other.
*/
if (unlikely((mas->mas_flags & MA_STATE_BULK))) {
*mid_split = 0;
split = b_end - mt_min_slots[bn->type];
if (!ma_is_leaf(bn->type))
return split;
mas->mas_flags |= MA_STATE_REBALANCE;
if (!bn->slot[split])
split--;
return split;
}
/*
*
* Although extremely rare, it is possible to enter what is known as the 3-way
* split scenario. The 3-way split comes about by means of a store of a range
* that overwrites the end and beginning of two full nodes. The result is a set
@@ -2039,27 +1846,6 @@ static inline void mab_mas_cp(struct maple_big_node *b_node,
}
}
/*
* mas_bulk_rebalance() - Rebalance the end of a tree after a bulk insert.
* @mas: The maple state
* @end: The maple node end
* @mt: The maple node type
*/
static inline void mas_bulk_rebalance(struct ma_state *mas, unsigned char end,
enum maple_type mt)
{
if (!(mas->mas_flags & MA_STATE_BULK))
return;
if (mte_is_root(mas->node))
return;
if (end > mt_min_slots[mt]) {
mas->mas_flags &= ~MA_STATE_REBALANCE;
return;
}
}
/*
* mas_store_b_node() - Store an @entry into the b_node while also copying the
* data from a maple encoded node.
@@ -2109,9 +1895,6 @@ static noinline_for_kasan void mas_store_b_node(struct ma_wr_state *wr_mas,
/* Handle new range ending before old range ends */
piv = mas_safe_pivot(mas, wr_mas->pivots, offset_end, wr_mas->type);
if (piv > mas->last) {
if (piv == ULONG_MAX)
mas_bulk_rebalance(mas, b_node->b_end, wr_mas->type);
if (offset_end != slot)
wr_mas->content = mas_slot_locked(mas, wr_mas->slots,
offset_end);
@@ -2523,10 +2306,7 @@ static inline void mas_topiary_node(struct ma_state *mas,
enode = tmp_mas->node;
tmp = mte_to_node(enode);
mte_set_node_dead(enode);
if (in_rcu)
ma_free_rcu(tmp);
else
mas_push_node(mas, tmp);
ma_free_rcu(tmp);
}
/*
@@ -3011,126 +2791,6 @@ static inline void mas_rebalance(struct ma_state *mas,
return mas_spanning_rebalance(mas, &mast, empty_count);
}
/*
* mas_destroy_rebalance() - Rebalance left-most node while destroying the maple
* state.
* @mas: The maple state
* @end: The end of the left-most node.
*
* During a mass-insert event (such as forking), it may be necessary to
* rebalance the left-most node when it is not sufficient.
*/
static inline void mas_destroy_rebalance(struct ma_state *mas, unsigned char end)
{
enum maple_type mt = mte_node_type(mas->node);
struct maple_node reuse, *newnode, *parent, *new_left, *left, *node;
struct maple_enode *eparent, *old_eparent;
unsigned char offset, tmp, split = mt_slots[mt] / 2;
void __rcu **l_slots, **slots;
unsigned long *l_pivs, *pivs, gap;
bool in_rcu = mt_in_rcu(mas->tree);
unsigned char new_height = mas_mt_height(mas);
MA_STATE(l_mas, mas->tree, mas->index, mas->last);
l_mas = *mas;
mas_prev_sibling(&l_mas);
/* set up node. */
if (in_rcu) {
newnode = mas_pop_node(mas);
} else {
newnode = &reuse;
}
node = mas_mn(mas);
newnode->parent = node->parent;
slots = ma_slots(newnode, mt);
pivs = ma_pivots(newnode, mt);
left = mas_mn(&l_mas);
l_slots = ma_slots(left, mt);
l_pivs = ma_pivots(left, mt);
if (!l_slots[split])
split++;
tmp = mas_data_end(&l_mas) - split;
memcpy(slots, l_slots + split + 1, sizeof(void *) * tmp);
memcpy(pivs, l_pivs + split + 1, sizeof(unsigned long) * tmp);
pivs[tmp] = l_mas.max;
memcpy(slots + tmp, ma_slots(node, mt), sizeof(void *) * end);
memcpy(pivs + tmp, ma_pivots(node, mt), sizeof(unsigned long) * end);
l_mas.max = l_pivs[split];
mas->min = l_mas.max + 1;
old_eparent = mt_mk_node(mte_parent(l_mas.node),
mas_parent_type(&l_mas, l_mas.node));
tmp += end;
if (!in_rcu) {
unsigned char max_p = mt_pivots[mt];
unsigned char max_s = mt_slots[mt];
if (tmp < max_p)
memset(pivs + tmp, 0,
sizeof(unsigned long) * (max_p - tmp));
if (tmp < mt_slots[mt])
memset(slots + tmp, 0, sizeof(void *) * (max_s - tmp));
memcpy(node, newnode, sizeof(struct maple_node));
ma_set_meta(node, mt, 0, tmp - 1);
mte_set_pivot(old_eparent, mte_parent_slot(l_mas.node),
l_pivs[split]);
/* Remove data from l_pivs. */
tmp = split + 1;
memset(l_pivs + tmp, 0, sizeof(unsigned long) * (max_p - tmp));
memset(l_slots + tmp, 0, sizeof(void *) * (max_s - tmp));
ma_set_meta(left, mt, 0, split);
eparent = old_eparent;
goto done;
}
/* RCU requires replacing both l_mas, mas, and parent. */
mas->node = mt_mk_node(newnode, mt);
ma_set_meta(newnode, mt, 0, tmp);
new_left = mas_pop_node(mas);
new_left->parent = left->parent;
mt = mte_node_type(l_mas.node);
slots = ma_slots(new_left, mt);
pivs = ma_pivots(new_left, mt);
memcpy(slots, l_slots, sizeof(void *) * split);
memcpy(pivs, l_pivs, sizeof(unsigned long) * split);
ma_set_meta(new_left, mt, 0, split);
l_mas.node = mt_mk_node(new_left, mt);
/* replace parent. */
offset = mte_parent_slot(mas->node);
mt = mas_parent_type(&l_mas, l_mas.node);
parent = mas_pop_node(mas);
slots = ma_slots(parent, mt);
pivs = ma_pivots(parent, mt);
memcpy(parent, mte_to_node(old_eparent), sizeof(struct maple_node));
rcu_assign_pointer(slots[offset], mas->node);
rcu_assign_pointer(slots[offset - 1], l_mas.node);
pivs[offset - 1] = l_mas.max;
eparent = mt_mk_node(parent, mt);
done:
gap = mas_leaf_max_gap(mas);
mte_set_gap(eparent, mte_parent_slot(mas->node), gap);
gap = mas_leaf_max_gap(&l_mas);
mte_set_gap(eparent, mte_parent_slot(l_mas.node), gap);
mas_ascend(mas);
if (in_rcu) {
mas_replace_node(mas, old_eparent, new_height);
mas_adopt_children(mas, mas->node);
}
mas_update_gap(mas);
}
/*
* mas_split_final_node() - Split the final node in a subtree operation.
* @mast: the maple subtree state
@@ -3837,8 +3497,6 @@ static inline void mas_wr_node_store(struct ma_wr_state *wr_mas,
if (mas->last == wr_mas->end_piv)
offset_end++; /* don't copy this offset */
else if (unlikely(wr_mas->r_max == ULONG_MAX))
mas_bulk_rebalance(mas, mas->end, wr_mas->type);
/* set up node. */
if (in_rcu) {
@@ -4174,7 +3832,7 @@ static inline void mas_wr_prealloc_setup(struct ma_wr_state *wr_mas)
*
* Return: Number of nodes required for preallocation.
*/
static inline int mas_prealloc_calc(struct ma_wr_state *wr_mas, void *entry)
static inline void mas_prealloc_calc(struct ma_wr_state *wr_mas, void *entry)
{
struct ma_state *mas = wr_mas->mas;
unsigned char height = mas_mt_height(mas);
@@ -4220,7 +3878,7 @@ static inline int mas_prealloc_calc(struct ma_wr_state *wr_mas, void *entry)
WARN_ON_ONCE(1);
}
return ret;
mas->node_request = ret;
}
/*
@@ -4255,7 +3913,7 @@ static inline enum store_type mas_wr_store_type(struct ma_wr_state *wr_mas)
new_end = mas_wr_new_end(wr_mas);
/* Potential spanning rebalance collapsing a node */
if (new_end < mt_min_slots[wr_mas->type]) {
if (!mte_is_root(mas->node) && !(mas->mas_flags & MA_STATE_BULK))
if (!mte_is_root(mas->node))
return wr_rebalance;
return wr_node_store;
}
@@ -4281,15 +3939,15 @@ static inline enum store_type mas_wr_store_type(struct ma_wr_state *wr_mas)
*/
static inline void mas_wr_preallocate(struct ma_wr_state *wr_mas, void *entry)
{
int request;
struct ma_state *mas = wr_mas->mas;
mas_wr_prealloc_setup(wr_mas);
wr_mas->mas->store_type = mas_wr_store_type(wr_mas);
request = mas_prealloc_calc(wr_mas, entry);
if (!request)
mas->store_type = mas_wr_store_type(wr_mas);
mas_prealloc_calc(wr_mas, entry);
if (!mas->node_request)
return;
mas_node_count(wr_mas->mas, request);
mas_alloc_nodes(mas, GFP_NOWAIT);
}
/**
@@ -5281,7 +4939,7 @@ static void mt_free_walk(struct rcu_head *head)
mt_free_bulk(node->slot_len, slots);
free_leaf:
mt_free_rcu(&node->rcu);
kfree(node);
}
static inline void __rcu **mte_destroy_descend(struct maple_enode **enode,
@@ -5365,7 +5023,7 @@ static void mt_destroy_walk(struct maple_enode *enode, struct maple_tree *mt,
free_leaf:
if (free)
mt_free_rcu(&node->rcu);
kfree(node);
else
mt_clear_meta(mt, node, node->type);
}
@@ -5402,7 +5060,6 @@ static inline void mte_destroy_walk(struct maple_enode *enode,
*/
void *mas_store(struct ma_state *mas, void *entry)
{
int request;
MA_WR_STATE(wr_mas, mas, entry);
trace_ma_write(__func__, mas, 0, entry);
@@ -5432,11 +5089,11 @@ void *mas_store(struct ma_state *mas, void *entry)
return wr_mas.content;
}
request = mas_prealloc_calc(&wr_mas, entry);
if (!request)
mas_prealloc_calc(&wr_mas, entry);
if (!mas->node_request)
goto store;
mas_node_count(mas, request);
mas_alloc_nodes(mas, GFP_NOWAIT);
if (mas_is_err(mas))
return NULL;
@@ -5524,20 +5181,19 @@ EXPORT_SYMBOL_GPL(mas_store_prealloc);
int mas_preallocate(struct ma_state *mas, void *entry, gfp_t gfp)
{
MA_WR_STATE(wr_mas, mas, entry);
int ret = 0;
int request;
mas_wr_prealloc_setup(&wr_mas);
mas->store_type = mas_wr_store_type(&wr_mas);
request = mas_prealloc_calc(&wr_mas, entry);
if (!request)
mas_prealloc_calc(&wr_mas, entry);
if (!mas->node_request)
goto set_flag;
mas->mas_flags &= ~MA_STATE_PREALLOC;
mas_node_count_gfp(mas, request, gfp);
mas_alloc_nodes(mas, gfp);
if (mas_is_err(mas)) {
mas_set_alloc_req(mas, 0);
ret = xa_err(mas->node);
int ret = xa_err(mas->node);
mas->node_request = 0;
mas_destroy(mas);
mas_reset(mas);
return ret;
@@ -5545,7 +5201,7 @@ int mas_preallocate(struct ma_state *mas, void *entry, gfp_t gfp)
set_flag:
mas->mas_flags |= MA_STATE_PREALLOC;
return ret;
return 0;
}
EXPORT_SYMBOL_GPL(mas_preallocate);
@@ -5559,109 +5215,11 @@ EXPORT_SYMBOL_GPL(mas_preallocate);
*/
void mas_destroy(struct ma_state *mas)
{
struct maple_alloc *node;
unsigned long total;
/*
* When using mas_for_each() to insert an expected number of elements,
* it is possible that the number inserted is less than the expected
* number. To fix an invalid final node, a check is performed here to
* rebalance the previous node with the final node.
*/
if (mas->mas_flags & MA_STATE_REBALANCE) {
unsigned char end;
if (mas_is_err(mas))
mas_reset(mas);
mas_start(mas);
mtree_range_walk(mas);
end = mas->end + 1;
if (end < mt_min_slot_count(mas->node) - 1)
mas_destroy_rebalance(mas, end);
mas->mas_flags &= ~MA_STATE_REBALANCE;
}
mas->mas_flags &= ~(MA_STATE_BULK|MA_STATE_PREALLOC);
total = mas_allocated(mas);
while (total) {
node = mas->alloc;
mas->alloc = node->slot[0];
if (node->node_count > 1) {
size_t count = node->node_count - 1;
mt_free_bulk(count, (void __rcu **)&node->slot[1]);
total -= count;
}
mt_free_one(ma_mnode_ptr(node));
total--;
}
mas->alloc = NULL;
mas->mas_flags &= ~MA_STATE_PREALLOC;
mas_empty_nodes(mas);
}
EXPORT_SYMBOL_GPL(mas_destroy);
/*
 * mas_expected_entries() - Prepare a maple state for inserting a known number
 * of entries.
 * @mas: The maple state
 * @nr_entries: The number of entries the caller expects to insert.
 *
 * Flags @mas for bulk insertion, estimates how many nodes are needed to hold
 * @nr_entries entries and requests that many nodes with GFP_KERNEL. The
 * estimate is only a hint: if it is too small, allocation can still happen
 * while the entries are being inserted. Call mas_destroy() on @mas after the
 * entries have been inserted so that unused nodes are freed.
 *
 * Return: 0 on success, otherwise the error recorded in @mas by the node
 * request (-ENOMEM if memory could not be allocated).
 */
int mas_expected_entries(struct ma_state *mas, unsigned long nr_entries)
{
	/* Remembered so the state can be restored if the request fails. */
	struct maple_enode *saved_node = mas->node;
	int internal_slots;
	int nodes;
	int err;

	/*
	 * Duplicating a tree into a new one (for example copying the VMAs on
	 * fork) produces a tree that is not used until it is fully populated.
	 * A bulk load with RCU disabled is the fastest way to build it: splits
	 * can optimistically favour the left and nodes can be reused.
	 *
	 * Optimize splitting for bulk insert in-order.
	 */
	mas->mas_flags |= MA_STATE_BULK;

	/*
	 * Avoid overflow, assume a gap between each entry and a trailing null.
	 * If this is wrong, it just means allocation can happen during
	 * insertion of entries.
	 */
	nodes = max(nr_entries, nr_entries * 2 + 1);

	/* Slots usable in an internal node depend on the tree flavour. */
	internal_slots = mt_is_alloc(mas->tree) ? MAPLE_ARANGE64_SLOTS - 2 :
						  MAPLE_RANGE64_SLOTS - 2;

	/* Leaves; reduce slots to keep space for expansion */
	nodes = DIV_ROUND_UP(nodes, MAPLE_RANGE64_SLOTS - 2);
	/* Internal nodes */
	nodes += DIV_ROUND_UP(nodes, internal_slots);

	/* Add working room for split (2 nodes) + new parents */
	mas_node_count_gfp(mas, nodes + 3, GFP_KERNEL);

	/* Detect if allocations run out */
	mas->mas_flags |= MA_STATE_PREALLOC;

	if (mas_is_err(mas)) {
		err = xa_err(mas->node);
		mas->node = saved_node;
		mas_destroy(mas);
		return err;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(mas_expected_entries);
static void mas_may_activate(struct ma_state *mas)
{
if (!mas->node) {
@@ -6293,7 +5851,7 @@ bool mas_nomem(struct ma_state *mas, gfp_t gfp)
mas_alloc_nodes(mas, gfp);
}
if (!mas_allocated(mas))
if (!mas->sheaf && !mas->alloc)
return false;
mas->status = ma_start;
@@ -6302,9 +5860,14 @@ bool mas_nomem(struct ma_state *mas, gfp_t gfp)
void __init maple_tree_init(void)
{
struct kmem_cache_args args = {
.align = sizeof(struct maple_node),
.sheaf_capacity = 32,
};
maple_node_cache = kmem_cache_create("maple_node",
sizeof(struct maple_node), sizeof(struct maple_node),
SLAB_PANIC, NULL);
sizeof(struct maple_node), &args,
SLAB_PANIC);
}
/**
@@ -6637,7 +6200,7 @@ static void mas_dup_free(struct ma_state *mas)
}
node = mte_to_node(mas->node);
mt_free_one(node);
kfree(node);
}
/*
@@ -6678,7 +6241,7 @@ static inline void mas_dup_alloc(struct ma_state *mas, struct ma_state *new_mas,
struct maple_node *node = mte_to_node(mas->node);
struct maple_node *new_node = mte_to_node(new_mas->node);
enum maple_type type;
unsigned char request, count, i;
unsigned char count, i;
void __rcu **slots;
void __rcu **new_slots;
unsigned long val;
@@ -6686,20 +6249,17 @@ static inline void mas_dup_alloc(struct ma_state *mas, struct ma_state *new_mas,
/* Allocate memory for child nodes. */
type = mte_node_type(mas->node);
new_slots = ma_slots(new_node, type);
request = mas_data_end(mas) + 1;
count = mt_alloc_bulk(gfp, request, (void **)new_slots);
if (unlikely(count < request)) {
memset(new_slots, 0, request * sizeof(void *));
mas_set_err(mas, -ENOMEM);
count = mas->node_request = mas_data_end(mas) + 1;
mas_alloc_nodes(mas, gfp);
if (unlikely(mas_is_err(mas)))
return;
}
/* Restore node type information in slots. */
slots = ma_slots(node, type);
for (i = 0; i < count; i++) {
val = (unsigned long)mt_slot_locked(mas->tree, slots, i);
val &= MAPLE_NODE_MASK;
((unsigned long *)new_slots)[i] |= val;
new_slots[i] = ma_mnode_ptr((unsigned long)mas_pop_node(mas) |
val);
}
}
@@ -6753,7 +6313,7 @@ static inline void mas_dup_build(struct ma_state *mas, struct ma_state *new_mas,
/* Only allocate child nodes for non-leaf nodes. */
mas_dup_alloc(mas, new_mas, gfp);
if (unlikely(mas_is_err(mas)))
return;
goto empty_mas;
} else {
/*
* This is the last leaf node and duplication is
@@ -6786,6 +6346,8 @@ static inline void mas_dup_build(struct ma_state *mas, struct ma_state *new_mas,
/* Make them the same height */
new_mas->tree->ma_flags = mas->tree->ma_flags;
rcu_assign_pointer(new_mas->tree->ma_root, root);
empty_mas:
mas_empty_nodes(mas);
}
/**
@@ -7683,8 +7245,9 @@ void mas_dump(const struct ma_state *mas)
pr_err("[%u/%u] index=%lx last=%lx\n", mas->offset, mas->end,
mas->index, mas->last);
pr_err(" min=%lx max=%lx alloc=" PTR_FMT ", depth=%u, flags=%x\n",
mas->min, mas->max, mas->alloc, mas->depth, mas->mas_flags);
pr_err(" min=%lx max=%lx sheaf=" PTR_FMT ", request %lu depth=%u, flags=%x\n",
mas->min, mas->max, mas->sheaf, mas->node_request, mas->depth,
mas->mas_flags);
if (mas->index > mas->last)
pr_err("Check index & last\n");
}

View File

@@ -2746,139 +2746,6 @@ static noinline void __init check_fuzzer(struct maple_tree *mt)
mtree_test_erase(mt, ULONG_MAX - 10);
}
/*
 * Duplicate the tree with a specific gap.
 *
 * Fills @mt with ranges [i * 10, (i + 1) * 10 - @gap] for i up to @nr_entries
 * (starting at 0 when @zero_start is set, otherwise at 1), then copies every
 * entry into a new externally locked tree that was prepared with
 * mas_expected_entries(), and finally tears the new tree down.
 */
static noinline void __init check_dup_gaps(struct maple_tree *mt,
				unsigned long nr_entries, bool zero_start,
				unsigned long gap)
{
	struct maple_tree newmt;
	struct rw_semaphore newmt_lock;
	unsigned long idx;
	void *entry;
	int ret;
	MA_STATE(mas, mt, 0, 0);
	MA_STATE(newmas, &newmt, 0, 0);

	init_rwsem(&newmt_lock);
	mt_set_external_lock(&newmt, &newmt_lock);

	mt_zero_nr_tallocated();

	/* Populate the source tree. */
	idx = zero_start ? 0 : 1;
	while (idx <= nr_entries) {
		mtree_store_range(mt, idx*10, (idx+1)*10 - gap,
				  xa_mk_value(idx), GFP_KERNEL);
		idx++;
	}

	mt_init_flags(&newmt, MT_FLAGS_ALLOC_RANGE | MT_FLAGS_LOCK_EXTERN);
	mt_set_non_kernel(99999);
	down_write(&newmt_lock);

	/* Preallocate for the copy; the test requires this to succeed. */
	ret = mas_expected_entries(&newmas, nr_entries);
	mt_set_non_kernel(0);
	MT_BUG_ON(mt, ret != 0);

	/* Copy each entry of the source tree over the same range. */
	rcu_read_lock();
	mas_for_each(&mas, entry, ULONG_MAX) {
		newmas.index = mas.index;
		newmas.last = mas.last;
		mas_store(&newmas, entry);
	}
	rcu_read_unlock();

	mas_destroy(&newmas);
	__mt_destroy(&newmt);
	up_write(&newmt_lock);
}
/*
 * Duplicate many sizes of trees. Mainly to test expected entry values.
 *
 * Each loop below re-initialises @mt, runs check_dup_gaps() for one entry
 * count, destroys the tree and waits on rcu_barrier() before the next round.
 * The loops differ in the tree flags, the range of entry counts, whether the
 * first entry is at index zero, and the gap passed to check_dup_gaps().
 * NOTE(review): the placement of cond_resched()/mt_cache_shrink() differs
 * between loops (inside vs. after the loop); presumably deliberate - confirm
 * before normalising.
 */
static noinline void __init check_dup(struct maple_tree *mt)
{
int i;
/* First entry count used by the "unreasonably large" rounds. */
int big_start = 100010;
/* Check with a value at zero */
for (i = 10; i < 1000; i++) {
mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE);
check_dup_gaps(mt, i, true, 5);
mtree_destroy(mt);
rcu_barrier();
}
cond_resched();
mt_cache_shrink();
/* Check with a value at zero, no gap */
for (i = 1000; i < 2000; i++) {
mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE);
check_dup_gaps(mt, i, true, 0);
mtree_destroy(mt);
rcu_barrier();
}
cond_resched();
mt_cache_shrink();
/* Check with a value at zero and unreasonably large */
for (i = big_start; i < big_start + 10; i++) {
mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE);
check_dup_gaps(mt, i, true, 5);
mtree_destroy(mt);
rcu_barrier();
}
cond_resched();
mt_cache_shrink();
/* Small to medium size not starting at zero */
for (i = 200; i < 1000; i++) {
mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE);
check_dup_gaps(mt, i, false, 5);
mtree_destroy(mt);
rcu_barrier();
}
cond_resched();
mt_cache_shrink();
/* Unreasonably large not starting at zero */
for (i = big_start; i < big_start + 10; i++) {
mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE);
check_dup_gaps(mt, i, false, 5);
mtree_destroy(mt);
rcu_barrier();
cond_resched();
mt_cache_shrink();
}
/* Check non-allocation tree not starting at zero */
for (i = 1500; i < 3000; i++) {
mt_init_flags(mt, 0);
check_dup_gaps(mt, i, false, 5);
mtree_destroy(mt);
rcu_barrier();
cond_resched();
/* Shrink the cache on every other iteration only. */
if (i % 2 == 0)
mt_cache_shrink();
}
mt_cache_shrink();
/* Check non-allocation tree starting at zero */
for (i = 200; i < 1000; i++) {
mt_init_flags(mt, 0);
check_dup_gaps(mt, i, true, 5);
mtree_destroy(mt);
rcu_barrier();
cond_resched();
}
mt_cache_shrink();
/* Unreasonably large */
for (i = big_start + 5; i < big_start + 10; i++) {
mt_init_flags(mt, 0);
check_dup_gaps(mt, i, true, 5);
mtree_destroy(mt);
rcu_barrier();
mt_cache_shrink();
cond_resched();
}
}
static noinline void __init check_bnode_min_spanning(struct maple_tree *mt)
{
int i = 50;
@@ -4077,10 +3944,6 @@ static int __init maple_tree_seed(void)
check_fuzzer(&tree);
mtree_destroy(&tree);
mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE);
check_dup(&tree);
mtree_destroy(&tree);
mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE);
check_bnode_min_spanning(&tree);
mtree_destroy(&tree);

View File

@@ -194,6 +194,7 @@ menu "Slab allocator options"
config SLUB
def_bool y
select IRQ_WORK
config KVFREE_RCU_BATCHED
def_bool y

View File

@@ -842,6 +842,10 @@ static inline struct page *alloc_frozen_pages_noprof(gfp_t gfp, unsigned int ord
#define alloc_frozen_pages(...) \
alloc_hooks(alloc_frozen_pages_noprof(__VA_ARGS__))
struct page *alloc_frozen_pages_nolock_noprof(gfp_t gfp_flags, int nid, unsigned int order);
#define alloc_frozen_pages_nolock(...) \
alloc_hooks(alloc_frozen_pages_nolock_noprof(__VA_ARGS__))
extern void zone_pcp_reset(struct zone *zone);
extern void zone_pcp_disable(struct zone *zone);
extern void zone_pcp_enable(struct zone *zone);

View File

@@ -252,7 +252,7 @@ bool __kasan_slab_pre_free(struct kmem_cache *cache, void *object,
}
bool __kasan_slab_free(struct kmem_cache *cache, void *object, bool init,
bool still_accessible)
bool still_accessible, bool no_quarantine)
{
if (!kasan_arch_is_ready() || is_kfence_address(object))
return false;
@@ -274,6 +274,9 @@ bool __kasan_slab_free(struct kmem_cache *cache, void *object, bool init,
poison_slab_object(cache, object, init);
if (no_quarantine)
return false;
/*
* If the object is put into quarantine, do not let slab put the object
* onto the freelist for now. The object's metadata is kept until the

View File

@@ -7478,22 +7478,7 @@ static bool __free_unaccepted(struct page *page)
#endif /* CONFIG_UNACCEPTED_MEMORY */
/**
* alloc_pages_nolock - opportunistic reentrant allocation from any context
* @nid: node to allocate from
* @order: allocation order size
*
* Allocates pages of a given order from the given node. This is safe to
* call from any context (from atomic, NMI, and also reentrant
* allocator -> tracepoint -> alloc_pages_nolock_noprof).
* Allocation is best effort and to be expected to fail easily so nobody should
* rely on the success. Failures are not reported via warn_alloc().
* See always fail conditions below.
*
* Return: allocated page or NULL on failure. NULL does not mean EBUSY or EAGAIN.
* It means ENOMEM. There is no reason to call it again and expect !NULL.
*/
struct page *alloc_pages_nolock_noprof(int nid, unsigned int order)
struct page *alloc_frozen_pages_nolock_noprof(gfp_t gfp_flags, int nid, unsigned int order)
{
/*
* Do not specify __GFP_DIRECT_RECLAIM, since direct reclaim is not allowed.
@@ -7515,12 +7500,13 @@ struct page *alloc_pages_nolock_noprof(int nid, unsigned int order)
* specify it here to highlight that alloc_pages_nolock()
* doesn't want to deplete reserves.
*/
gfp_t alloc_gfp = __GFP_NOWARN | __GFP_ZERO | __GFP_NOMEMALLOC
| __GFP_ACCOUNT;
gfp_t alloc_gfp = __GFP_NOWARN | __GFP_ZERO | __GFP_NOMEMALLOC | __GFP_COMP
| gfp_flags;
unsigned int alloc_flags = ALLOC_TRYLOCK;
struct alloc_context ac = { };
struct page *page;
VM_WARN_ON_ONCE(gfp_flags & ~__GFP_ACCOUNT);
/*
* In PREEMPT_RT spin_trylock() will call raw_spin_lock() which is
* unsafe in NMI. If spin_trylock() is called from hard IRQ the current
@@ -7555,15 +7541,38 @@ struct page *alloc_pages_nolock_noprof(int nid, unsigned int order)
/* Unlike regular alloc_pages() there is no __alloc_pages_slowpath(). */
if (page)
set_page_refcounted(page);
if (memcg_kmem_online() && page &&
if (memcg_kmem_online() && page && (gfp_flags & __GFP_ACCOUNT) &&
unlikely(__memcg_kmem_charge_page(page, alloc_gfp, order) != 0)) {
free_pages_nolock(page, order);
__free_frozen_pages(page, order, FPI_TRYLOCK);
page = NULL;
}
trace_mm_page_alloc(page, order, alloc_gfp, ac.migratetype);
kmsan_alloc_page(page, order, alloc_gfp);
return page;
}
/**
 * alloc_pages_nolock - opportunistic reentrant allocation from any context
 * @gfp_flags: GFP flags. Only __GFP_ACCOUNT allowed.
 * @nid: node to allocate from
 * @order: allocation order size
 *
 * Allocates pages of a given order from the given node. This is safe to
 * call from any context (from atomic, NMI, and also reentrant
 * allocator -> tracepoint -> alloc_pages_nolock_noprof).
 * The allocation is best effort and is expected to fail easily, so nobody
 * should rely on it succeeding. Failures are not reported via warn_alloc().
 * The work is done by alloc_frozen_pages_nolock_noprof(), which also holds
 * the always-fail conditions; this wrapper only makes a successfully
 * allocated page refcounted.
 *
 * Return: allocated page or NULL on failure. NULL does not mean EBUSY or EAGAIN.
 * It means ENOMEM. There is no reason to call it again and expect !NULL.
 */
struct page *alloc_pages_nolock_noprof(gfp_t gfp_flags, int nid, unsigned int order)
{
	struct page *page = alloc_frozen_pages_nolock_noprof(gfp_flags, nid, order);

	if (!page)
		return NULL;

	/* The frozen allocation has no reference yet; give the caller one. */
	set_page_refcounted(page);
	return page;
}
EXPORT_SYMBOL_GPL(alloc_pages_nolock_noprof);

View File

@@ -57,6 +57,10 @@ struct slab {
struct {
union {
struct list_head slab_list;
struct { /* For deferred deactivate_slab() */
struct llist_node llnode;
void *flush_freelist;
};
#ifdef CONFIG_SLUB_CPU_PARTIAL
struct {
struct slab *next;
@@ -234,7 +238,9 @@ struct kmem_cache_order_objects {
struct kmem_cache {
#ifndef CONFIG_SLUB_TINY
struct kmem_cache_cpu __percpu *cpu_slab;
struct lock_class_key lock_key;
#endif
struct slub_percpu_sheaves __percpu *cpu_sheaves;
/* Used for retrieving partial slabs, etc. */
slab_flags_t flags;
unsigned long min_partial;
@@ -248,6 +254,7 @@ struct kmem_cache {
/* Number of per cpu partial slabs to keep around */
unsigned int cpu_partial_slabs;
#endif
unsigned int sheaf_capacity;
struct kmem_cache_order_objects oo;
/* Allocation and freeing of slabs */
@@ -433,6 +440,9 @@ static inline bool is_kmalloc_normal(struct kmem_cache *s)
return !(s->flags & (SLAB_CACHE_DMA|SLAB_ACCOUNT|SLAB_RECLAIM_ACCOUNT));
}
bool __kfree_rcu_sheaf(struct kmem_cache *s, void *obj);
void flush_all_rcu_sheaves(void);
#define SLAB_CORE_FLAGS (SLAB_HWCACHE_ALIGN | SLAB_CACHE_DMA | \
SLAB_CACHE_DMA32 | SLAB_PANIC | \
SLAB_TYPESAFE_BY_RCU | SLAB_DEBUG_OBJECTS | \
@@ -526,8 +536,12 @@ static inline struct slabobj_ext *slab_obj_exts(struct slab *slab)
unsigned long obj_exts = READ_ONCE(slab->obj_exts);
#ifdef CONFIG_MEMCG
VM_BUG_ON_PAGE(obj_exts && !(obj_exts & MEMCG_DATA_OBJEXTS),
slab_page(slab));
/*
* obj_exts should be either NULL, a valid pointer with
* MEMCG_DATA_OBJEXTS bit set or be equal to OBJEXTS_ALLOC_FAIL.
*/
VM_BUG_ON_PAGE(obj_exts && !(obj_exts & MEMCG_DATA_OBJEXTS) &&
obj_exts != OBJEXTS_ALLOC_FAIL, slab_page(slab));
VM_BUG_ON_PAGE(obj_exts & MEMCG_DATA_KMEM, slab_page(slab));
#endif
return (struct slabobj_ext *)(obj_exts & ~OBJEXTS_FLAGS_MASK);
@@ -656,6 +670,8 @@ void __kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct slab *slab)
void __check_heap_object(const void *ptr, unsigned long n,
const struct slab *slab, bool to_user);
void defer_free_barrier(void);
static inline bool slub_debug_orig_size(struct kmem_cache *s)
{
return (kmem_cache_debug_flags(s, SLAB_STORE_USER) &&

View File

@@ -163,6 +163,9 @@ int slab_unmergeable(struct kmem_cache *s)
return 1;
#endif
if (s->cpu_sheaves)
return 1;
/*
* We may have set a slab to be unmergeable during bootstrap.
*/
@@ -321,7 +324,7 @@ struct kmem_cache *__kmem_cache_create_args(const char *name,
object_size - args->usersize < args->useroffset))
args->usersize = args->useroffset = 0;
if (!args->usersize)
if (!args->usersize && !args->sheaf_capacity)
s = __kmem_cache_alias(name, object_size, args->align, flags,
args->ctor);
if (s)
@@ -507,6 +510,9 @@ void kmem_cache_destroy(struct kmem_cache *s)
rcu_barrier();
}
/* Wait for deferred work from kmalloc/kfree_nolock() */
defer_free_barrier();
cpus_read_lock();
mutex_lock(&slab_mutex);
@@ -1605,6 +1611,30 @@ static void kfree_rcu_work(struct work_struct *work)
kvfree_rcu_list(head);
}
/*
 * Try to hand @obj to the sheaf-based kfree_rcu() path of its slab cache.
 *
 * Returns false without touching @obj when it is a vmalloc address, when its
 * folio is not a slab, when the owning cache has no cpu_sheaves, or (with
 * CONFIG_NUMA) when the slab's node is not numa_mem_id(). Otherwise returns
 * the result of __kfree_rcu_sheaf().
 */
static bool kfree_rcu_sheaf(void *obj)
{
	struct kmem_cache *cache;
	struct folio *folio;
	struct slab *slab;

	if (is_vmalloc_addr(obj))
		return false;

	folio = virt_to_folio(obj);
	if (unlikely(!folio_test_slab(folio)))
		return false;

	slab = folio_slab(folio);
	cache = slab->slab_cache;

	if (!cache->cpu_sheaves)
		return false;

	/* On NUMA builds only objects from the local memory node qualify. */
	if (unlikely(IS_ENABLED(CONFIG_NUMA) &&
		     slab_nid(slab) != numa_mem_id()))
		return false;

	return __kfree_rcu_sheaf(cache, obj);
}
static bool
need_offload_krc(struct kfree_rcu_cpu *krcp)
{
@@ -1949,6 +1979,9 @@ void kvfree_call_rcu(struct rcu_head *head, void *ptr)
if (!head)
might_sleep();
if (!IS_ENABLED(CONFIG_PREEMPT_RT) && kfree_rcu_sheaf(ptr))
return;
// Queue the object but don't yet schedule the batch.
if (debug_rcu_head_queue(ptr)) {
// Probable double kfree_rcu(), just leak.
@@ -2023,6 +2056,8 @@ void kvfree_rcu_barrier(void)
bool queued;
int i, cpu;
flush_all_rcu_sheaves();
/*
* Firstly we detach objects and queue them over an RCU-batch
* for all CPUs. Finally queued works are flushed for each CPU.

2361
mm/slub.c

File diff suppressed because it is too large Load Diff

View File

@@ -16,6 +16,7 @@ void __init vma_state_init(void)
struct kmem_cache_args args = {
.use_freeptr_offset = true,
.freeptr_offset = offsetof(struct vm_area_struct, vm_freeptr),
.sheaf_capacity = 32,
};
vm_area_cachep = kmem_cache_create("vm_area_struct",

View File

@@ -4,11 +4,31 @@
#include <linux/types.h>
#include <linux/gfp.h>
#include <pthread.h>
#define SLAB_PANIC 2
#define SLAB_RECLAIM_ACCOUNT 0x00020000UL /* Objects are reclaimable */
#define kzalloc_node(size, flags, node) kmalloc(size, flags)
/*
 * Bit positions for the SLAB_* flags defined in this header. Each enumerator
 * is turned into a mask with __SLAB_FLAG_BIT(), i.e. 1U << position.
 */
enum _slab_flag_bits {
_SLAB_KMALLOC,
_SLAB_HWCACHE_ALIGN,
_SLAB_PANIC,
_SLAB_TYPESAFE_BY_RCU,
_SLAB_ACCOUNT,
/* Not a flag: one past the last bit position in use. */
_SLAB_FLAGS_LAST_BIT
};
#define __SLAB_FLAG_BIT(nr) ((unsigned int __force)(1U << (nr)))
#define __SLAB_FLAG_UNUSED ((unsigned int __force)(0U))
#define SLAB_HWCACHE_ALIGN __SLAB_FLAG_BIT(_SLAB_HWCACHE_ALIGN)
#define SLAB_PANIC __SLAB_FLAG_BIT(_SLAB_PANIC)
#define SLAB_TYPESAFE_BY_RCU __SLAB_FLAG_BIT(_SLAB_TYPESAFE_BY_RCU)
#ifdef CONFIG_MEMCG
# define SLAB_ACCOUNT __SLAB_FLAG_BIT(_SLAB_ACCOUNT)
#else
# define SLAB_ACCOUNT __SLAB_FLAG_UNUSED
#endif
void *kmalloc(size_t size, gfp_t gfp);
void kfree(void *p);
@@ -23,6 +43,98 @@ enum slab_state {
FULL
};
/*
 * struct kmem_cache as declared by this tools header (guarded by
 * _TOOLS_SLAB_H). NOTE(review): the field notes below are inferred from names
 * and types only; confirm against the implementation that uses them.
 */
struct kmem_cache {
/* pthread mutex; presumably serialises access to the fields below */
pthread_mutex_t lock;
unsigned int size;
unsigned int align;
/* same name as kmem_cache_args.sheaf_capacity */
unsigned int sheaf_capacity;
int nr_objs;
void *objs;
/* same name and type as kmem_cache_args.ctor */
void (*ctor)(void *);
bool non_kernel_enabled;
unsigned int non_kernel;
unsigned long nr_allocated;
unsigned long nr_tallocated;
/* presumably gates whether @callback is invoked - TODO confirm */
bool exec_callback;
void (*callback)(void *);
void *private;
};
/**
 * struct kmem_cache_args - Optional arguments for creating a kmem_cache
 *
 * Passed by pointer to __kmem_cache_create_args(). A zero-initialised
 * instance is what __kmem_cache_default_args() passes when the caller gives
 * no arguments, so every field documents what %0 (or %NULL/%false) means.
 */
struct kmem_cache_args {
/**
* @align: The required alignment for the objects.
*
* %0 means no specific alignment is requested.
*/
unsigned int align;
/**
* @sheaf_capacity: The maximum size of the sheaf.
*/
unsigned int sheaf_capacity;
/**
* @useroffset: Usercopy region offset.
*
* %0 is a valid offset, when @usersize is non-%0
*/
unsigned int useroffset;
/**
* @usersize: Usercopy region size.
*
* %0 means no usercopy region is specified.
*/
unsigned int usersize;
/**
* @freeptr_offset: Custom offset for the free pointer
* in &SLAB_TYPESAFE_BY_RCU caches
*
* By default &SLAB_TYPESAFE_BY_RCU caches place the free pointer
* outside of the object. This might cause the object to grow in size.
* Cache creators that have a reason to avoid this can specify a custom
* free pointer offset in their struct where the free pointer will be
* placed.
*
* Note that placing the free pointer inside the object requires the
* caller to ensure that no fields are invalidated that are required to
* guard against object recycling (See &SLAB_TYPESAFE_BY_RCU for
* details).
*
* Using %0 as a value for @freeptr_offset is valid. If @freeptr_offset
* is specified, %use_freeptr_offset must be set %true.
*
* Note that @ctor currently isn't supported with custom free pointers
* as a @ctor requires an external free pointer.
*/
unsigned int freeptr_offset;
/**
* @use_freeptr_offset: Whether a @freeptr_offset is used.
*/
bool use_freeptr_offset;
/**
* @ctor: A constructor for the objects.
*
* The constructor is invoked for each object in a newly allocated slab
* page. It is the cache user's responsibility to free objects in the
* same state as after calling the constructor, or deal appropriately
* with any differences between a freshly constructed and a reallocated
* object.
*
* %NULL means no constructor.
*/
void (*ctor)(void *);
};
/*
 * A sheaf: a header followed by a flexible array of object pointers.
 * NOTE(review): which fields are live in which state is taken from the
 * inline comments only; confirm against the sheaf implementation.
 */
struct slab_sheaf {
/* barn_list and capacity share storage; only one is meaningful at a time */
union {
struct list_head barn_list;
/* only used for prefilled sheafs */
unsigned int capacity;
};
struct kmem_cache *cache;
/* value returned by kmem_cache_sheaf_size() */
unsigned int size;
int node; /* only used for rcu_sheaf */
/* flexible array member; must stay last */
void *objects[];
};
static inline void *kzalloc(size_t size, gfp_t gfp)
{
return kmalloc(size, gfp | __GFP_ZERO);
@@ -37,12 +149,57 @@ static inline void *kmem_cache_alloc(struct kmem_cache *cachep, int flags)
}
void kmem_cache_free(struct kmem_cache *cachep, void *objp);
struct kmem_cache *kmem_cache_create(const char *name, unsigned int size,
unsigned int align, unsigned int flags,
void (*ctor)(void *));
struct kmem_cache *
__kmem_cache_create_args(const char *name, unsigned int size,
struct kmem_cache_args *args, unsigned int flags);
/* If NULL is passed for @args, use this variant with default arguments. */
static inline struct kmem_cache *
__kmem_cache_default_args(const char *name, unsigned int size,
struct kmem_cache_args *args, unsigned int flags)
{
struct kmem_cache_args kmem_default_args = {};
return __kmem_cache_create_args(name, size, &kmem_default_args, flags);
}
static inline struct kmem_cache *
__kmem_cache_create(const char *name, unsigned int size, unsigned int align,
unsigned int flags, void (*ctor)(void *))
{
struct kmem_cache_args kmem_args = {
.align = align,
.ctor = ctor,
};
return __kmem_cache_create_args(name, size, &kmem_args, flags);
}
/*
 * kmem_cache_create() dispatches on the static type of its third argument:
 *  - struct kmem_cache_args *  -> __kmem_cache_create_args()
 *  - void * (e.g. NULL)        -> __kmem_cache_default_args()
 *  - anything else             -> __kmem_cache_create(), where the third
 *                                 argument is the alignment
 * The remaining arguments are forwarded unchanged to the selected function.
 */
#define kmem_cache_create(__name, __object_size, __args, ...) \
_Generic((__args), \
struct kmem_cache_args *: __kmem_cache_create_args, \
void *: __kmem_cache_default_args, \
default: __kmem_cache_create)(__name, __object_size, __args, __VA_ARGS__)
void kmem_cache_free_bulk(struct kmem_cache *cachep, size_t size, void **list);
int kmem_cache_alloc_bulk(struct kmem_cache *cachep, gfp_t gfp, size_t size,
void **list);
struct slab_sheaf *
kmem_cache_prefill_sheaf(struct kmem_cache *s, gfp_t gfp, unsigned int size);
void *
kmem_cache_alloc_from_sheaf(struct kmem_cache *s, gfp_t gfp,
struct slab_sheaf *sheaf);
void kmem_cache_return_sheaf(struct kmem_cache *s, gfp_t gfp,
struct slab_sheaf *sheaf);
int kmem_cache_refill_sheaf(struct kmem_cache *s, gfp_t gfp,
struct slab_sheaf **sheafp, unsigned int size);
/* Return the current value of @sheaf's size field. */
static inline unsigned int kmem_cache_sheaf_size(struct slab_sheaf *sheaf)
{
	unsigned int nr = sheaf->size;

	return nr;
}
#endif /* _TOOLS_SLAB_H */

View File

@@ -8,14 +8,6 @@
* difficult to handle in kernel tests.
*/
#define CONFIG_DEBUG_MAPLE_TREE
#define CONFIG_MAPLE_SEARCH
#define MAPLE_32BIT (MAPLE_NODE_SLOTS > 31)
#include "test.h"
#include <stdlib.h>
#include <time.h>
#include <linux/init.h>
#define module_init(x)
#define module_exit(x)
#define MODULE_AUTHOR(x)
@@ -23,7 +15,9 @@
#define MODULE_LICENSE(x)
#define dump_stack() assert(0)
#include "../../../lib/maple_tree.c"
#include "test.h"
#include "../shared/maple-shim.c"
#include "../../../lib/test_maple_tree.c"
#define RCU_RANGE_COUNT 1000
@@ -63,430 +57,6 @@ struct rcu_reader_struct {
struct rcu_test_struct2 *test;
};
/*
 * Count the nodes reachable from mas->alloc by walking the chain through
 * slot[0] and summing each node_count, plus one. Returns 0 when mas->alloc is
 * NULL or has its low bit set.
 */
static int get_alloc_node_count(struct ma_state *mas)
{
	struct maple_alloc *cur = mas->alloc;
	int total;

	if (!cur)
		return 0;
	if ((unsigned long)cur & 0x1)
		return 0;

	/* The walk stops at the first node whose node_count is zero. */
	for (total = 1; cur->node_count; cur = cur->slot[0])
		total += cur->node_count;

	return total;
}
/*
 * Request nodes twice (MAPLE_ALLOC_SLOTS + 1, then MAPLE_ALLOC_SLOTS + 3) and
 * verify that the count obtained by walking the allocation chain matches the
 * total stored in mas->alloc, then release everything with mas_destroy().
 */
static void check_mas_alloc_node_count(struct ma_state *mas)
{
mas_node_count_gfp(mas, MAPLE_ALLOC_SLOTS + 1, GFP_KERNEL);
mas_node_count_gfp(mas, MAPLE_ALLOC_SLOTS + 3, GFP_KERNEL);
/* Walked count and the recorded total must agree. */
MT_BUG_ON(mas->tree, get_alloc_node_count(mas) != mas->alloc->total);
mas_destroy(mas);
}
/*
* check_new_node() - Check the creation of new nodes and error path
* verification.
*/
static noinline void __init check_new_node(struct maple_tree *mt)
{
struct maple_node *mn, *mn2, *mn3;
struct maple_alloc *smn;
struct maple_node *nodes[100];
int i, j, total;
MA_STATE(mas, mt, 0, 0);
check_mas_alloc_node_count(&mas);
/* Try allocating 3 nodes */
mtree_lock(mt);
mt_set_non_kernel(0);
/* request 3 nodes to be allocated. */
mas_node_count(&mas, 3);
/* Allocation request of 3. */
MT_BUG_ON(mt, mas_alloc_req(&mas) != 3);
/* Allocate failed. */
MT_BUG_ON(mt, mas.node != MA_ERROR(-ENOMEM));
MT_BUG_ON(mt, !mas_nomem(&mas, GFP_KERNEL));
MT_BUG_ON(mt, mas_allocated(&mas) != 3);
mn = mas_pop_node(&mas);
MT_BUG_ON(mt, not_empty(mn));
MT_BUG_ON(mt, mn == NULL);
MT_BUG_ON(mt, mas.alloc == NULL);
MT_BUG_ON(mt, mas.alloc->slot[0] == NULL);
mas_push_node(&mas, mn);
mas_reset(&mas);
mas_destroy(&mas);
mtree_unlock(mt);
/* Try allocating 1 node, then 2 more */
mtree_lock(mt);
/* Set allocation request to 1. */
mas_set_alloc_req(&mas, 1);
/* Check Allocation request of 1. */
MT_BUG_ON(mt, mas_alloc_req(&mas) != 1);
mas_set_err(&mas, -ENOMEM);
/* Validate allocation request. */
MT_BUG_ON(mt, !mas_nomem(&mas, GFP_KERNEL));
/* Eat the requested node. */
mn = mas_pop_node(&mas);
MT_BUG_ON(mt, not_empty(mn));
MT_BUG_ON(mt, mn == NULL);
MT_BUG_ON(mt, mn->slot[0] != NULL);
MT_BUG_ON(mt, mn->slot[1] != NULL);
MT_BUG_ON(mt, mas_allocated(&mas) != 0);
mn->parent = ma_parent_ptr(mn);
ma_free_rcu(mn);
mas.status = ma_start;
mas_destroy(&mas);
/* Allocate 3 nodes, will fail. */
mas_node_count(&mas, 3);
/* Drop the lock and allocate 3 nodes. */
mas_nomem(&mas, GFP_KERNEL);
/* Ensure 3 are allocated. */
MT_BUG_ON(mt, mas_allocated(&mas) != 3);
/* Allocation request of 0. */
MT_BUG_ON(mt, mas_alloc_req(&mas) != 0);
MT_BUG_ON(mt, mas.alloc == NULL);
MT_BUG_ON(mt, mas.alloc->slot[0] == NULL);
MT_BUG_ON(mt, mas.alloc->slot[1] == NULL);
/* Ensure we counted 3. */
MT_BUG_ON(mt, mas_allocated(&mas) != 3);
/* Free. */
mas_reset(&mas);
mas_destroy(&mas);
/* Set allocation request to 1. */
mas_set_alloc_req(&mas, 1);
MT_BUG_ON(mt, mas_alloc_req(&mas) != 1);
mas_set_err(&mas, -ENOMEM);
/* Validate allocation request. */
MT_BUG_ON(mt, !mas_nomem(&mas, GFP_KERNEL));
MT_BUG_ON(mt, mas_allocated(&mas) != 1);
/* Check the node is only one node. */
mn = mas_pop_node(&mas);
MT_BUG_ON(mt, not_empty(mn));
MT_BUG_ON(mt, mas_allocated(&mas) != 0);
MT_BUG_ON(mt, mn == NULL);
MT_BUG_ON(mt, mn->slot[0] != NULL);
MT_BUG_ON(mt, mn->slot[1] != NULL);
MT_BUG_ON(mt, mas_allocated(&mas) != 0);
mas_push_node(&mas, mn);
MT_BUG_ON(mt, mas_allocated(&mas) != 1);
MT_BUG_ON(mt, mas.alloc->node_count);
mas_set_alloc_req(&mas, 2); /* request 2 more. */
MT_BUG_ON(mt, mas_alloc_req(&mas) != 2);
mas_set_err(&mas, -ENOMEM);
MT_BUG_ON(mt, !mas_nomem(&mas, GFP_KERNEL));
MT_BUG_ON(mt, mas_allocated(&mas) != 3);
MT_BUG_ON(mt, mas.alloc == NULL);
MT_BUG_ON(mt, mas.alloc->slot[0] == NULL);
MT_BUG_ON(mt, mas.alloc->slot[1] == NULL);
for (i = 2; i >= 0; i--) {
mn = mas_pop_node(&mas);
MT_BUG_ON(mt, mas_allocated(&mas) != i);
MT_BUG_ON(mt, !mn);
MT_BUG_ON(mt, not_empty(mn));
mn->parent = ma_parent_ptr(mn);
ma_free_rcu(mn);
}
total = 64;
mas_set_alloc_req(&mas, total); /* request 2 more. */
MT_BUG_ON(mt, mas_alloc_req(&mas) != total);
mas_set_err(&mas, -ENOMEM);
MT_BUG_ON(mt, !mas_nomem(&mas, GFP_KERNEL));
for (i = total; i > 0; i--) {
unsigned int e = 0; /* expected node_count */
if (!MAPLE_32BIT) {
if (i >= 35)
e = i - 34;
else if (i >= 5)
e = i - 4;
else if (i >= 2)
e = i - 1;
} else {
if (i >= 4)
e = i - 3;
else if (i >= 1)
e = i - 1;
else
e = 0;
}
MT_BUG_ON(mt, mas.alloc->node_count != e);
mn = mas_pop_node(&mas);
MT_BUG_ON(mt, not_empty(mn));
MT_BUG_ON(mt, mas_allocated(&mas) != i - 1);
MT_BUG_ON(mt, !mn);
mn->parent = ma_parent_ptr(mn);
ma_free_rcu(mn);
}
total = 100;
for (i = 1; i < total; i++) {
mas_set_alloc_req(&mas, i);
mas_set_err(&mas, -ENOMEM);
MT_BUG_ON(mt, !mas_nomem(&mas, GFP_KERNEL));
for (j = i; j > 0; j--) {
mn = mas_pop_node(&mas);
MT_BUG_ON(mt, mas_allocated(&mas) != j - 1);
MT_BUG_ON(mt, !mn);
MT_BUG_ON(mt, not_empty(mn));
mas_push_node(&mas, mn);
MT_BUG_ON(mt, mas_allocated(&mas) != j);
mn = mas_pop_node(&mas);
MT_BUG_ON(mt, not_empty(mn));
MT_BUG_ON(mt, mas_allocated(&mas) != j - 1);
mn->parent = ma_parent_ptr(mn);
ma_free_rcu(mn);
}
MT_BUG_ON(mt, mas_allocated(&mas) != 0);
mas_set_alloc_req(&mas, i);
mas_set_err(&mas, -ENOMEM);
MT_BUG_ON(mt, !mas_nomem(&mas, GFP_KERNEL));
for (j = 0; j <= i/2; j++) {
MT_BUG_ON(mt, mas_allocated(&mas) != i - j);
nodes[j] = mas_pop_node(&mas);
MT_BUG_ON(mt, mas_allocated(&mas) != i - j - 1);
}
while (j) {
j--;
mas_push_node(&mas, nodes[j]);
MT_BUG_ON(mt, mas_allocated(&mas) != i - j);
}
MT_BUG_ON(mt, mas_allocated(&mas) != i);
for (j = 0; j <= i/2; j++) {
MT_BUG_ON(mt, mas_allocated(&mas) != i - j);
mn = mas_pop_node(&mas);
MT_BUG_ON(mt, not_empty(mn));
mn->parent = ma_parent_ptr(mn);
ma_free_rcu(mn);
MT_BUG_ON(mt, mas_allocated(&mas) != i - j - 1);
}
mas_reset(&mas);
MT_BUG_ON(mt, mas_nomem(&mas, GFP_KERNEL));
mas_destroy(&mas);
}
/* Set allocation request. */
total = 500;
mas_node_count(&mas, total);
/* Drop the lock and allocate the nodes. */
mas_nomem(&mas, GFP_KERNEL);
MT_BUG_ON(mt, !mas.alloc);
i = 1;
smn = mas.alloc;
while (i < total) {
for (j = 0; j < MAPLE_ALLOC_SLOTS; j++) {
i++;
MT_BUG_ON(mt, !smn->slot[j]);
if (i == total)
break;
}
smn = smn->slot[0]; /* next. */
}
MT_BUG_ON(mt, mas_allocated(&mas) != total);
mas_reset(&mas);
mas_destroy(&mas); /* Free. */
MT_BUG_ON(mt, mas_allocated(&mas) != 0);
for (i = 1; i < 128; i++) {
mas_node_count(&mas, i); /* Request */
mas_nomem(&mas, GFP_KERNEL); /* Fill request */
MT_BUG_ON(mt, mas_allocated(&mas) != i); /* check request filled */
for (j = i; j > 0; j--) { /*Free the requests */
mn = mas_pop_node(&mas); /* get the next node. */
MT_BUG_ON(mt, mn == NULL);
MT_BUG_ON(mt, not_empty(mn));
mn->parent = ma_parent_ptr(mn);
ma_free_rcu(mn);
}
MT_BUG_ON(mt, mas_allocated(&mas) != 0);
}
for (i = 1; i < MAPLE_NODE_MASK + 1; i++) {
MA_STATE(mas2, mt, 0, 0);
mas_node_count(&mas, i); /* Request */
mas_nomem(&mas, GFP_KERNEL); /* Fill request */
MT_BUG_ON(mt, mas_allocated(&mas) != i); /* check request filled */
for (j = 1; j <= i; j++) { /* Move the allocations to mas2 */
mn = mas_pop_node(&mas); /* get the next node. */
MT_BUG_ON(mt, mn == NULL);
MT_BUG_ON(mt, not_empty(mn));
mas_push_node(&mas2, mn);
MT_BUG_ON(mt, mas_allocated(&mas2) != j);
}
MT_BUG_ON(mt, mas_allocated(&mas) != 0);
MT_BUG_ON(mt, mas_allocated(&mas2) != i);
for (j = i; j > 0; j--) { /*Free the requests */
MT_BUG_ON(mt, mas_allocated(&mas2) != j);
mn = mas_pop_node(&mas2); /* get the next node. */
MT_BUG_ON(mt, mn == NULL);
MT_BUG_ON(mt, not_empty(mn));
mn->parent = ma_parent_ptr(mn);
ma_free_rcu(mn);
}
MT_BUG_ON(mt, mas_allocated(&mas2) != 0);
}
MT_BUG_ON(mt, mas_allocated(&mas) != 0);
mas_node_count(&mas, MAPLE_ALLOC_SLOTS + 1); /* Request */
MT_BUG_ON(mt, mas.node != MA_ERROR(-ENOMEM));
MT_BUG_ON(mt, !mas_nomem(&mas, GFP_KERNEL));
MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS + 1);
MT_BUG_ON(mt, mas.alloc->node_count != MAPLE_ALLOC_SLOTS);
mn = mas_pop_node(&mas); /* get the next node. */
MT_BUG_ON(mt, mn == NULL);
MT_BUG_ON(mt, not_empty(mn));
MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS);
MT_BUG_ON(mt, mas.alloc->node_count != MAPLE_ALLOC_SLOTS - 1);
mas_push_node(&mas, mn);
MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS + 1);
MT_BUG_ON(mt, mas.alloc->node_count != MAPLE_ALLOC_SLOTS);
/* Check the limit of pop/push/pop */
mas_node_count(&mas, MAPLE_ALLOC_SLOTS + 2); /* Request */
MT_BUG_ON(mt, mas_alloc_req(&mas) != 1);
MT_BUG_ON(mt, mas.node != MA_ERROR(-ENOMEM));
MT_BUG_ON(mt, !mas_nomem(&mas, GFP_KERNEL));
MT_BUG_ON(mt, mas_alloc_req(&mas));
MT_BUG_ON(mt, mas.alloc->node_count != 1);
MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS + 2);
mn = mas_pop_node(&mas);
MT_BUG_ON(mt, not_empty(mn));
MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS + 1);
MT_BUG_ON(mt, mas.alloc->node_count != MAPLE_ALLOC_SLOTS);
mas_push_node(&mas, mn);
MT_BUG_ON(mt, mas.alloc->node_count != 1);
MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS + 2);
mn = mas_pop_node(&mas);
MT_BUG_ON(mt, not_empty(mn));
mn->parent = ma_parent_ptr(mn);
ma_free_rcu(mn);
for (i = 1; i <= MAPLE_ALLOC_SLOTS + 1; i++) {
mn = mas_pop_node(&mas);
MT_BUG_ON(mt, not_empty(mn));
mn->parent = ma_parent_ptr(mn);
ma_free_rcu(mn);
}
MT_BUG_ON(mt, mas_allocated(&mas) != 0);
for (i = 3; i < MAPLE_NODE_MASK * 3; i++) {
mas.node = MA_ERROR(-ENOMEM);
mas_node_count(&mas, i); /* Request */
mas_nomem(&mas, GFP_KERNEL); /* Fill request */
mn = mas_pop_node(&mas); /* get the next node. */
mas_push_node(&mas, mn); /* put it back */
mas_destroy(&mas);
mas.node = MA_ERROR(-ENOMEM);
mas_node_count(&mas, i); /* Request */
mas_nomem(&mas, GFP_KERNEL); /* Fill request */
mn = mas_pop_node(&mas); /* get the next node. */
mn2 = mas_pop_node(&mas); /* get the next node. */
mas_push_node(&mas, mn); /* put them back */
mas_push_node(&mas, mn2);
mas_destroy(&mas);
mas.node = MA_ERROR(-ENOMEM);
mas_node_count(&mas, i); /* Request */
mas_nomem(&mas, GFP_KERNEL); /* Fill request */
mn = mas_pop_node(&mas); /* get the next node. */
mn2 = mas_pop_node(&mas); /* get the next node. */
mn3 = mas_pop_node(&mas); /* get the next node. */
mas_push_node(&mas, mn); /* put them back */
mas_push_node(&mas, mn2);
mas_push_node(&mas, mn3);
mas_destroy(&mas);
mas.node = MA_ERROR(-ENOMEM);
mas_node_count(&mas, i); /* Request */
mas_nomem(&mas, GFP_KERNEL); /* Fill request */
mn = mas_pop_node(&mas); /* get the next node. */
mn->parent = ma_parent_ptr(mn);
ma_free_rcu(mn);
mas_destroy(&mas);
mas.node = MA_ERROR(-ENOMEM);
mas_node_count(&mas, i); /* Request */
mas_nomem(&mas, GFP_KERNEL); /* Fill request */
mn = mas_pop_node(&mas); /* get the next node. */
mn->parent = ma_parent_ptr(mn);
ma_free_rcu(mn);
mn = mas_pop_node(&mas); /* get the next node. */
mn->parent = ma_parent_ptr(mn);
ma_free_rcu(mn);
mn = mas_pop_node(&mas); /* get the next node. */
mn->parent = ma_parent_ptr(mn);
ma_free_rcu(mn);
mas_destroy(&mas);
}
mas.node = MA_ERROR(-ENOMEM);
mas_node_count(&mas, 5); /* Request */
mas_nomem(&mas, GFP_KERNEL); /* Fill request */
MT_BUG_ON(mt, mas_allocated(&mas) != 5);
mas.node = MA_ERROR(-ENOMEM);
mas_node_count(&mas, 10); /* Request */
mas_nomem(&mas, GFP_KERNEL); /* Fill request */
mas.status = ma_start;
MT_BUG_ON(mt, mas_allocated(&mas) != 10);
mas_destroy(&mas);
mas.node = MA_ERROR(-ENOMEM);
mas_node_count(&mas, MAPLE_ALLOC_SLOTS - 1); /* Request */
mas_nomem(&mas, GFP_KERNEL); /* Fill request */
MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS - 1);
mas.node = MA_ERROR(-ENOMEM);
mas_node_count(&mas, 10 + MAPLE_ALLOC_SLOTS - 1); /* Request */
mas_nomem(&mas, GFP_KERNEL); /* Fill request */
mas.status = ma_start;
MT_BUG_ON(mt, mas_allocated(&mas) != 10 + MAPLE_ALLOC_SLOTS - 1);
mas_destroy(&mas);
mas.node = MA_ERROR(-ENOMEM);
mas_node_count(&mas, MAPLE_ALLOC_SLOTS + 1); /* Request */
mas_nomem(&mas, GFP_KERNEL); /* Fill request */
MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS + 1);
mas.node = MA_ERROR(-ENOMEM);
mas_node_count(&mas, MAPLE_ALLOC_SLOTS * 2 + 2); /* Request */
mas_nomem(&mas, GFP_KERNEL); /* Fill request */
mas.status = ma_start;
MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS * 2 + 2);
mas_destroy(&mas);
mas.node = MA_ERROR(-ENOMEM);
mas_node_count(&mas, MAPLE_ALLOC_SLOTS * 2 + 1); /* Request */
mas_nomem(&mas, GFP_KERNEL); /* Fill request */
MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS * 2 + 1);
mas.node = MA_ERROR(-ENOMEM);
mas_node_count(&mas, MAPLE_ALLOC_SLOTS * 3 + 2); /* Request */
mas_nomem(&mas, GFP_KERNEL); /* Fill request */
mas.status = ma_start;
MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS * 3 + 2);
mas_destroy(&mas);
mtree_unlock(mt);
}
/*
* Check erasing including RCU.
*/
@@ -35455,17 +35025,6 @@ static void check_dfs_preorder(struct maple_tree *mt)
MT_BUG_ON(mt, count != e);
mtree_destroy(mt);
mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE);
mas_reset(&mas);
mt_zero_nr_tallocated();
mt_set_non_kernel(200);
mas_expected_entries(&mas, max);
for (count = 0; count <= max; count++) {
mas.index = mas.last = count;
mas_store(&mas, xa_mk_value(count));
MT_BUG_ON(mt, mas_is_err(&mas));
}
mas_destroy(&mas);
rcu_barrier();
/*
* pr_info(" ->seq test of 0-%lu %luK in %d active (%d total)\n",
@@ -35524,6 +35083,18 @@ static unsigned char get_vacant_height(struct ma_wr_state *wr_mas, void *entry)
return vacant_height;
}
/*
 * Count what is currently preallocated in a maple state: one for a non-NULL
 * mas->alloc, plus the value kmem_cache_sheaf_size() reports for mas->sheaf
 * when a sheaf is attached.
 */
static int mas_allocated(struct ma_state *mas)
{
	int count = mas->alloc ? 1 : 0;

	if (mas->sheaf)
		count += kmem_cache_sheaf_size(mas->sheaf);

	return count;
}
/* Preallocation testing */
static noinline void __init check_prealloc(struct maple_tree *mt)
{
@@ -35542,7 +35113,10 @@ static noinline void __init check_prealloc(struct maple_tree *mt)
/* Spanning store */
mas_set_range(&mas, 470, 500);
MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0);
mas_wr_preallocate(&wr_mas, ptr);
MT_BUG_ON(mt, mas.store_type != wr_spanning_store);
MT_BUG_ON(mt, mas_is_err(&mas));
allocated = mas_allocated(&mas);
height = mas_mt_height(&mas);
vacant_height = get_vacant_height(&wr_mas, ptr);
@@ -35552,6 +35126,7 @@ static noinline void __init check_prealloc(struct maple_tree *mt)
allocated = mas_allocated(&mas);
MT_BUG_ON(mt, allocated != 0);
mas_wr_preallocate(&wr_mas, ptr);
MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0);
allocated = mas_allocated(&mas);
height = mas_mt_height(&mas);
@@ -35592,20 +35167,6 @@ static noinline void __init check_prealloc(struct maple_tree *mt)
mn->parent = ma_parent_ptr(mn);
ma_free_rcu(mn);
MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0);
allocated = mas_allocated(&mas);
height = mas_mt_height(&mas);
vacant_height = get_vacant_height(&wr_mas, ptr);
MT_BUG_ON(mt, allocated != 1 + (height - vacant_height) * 3);
mn = mas_pop_node(&mas);
MT_BUG_ON(mt, mas_allocated(&mas) != allocated - 1);
mas_push_node(&mas, mn);
MT_BUG_ON(mt, mas_allocated(&mas) != allocated);
MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0);
mas_destroy(&mas);
allocated = mas_allocated(&mas);
MT_BUG_ON(mt, allocated != 0);
MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0);
allocated = mas_allocated(&mas);
height = mas_mt_height(&mas);
@@ -36406,11 +35967,17 @@ static void check_nomem_writer_race(struct maple_tree *mt)
check_load(mt, 6, xa_mk_value(0xC));
mtree_unlock(mt);
mt_set_non_kernel(0);
/* test for the same race but with mas_store_gfp() */
mtree_store_range(mt, 0, 5, xa_mk_value(0xA), GFP_KERNEL);
mtree_store_range(mt, 6, 10, NULL, GFP_KERNEL);
mas_set_range(&mas, 0, 5);
/* setup writer 2 that will trigger the race condition */
mt_set_private(mt);
mt_set_callback(writer2);
mtree_lock(mt);
mas_store_gfp(&mas, NULL, GFP_KERNEL);
@@ -36454,27 +36021,6 @@ static inline int check_vma_modification(struct maple_tree *mt)
return 0;
}
/*
 * Verify that a store issued in bulk mode never selects wr_rebalance as
 * its store type.
 */
static inline void check_bulk_rebalance(struct maple_tree *mt)
{
	MA_STATE(mas, mt, ULONG_MAX, ULONG_MAX);
	int nr_expected = 10;

	build_full_tree(mt, 0, 2);

	/* Walk backwards from the end, erasing every entry in the tree. */
	for (;;) {
		/* Re-arm bulk store mode before each erase. */
		mas_expected_entries(&mas, nr_expected);
		mas_erase(&mas);
		MT_BUG_ON(mt, mas.store_type == wr_rebalance);

		if (mas_prev(&mas, 0) == NULL)
			break;
	}

	mas_destroy(&mas);
}
void farmer_tests(void)
{
@@ -36487,10 +36033,6 @@ void farmer_tests(void)
check_vma_modification(&tree);
mtree_destroy(&tree);
mt_init(&tree);
check_bulk_rebalance(&tree);
mtree_destroy(&tree);
tree.ma_root = xa_mk_value(0);
mt_dump(&tree, mt_dump_dec);
@@ -36550,10 +36092,6 @@ void farmer_tests(void)
check_erase_testset(&tree);
mtree_destroy(&tree);
mt_init_flags(&tree, 0);
check_new_node(&tree);
mtree_destroy(&tree);
if (!MAPLE_32BIT) {
mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE);
check_rcu_simulated(&tree);

View File

@@ -16,21 +16,6 @@ int nr_allocated;
int preempt_count;
int test_verbose;
/*
 * Userspace stand-in for a slab cache, used by the test allocator in this
 * file.
 */
struct kmem_cache {
	/* Held around the object free/alloc loops of the bulk helpers. */
	pthread_mutex_t lock;
	/* Object size and alignment as requested when the cache was created. */
	unsigned int size;
	unsigned int align;
	/* Start at 0 / NULL on creation; presumably the pool of cached free objects - confirm. */
	int nr_objs;
	void *objs;
	/* Optional object constructor supplied at creation time. */
	void (*ctor)(void *);
	/*
	 * While this is zero, an allocation whose gfp mask lacks
	 * __GFP_DIRECT_RECLAIM returns NULL.
	 */
	unsigned int non_kernel;
	/* Allocation counters, zeroed on creation. */
	unsigned long nr_allocated;
	unsigned long nr_tallocated;
	/*
	 * When set, the next bulk free or sheaf prefill invokes
	 * callback(private), if a callback is installed, and clears the flag.
	 */
	bool exec_callback;
	void (*callback)(void *);
	void *private;
};
void kmem_cache_set_callback(struct kmem_cache *cachep, void (*callback)(void *))
{
cachep->callback = callback;
@@ -79,7 +64,8 @@ void *kmem_cache_alloc_lru(struct kmem_cache *cachep, struct list_lru *lru,
if (!(gfp & __GFP_DIRECT_RECLAIM)) {
if (!cachep->non_kernel) {
cachep->exec_callback = true;
if (cachep->callback)
cachep->exec_callback = true;
return NULL;
}
@@ -152,6 +138,12 @@ void kmem_cache_free_bulk(struct kmem_cache *cachep, size_t size, void **list)
if (kmalloc_verbose)
pr_debug("Bulk free %p[0-%zu]\n", list, size - 1);
if (cachep->exec_callback) {
if (cachep->callback)
cachep->callback(cachep->private);
cachep->exec_callback = false;
}
pthread_mutex_lock(&cachep->lock);
for (int i = 0; i < size; i++)
kmem_cache_free_locked(cachep, list[i]);
@@ -219,6 +211,8 @@ int kmem_cache_alloc_bulk(struct kmem_cache *cachep, gfp_t gfp, size_t size,
for (i = 0; i < size; i++)
__kmem_cache_free_locked(cachep, p[i]);
pthread_mutex_unlock(&cachep->lock);
if (cachep->callback)
cachep->exec_callback = true;
return 0;
}
@@ -234,26 +228,112 @@ int kmem_cache_alloc_bulk(struct kmem_cache *cachep, gfp_t gfp, size_t size,
}
struct kmem_cache *
kmem_cache_create(const char *name, unsigned int size, unsigned int align,
unsigned int flags, void (*ctor)(void *))
__kmem_cache_create_args(const char *name, unsigned int size,
struct kmem_cache_args *args,
unsigned int flags)
{
struct kmem_cache *ret = malloc(sizeof(*ret));
pthread_mutex_init(&ret->lock, NULL);
ret->size = size;
ret->align = align;
ret->align = args->align;
ret->sheaf_capacity = args->sheaf_capacity;
ret->nr_objs = 0;
ret->nr_allocated = 0;
ret->nr_tallocated = 0;
ret->objs = NULL;
ret->ctor = ctor;
ret->ctor = args->ctor;
ret->non_kernel = 0;
ret->exec_callback = false;
ret->callback = NULL;
ret->private = NULL;
return ret;
}
/*
 * Allocate a sheaf and fill it with up to @size objects from cache @s.
 *
 * The sheaf has room for max(@size, s->sheaf_capacity) object pointers.
 * Returns the new sheaf, or NULL if the sheaf itself could not be allocated
 * or the bulk allocation produced no objects.
 */
struct slab_sheaf *
kmem_cache_prefill_sheaf(struct kmem_cache *s, gfp_t gfp, unsigned int size)
{
	struct slab_sheaf *new_sheaf;
	unsigned int slots;

	/* Run and disarm a pending test callback before allocating anything. */
	if (s->exec_callback) {
		if (s->callback)
			s->callback(s->private);
		s->exec_callback = false;
	}

	slots = max(size, s->sheaf_capacity);
	new_sheaf = calloc(1, sizeof(*new_sheaf) + sizeof(void *) * slots);
	if (!new_sheaf)
		return NULL;

	new_sheaf->cache = s;
	new_sheaf->capacity = slots;
	new_sheaf->size = kmem_cache_alloc_bulk(s, gfp, size, new_sheaf->objects);
	if (new_sheaf->size)
		return new_sheaf;

	/* Nothing was allocated: do not hand back an empty sheaf. */
	free(new_sheaf);
	return NULL;
}
/*
 * Make sure the sheaf at *@sheafp holds at least @size objects.
 *
 * If the sheaf already holds enough, nothing is done. If @size fits within
 * the sheaf's capacity it is topped up in place; otherwise a new, larger
 * sheaf is prefilled, the old one is returned to the cache and *@sheafp is
 * updated to point at the replacement.
 *
 * Returns 0 on success, -ENOMEM if no objects (or no new sheaf) could be
 * allocated.
 */
int kmem_cache_refill_sheaf(struct kmem_cache *s, gfp_t gfp,
		struct slab_sheaf **sheafp, unsigned int size)
{
	struct slab_sheaf *cur = *sheafp;
	struct slab_sheaf *bigger;
	int added;

	if (cur->size >= size)
		return 0;

	if (size <= cur->capacity) {
		/* Enough room: append after the objects already present. */
		added = kmem_cache_alloc_bulk(s, gfp, size - cur->size,
					      &cur->objects[cur->size]);
		if (!added)
			return -ENOMEM;

		cur->size += added;
		return 0;
	}

	/* Request exceeds capacity: swap in a freshly prefilled sheaf. */
	bigger = kmem_cache_prefill_sheaf(s, gfp, size);
	if (!bigger)
		return -ENOMEM;

	kmem_cache_return_sheaf(s, gfp, *sheafp);
	*sheafp = bigger;
	return 0;
}
/*
 * Dispose of @sheaf: bulk-free any objects it still holds back to cache @s,
 * then free the sheaf itself. @gfp is not used here.
 */
void kmem_cache_return_sheaf(struct kmem_cache *s, gfp_t gfp,
		struct slab_sheaf *sheaf)
{
	if (sheaf->size != 0)
		kmem_cache_free_bulk(s, sheaf->size, sheaf->objects);

	free(sheaf);
}
/*
 * Take the most recently stored object out of @sheaf.
 *
 * Returns the object, or NULL (after printing a diagnostic) when the sheaf
 * is empty. The vacated slot is cleared. @s and @gfp are not used here.
 */
void *
kmem_cache_alloc_from_sheaf(struct kmem_cache *s, gfp_t gfp,
		struct slab_sheaf *sheaf)
{
	void *obj;

	if (!sheaf->size) {
		printf("Nothing left in sheaf!\n");
		return NULL;
	}

	/* Pop from the top of the object array. */
	sheaf->size--;
	obj = sheaf->objects[sheaf->size];
	sheaf->objects[sheaf->size] = NULL;

	return obj;
}
/*
* Test the test infrastructure for kmem_cache_alloc/free and bulk counterparts.
*/

View File

@@ -10,4 +10,15 @@
#include <time.h>
#include "linux/init.h"
void maple_rcu_cb(struct rcu_head *head);
#define rcu_cb maple_rcu_cb
/*
 * Test replacement for kfree_rcu(): queue @_struct for release by handing
 * its embedded rcu_head member @_memb to call_rcu() with rcu_cb as the
 * callback.
 *
 * @_struct is evaluated exactly once. The macro deliberately does NOT end
 * in a semicolon so that it behaves like a single statement, e.g. inside an
 * unbraced if/else; the caller supplies the terminating ';'.
 */
#define kfree_rcu(_struct, _memb)			\
do {							\
	typeof(_struct) _p_struct = (_struct);		\
							\
	call_rcu(&((_p_struct)->_memb), rcu_cb);	\
} while (0)
#endif /* __MAPLE_SHARED_H__ */

View File

@@ -3,5 +3,12 @@
/* Very simple shim around the maple tree. */
#include "maple-shared.h"
#include <linux/slab.h>
#include "../../../lib/maple_tree.c"
void maple_rcu_cb(struct rcu_head *head) {
struct maple_node *node = container_of(head, struct maple_node, rcu);
kmem_cache_free(maple_node_cache, node);
}

View File

@@ -26,6 +26,7 @@
#include <linux/mm.h>
#include <linux/rbtree.h>
#include <linux/refcount.h>
#include <linux/slab.h>
extern unsigned long stack_guard_gap;
#ifdef CONFIG_MMU
@@ -509,65 +510,6 @@ struct pagetable_move_control {
.len_in = len_, \
}
/**
 * struct kmem_cache_args - Extra arguments handed to kmem_cache_create().
 *
 * Each member is documented individually below.
 */
struct kmem_cache_args {
	/**
	 * @align: The required alignment for the objects.
	 *
	 * %0 means no specific alignment is requested.
	 */
	unsigned int align;
	/**
	 * @useroffset: Usercopy region offset.
	 *
	 * %0 is a valid offset, when @usersize is non-%0
	 */
	unsigned int useroffset;
	/**
	 * @usersize: Usercopy region size.
	 *
	 * %0 means no usercopy region is specified.
	 */
	unsigned int usersize;
	/**
	 * @freeptr_offset: Custom offset for the free pointer
	 * in &SLAB_TYPESAFE_BY_RCU caches
	 *
	 * By default &SLAB_TYPESAFE_BY_RCU caches place the free pointer
	 * outside of the object. This might cause the object to grow in size.
	 * Cache creators that have a reason to avoid this can specify a custom
	 * free pointer offset in their struct where the free pointer will be
	 * placed.
	 *
	 * Note that placing the free pointer inside the object requires the
	 * caller to ensure that no fields are invalidated that are required to
	 * guard against object recycling (See &SLAB_TYPESAFE_BY_RCU for
	 * details).
	 *
	 * Using %0 as a value for @freeptr_offset is valid. If @freeptr_offset
	 * is specified, @use_freeptr_offset must be set to %true.
	 *
	 * Note that @ctor currently isn't supported with custom free pointers
	 * as a @ctor requires an external free pointer.
	 */
	unsigned int freeptr_offset;
	/**
	 * @use_freeptr_offset: Whether a @freeptr_offset is used.
	 */
	bool use_freeptr_offset;
	/**
	 * @ctor: A constructor for the objects.
	 *
	 * The constructor is invoked for each object in a newly allocated slab
	 * page. It is the cache user's responsibility to free the object in the
	 * same state as after calling the constructor, or deal appropriately
	 * with any differences between a freshly constructed and a reallocated
	 * object.
	 *
	 * %NULL means no constructor.
	 */
	void (*ctor)(void *);
};
static inline void vma_iter_invalidate(struct vma_iterator *vmi)
{
mas_pause(&vmi->mas);
@@ -652,40 +594,6 @@ static inline void vma_init(struct vm_area_struct *vma, struct mm_struct *mm)
vma->vm_lock_seq = UINT_MAX;
}
/*
 * Minimal cache descriptor: it only remembers what the creator asked for.
 * The name and args pointers are stored as given, not copied.
 */
struct kmem_cache {
	const char *name;
	size_t object_size;
	struct kmem_cache_args *args;
};

/*
 * Allocate and populate a cache descriptor.
 *
 * @name:        cache name (pointer is stored as-is).
 * @object_size: size in bytes of each object in the cache.
 * @args:        creation arguments (pointer is stored as-is, may be NULL).
 *
 * Returns the new descriptor, or NULL if the descriptor could not be
 * allocated.
 */
static inline struct kmem_cache *__kmem_cache_create(const char *name,
						      size_t object_size,
						      struct kmem_cache_args *args)
{
	struct kmem_cache *ret = malloc(sizeof(*ret));

	/* Report allocation failure instead of writing through NULL. */
	if (!ret)
		return NULL;

	ret->name = name;
	ret->object_size = object_size;
	ret->args = args;

	return ret;
}

/* Any arguments after the third are accepted and ignored. */
#define kmem_cache_create(__name, __object_size, __args, ...)		\
	__kmem_cache_create((__name), (__object_size), (__args))
/*
 * Hand out one zero-filled object of s->object_size bytes. The gfp flags
 * are ignored. Returns whatever calloc() returns.
 */
static inline void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags)
{
	void *obj = calloc(s->object_size, 1);

	(void)gfpflags;

	return obj;
}
/* Release object @x with free(); the cache @s is not consulted. */
static inline void kmem_cache_free(struct kmem_cache *s, void *x)
{
	(void)s;

	free(x);
}
/*
* These are defined in vma.h, but sadly vm_stat_account() is referenced by
* kernel/fork.c, so we have to these broadly available there, and temporarily
@@ -842,11 +750,11 @@ static inline unsigned long vma_pages(struct vm_area_struct *vma)
return (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
}
static inline void fput(struct file *)
static inline void fput(struct file *file)
{
}
static inline void mpol_put(struct mempolicy *)
static inline void mpol_put(struct mempolicy *pol)
{
}
@@ -854,15 +762,15 @@ static inline void lru_add_drain(void)
{
}
static inline void tlb_gather_mmu(struct mmu_gather *, struct mm_struct *)
static inline void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm)
{
}
static inline void update_hiwater_rss(struct mm_struct *)
static inline void update_hiwater_rss(struct mm_struct *mm)
{
}
static inline void update_hiwater_vm(struct mm_struct *)
static inline void update_hiwater_vm(struct mm_struct *mm)
{
}
@@ -871,36 +779,23 @@ static inline void unmap_vmas(struct mmu_gather *tlb, struct ma_state *mas,
unsigned long end_addr, unsigned long tree_end,
bool mm_wr_locked)
{
(void)tlb;
(void)mas;
(void)vma;
(void)start_addr;
(void)end_addr;
(void)tree_end;
(void)mm_wr_locked;
}
/* No-op stub: every argument is ignored. */
static inline void free_pgtables(struct mmu_gather *tlb, struct ma_state *mas,
				 struct vm_area_struct *vma, unsigned long floor,
				 unsigned long ceiling, bool mm_wr_locked)
{
}
static inline void mapping_unmap_writable(struct address_space *)
static inline void mapping_unmap_writable(struct address_space *mapping)
{
}
static inline void flush_dcache_mmap_lock(struct address_space *)
static inline void flush_dcache_mmap_lock(struct address_space *mapping)
{
}
static inline void tlb_finish_mmu(struct mmu_gather *)
static inline void tlb_finish_mmu(struct mmu_gather *tlb)
{
}
@@ -909,7 +804,7 @@ static inline struct file *get_file(struct file *f)
return f;
}
static inline int vma_dup_policy(struct vm_area_struct *, struct vm_area_struct *)
static inline int vma_dup_policy(struct vm_area_struct *src, struct vm_area_struct *dst)
{
return 0;
}
@@ -936,10 +831,6 @@ static inline void vma_adjust_trans_huge(struct vm_area_struct *vma,
unsigned long end,
struct vm_area_struct *next)
{
(void)vma;
(void)start;
(void)end;
(void)next;
}
/*
 * No-op stub. The parameters are named because omitting parameter names in
 * a function definition is only valid from C23 onwards.
 */
static inline void hugetlb_split(struct vm_area_struct *vma, unsigned long addr) {}
@@ -959,51 +850,48 @@ static inline void vm_acct_memory(long pages)
{
}
static inline void vma_interval_tree_insert(struct vm_area_struct *,
struct rb_root_cached *)
static inline void vma_interval_tree_insert(struct vm_area_struct *vma,
struct rb_root_cached *rb)
{
}
static inline void vma_interval_tree_remove(struct vm_area_struct *,
struct rb_root_cached *)
static inline void vma_interval_tree_remove(struct vm_area_struct *vma,
struct rb_root_cached *rb)
{
}
static inline void flush_dcache_mmap_unlock(struct address_space *)
static inline void flush_dcache_mmap_unlock(struct address_space *mapping)
{
}
static inline void anon_vma_interval_tree_insert(struct anon_vma_chain*,
struct rb_root_cached *)
static inline void anon_vma_interval_tree_insert(struct anon_vma_chain *avc,
struct rb_root_cached *rb)
{
}
static inline void anon_vma_interval_tree_remove(struct anon_vma_chain*,
struct rb_root_cached *)
static inline void anon_vma_interval_tree_remove(struct anon_vma_chain *avc,
struct rb_root_cached *rb)
{
}
static inline void uprobe_mmap(struct vm_area_struct *)
static inline void uprobe_mmap(struct vm_area_struct *vma)
{
}
/* No-op stub: @vma, @start and @end are ignored. */
static inline void uprobe_munmap(struct vm_area_struct *vma,
				 unsigned long start, unsigned long end)
{
}
static inline void i_mmap_lock_write(struct address_space *)
static inline void i_mmap_lock_write(struct address_space *mapping)
{
}
static inline void anon_vma_lock_write(struct anon_vma *)
static inline void anon_vma_lock_write(struct anon_vma *anon_vma)
{
}
static inline void vma_assert_write_locked(struct vm_area_struct *)
static inline void vma_assert_write_locked(struct vm_area_struct *vma)
{
}
@@ -1013,16 +901,16 @@ static inline void unlink_anon_vmas(struct vm_area_struct *vma)
vma->anon_vma->was_unlinked = true;
}
static inline void anon_vma_unlock_write(struct anon_vma *)
static inline void anon_vma_unlock_write(struct anon_vma *anon_vma)
{
}
static inline void i_mmap_unlock_write(struct address_space *)
static inline void i_mmap_unlock_write(struct address_space *mapping)
{
}
static inline void anon_vma_merge(struct vm_area_struct *,
struct vm_area_struct *)
static inline void anon_vma_merge(struct vm_area_struct *vma,
struct vm_area_struct *next)
{
}
@@ -1031,27 +919,22 @@ static inline int userfaultfd_unmap_prep(struct vm_area_struct *vma,
unsigned long end,
struct list_head *unmaps)
{
(void)vma;
(void)start;
(void)end;
(void)unmaps;
return 0;
}
static inline void mmap_write_downgrade(struct mm_struct *)
static inline void mmap_write_downgrade(struct mm_struct *mm)
{
}
static inline void mmap_read_unlock(struct mm_struct *)
static inline void mmap_read_unlock(struct mm_struct *mm)
{
}
static inline void mmap_write_unlock(struct mm_struct *)
static inline void mmap_write_unlock(struct mm_struct *mm)
{
}
static inline int mmap_write_lock_killable(struct mm_struct *)
static inline int mmap_write_lock_killable(struct mm_struct *mm)
{
return 0;
}
@@ -1060,10 +943,6 @@ static inline bool can_modify_mm(struct mm_struct *mm,
unsigned long start,
unsigned long end)
{
(void)mm;
(void)start;
(void)end;
return true;
}
@@ -1071,16 +950,13 @@ static inline void arch_unmap(struct mm_struct *mm,
unsigned long start,
unsigned long end)
{
(void)mm;
(void)start;
(void)end;
}
static inline void mmap_assert_locked(struct mm_struct *)
static inline void mmap_assert_locked(struct mm_struct *mm)
{
}
static inline bool mpol_equal(struct mempolicy *, struct mempolicy *)
static inline bool mpol_equal(struct mempolicy *a, struct mempolicy *b)
{
return true;
}
@@ -1088,63 +964,62 @@ static inline bool mpol_equal(struct mempolicy *, struct mempolicy *)
/* No-op stub: @vma and @vm_flags are ignored. */
static inline void khugepaged_enter_vma(struct vm_area_struct *vma,
					vm_flags_t vm_flags)
{
}
static inline bool mapping_can_writeback(struct address_space *)
static inline bool mapping_can_writeback(struct address_space *mapping)
{
return true;
}
static inline bool is_vm_hugetlb_page(struct vm_area_struct *)
static inline bool is_vm_hugetlb_page(struct vm_area_struct *vma)
{
return false;
}
static inline bool vma_soft_dirty_enabled(struct vm_area_struct *)
static inline bool vma_soft_dirty_enabled(struct vm_area_struct *vma)
{
return false;
}
static inline bool userfaultfd_wp(struct vm_area_struct *)
static inline bool userfaultfd_wp(struct vm_area_struct *vma)
{
return false;
}
static inline void mmap_assert_write_locked(struct mm_struct *)
static inline void mmap_assert_write_locked(struct mm_struct *mm)
{
}
static inline void mutex_lock(struct mutex *)
static inline void mutex_lock(struct mutex *lock)
{
}
static inline void mutex_unlock(struct mutex *)
static inline void mutex_unlock(struct mutex *lock)
{
}
static inline bool mutex_is_locked(struct mutex *)
static inline bool mutex_is_locked(struct mutex *lock)
{
return true;
}
static inline bool signal_pending(void *)
static inline bool signal_pending(void *p)
{
return false;
}
static inline bool is_file_hugepages(struct file *)
static inline bool is_file_hugepages(struct file *file)
{
return false;
}
static inline int security_vm_enough_memory_mm(struct mm_struct *, long)
static inline int security_vm_enough_memory_mm(struct mm_struct *mm, long pages)
{
return 0;
}
static inline bool may_expand_vm(struct mm_struct *, vm_flags_t, unsigned long)
static inline bool may_expand_vm(struct mm_struct *mm, vm_flags_t flags,
unsigned long npages)
{
return true;
}
@@ -1169,7 +1044,7 @@ static inline void vm_flags_clear(struct vm_area_struct *vma,
vma->__vm_flags &= ~flags;
}
static inline int shmem_zero_setup(struct vm_area_struct *)
static inline int shmem_zero_setup(struct vm_area_struct *vma)
{
return 0;
}
@@ -1179,20 +1054,20 @@ static inline void vma_set_anonymous(struct vm_area_struct *vma)
vma->vm_ops = NULL;
}
static inline void ksm_add_vma(struct vm_area_struct *)
static inline void ksm_add_vma(struct vm_area_struct *vma)
{
}
static inline void perf_event_mmap(struct vm_area_struct *)
static inline void perf_event_mmap(struct vm_area_struct *vma)
{
}
static inline bool vma_is_dax(struct vm_area_struct *)
static inline bool vma_is_dax(struct vm_area_struct *vma)
{
return false;
}
static inline struct vm_area_struct *get_gate_vma(struct mm_struct *)
static inline struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
{
return NULL;
}
@@ -1217,16 +1092,16 @@ static inline void vma_set_page_prot(struct vm_area_struct *vma)
WRITE_ONCE(vma->vm_page_prot, vm_page_prot);
}
static inline bool arch_validate_flags(vm_flags_t)
static inline bool arch_validate_flags(vm_flags_t flags)
{
return true;
}
static inline void vma_close(struct vm_area_struct *)
static inline void vma_close(struct vm_area_struct *vma)
{
}
static inline int mmap_file(struct file *, struct vm_area_struct *)
static inline int mmap_file(struct file *file, struct vm_area_struct *vma)
{
return 0;
}
@@ -1388,8 +1263,6 @@ static inline int mapping_map_writable(struct address_space *mapping)
/* Stub: @pmc is ignored and 0 is always returned. */
static inline unsigned long move_page_tables(struct pagetable_move_control *pmc)
{
	return 0;
}
@@ -1397,51 +1270,38 @@ static inline void free_pgd_range(struct mmu_gather *tlb,
unsigned long addr, unsigned long end,
unsigned long floor, unsigned long ceiling)
{
(void)tlb;
(void)addr;
(void)end;
(void)floor;
(void)ceiling;
}
/* Stub: @mm is ignored and 0 is always returned. */
static inline int ksm_execve(struct mm_struct *mm)
{
	return 0;
}
/* No-op stub: @mm is ignored. */
static inline void ksm_exit(struct mm_struct *mm)
{
}
/*
 * Initialise the VMA's lock state: when @reset_refcnt is true, set
 * vma->vm_refcnt to 0; otherwise do nothing.
 */
static inline void vma_lock_init(struct vm_area_struct *vma, bool reset_refcnt)
{
	if (!reset_refcnt)
		return;

	refcount_set(&vma->vm_refcnt, 0);
}
/* No-op stub: @vma is ignored. */
static inline void vma_numab_state_init(struct vm_area_struct *vma)
{
}
/* No-op stub: @vma is ignored. */
static inline void vma_numab_state_free(struct vm_area_struct *vma)
{
}
/* No-op stub: neither @orig_vma nor @new_vma is touched. */
static inline void dup_anon_vma_name(struct vm_area_struct *orig_vma,
				     struct vm_area_struct *new_vma)
{
}
/* No-op stub: @vma is ignored. */
static inline void free_anon_vma_name(struct vm_area_struct *vma)
{
}
/* Declared in vma.h. */
@@ -1495,7 +1355,6 @@ static inline int vfs_mmap_prepare(struct file *file, struct vm_area_desc *desc)
/* No-op stub: @vma is ignored. */
static inline void fixup_hugetlb_reservations(struct vm_area_struct *vma)
{
}
static inline void vma_set_file(struct vm_area_struct *vma, struct file *file)
@@ -1506,13 +1365,13 @@ static inline void vma_set_file(struct vm_area_struct *vma, struct file *file)
fput(file);
}
static inline bool shmem_file(struct file *)
static inline bool shmem_file(struct file *file)
{
return false;
}
static inline vm_flags_t ksm_vma_flags(const struct mm_struct *, const struct file *,
vm_flags_t vm_flags)
static inline vm_flags_t ksm_vma_flags(const struct mm_struct *mm,
const struct file *file, vm_flags_t vm_flags)
{
return vm_flags;
}