static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags,
                            bool force_refill)
{
    int batchcount;
    struct kmem_list3 *l3;
    struct array_cache *ac;
    int node;

    check_irq_off();
    node = numa_mem_id();
    if (unlikely(force_refill))
        goto force_grow;
retry:
    ac = cpu_cache_get(cachep);
    batchcount = ac->batchcount;
    if (!ac->touched && batchcount > BATCHREFILL_LIMIT) {
        /*
         * If there was little recent activity on this cache, then
         * perform only a partial refill. Otherwise we could generate
         * refill bouncing.
         */
        batchcount = BATCHREFILL_LIMIT;
    }
    l3 = cachep->nodelists[node];

    BUG_ON(ac->avail > 0 || !l3);
    spin_lock(&l3->list_lock);

    /* See if we can refill from the shared array */
    if (l3->shared && transfer_objects(ac, l3->shared, batchcount)) {
        l3->shared->touched = 1;
        goto alloc_done;
    }

    while (batchcount > 0) {
        struct list_head *entry;
        struct slab *slabp;
        /* Get slab alloc is to come from. */
        entry = l3->slabs_partial.next;
        if (entry == &l3->slabs_partial) {
            l3->free_touched = 1;
            entry = l3->slabs_free.next;
            if (entry == &l3->slabs_free)
                goto must_grow;
        }

        slabp = list_entry(entry, struct slab, list);
        check_slabp(cachep, slabp);
        check_spinlock_acquired(cachep);

        /*
         * The slab was either on partial or free list so
         * there must be at least one object available for
         * allocation.
         */
        BUG_ON(slabp->inuse >= cachep->num);

        while (slabp->inuse < cachep->num && batchcount--) {
            STATS_INC_ALLOCED(cachep);
            STATS_INC_ACTIVE(cachep);
            STATS_SET_HIGH(cachep);

            ac_put_obj(cachep, ac, slab_get_obj(cachep, slabp,
                                node));
        }
        check_slabp(cachep, slabp);

        /* move slabp to correct slabp list: */
        list_del(&slabp->list);
        if (slabp->free == BUFCTL_END)
            list_add(&slabp->list, &l3->slabs_full);
        else
            list_add(&slabp->list, &l3->slabs_partial);
    }

must_grow:
    l3->free_objects -= ac->avail;
alloc_done:
    spin_unlock(&l3->list_lock);

    if (unlikely(!ac->avail)) {
        int x;
force_grow:
        x = cache_grow(cachep, flags | GFP_THISNODE, node, NULL);

        /* cache_grow can reenable interrupts, then ac could change. */
        ac = cpu_cache_get(cachep);
        node = numa_mem_id();

        /* no objects in sight? abort */
        if (!x && (ac->avail == 0 || force_refill))
            return NULL;

        if (!ac->avail)   /* objects refilled by interrupt? */
            goto retry;
    }
    ac->touched = 1;

    return ac_get_obj(cachep, ac, flags, force_refill);
}
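Stripped of the locking and debug checks, the refill path is simple: the per-CPU array is a LIFO stack of object pointers, and a refill moves up to batchcount free objects from the node's lists into that stack. Here is a minimal userspace sketch of that idea; all the toy_* names are made up for illustration and are not kernel API:

#include <stdio.h>
#include <stdlib.h>

/* Toy model of struct array_cache: a bounded LIFO stack of object pointers. */
struct toy_array_cache {
    unsigned int avail;      /* objects currently in the stack */
    unsigned int limit;      /* capacity of entry[] */
    unsigned int batchcount; /* how many objects to move per refill */
    void *entry[];           /* most recently freed object is on top */
};

/* Stand-in for the node's slabs_partial/slabs_free lists: a flat pool. */
static void *free_pool[64];
static unsigned int free_objects = 64;

/* Like cache_alloc_refill(): move up to batchcount objects into the stack. */
static unsigned int toy_refill(struct toy_array_cache *ac)
{
    unsigned int batch = ac->batchcount;

    while (batch-- && free_objects && ac->avail < ac->limit)
        ac->entry[ac->avail++] = free_pool[--free_objects];
    return ac->avail; /* 0 means the node is empty: cache_grow() time */
}

/* Allocation: pop the top of the stack, refilling it first if empty. */
static void *toy_alloc(struct toy_array_cache *ac)
{
    if (!ac->avail && !toy_refill(ac))
        return NULL;
    return ac->entry[--ac->avail];
}

int main(void)
{
    struct toy_array_cache *ac;

    for (unsigned int i = 0; i < 64; i++)
        free_pool[i] = &free_pool[i]; /* dummy objects */

    ac = calloc(1, sizeof(*ac) + 120 * sizeof(void *));
    if (!ac)
        return 1;
    ac->limit = 120;
    ac->batchcount = 60;

    toy_alloc(ac); /* first alloc triggers a batch refill of 60 objects */
    printf("avail after first alloc: %u\n", ac->avail); /* prints 59 */
    free(ac);
    return 0;
}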
This fragment is the tail of the boot-time cpu-cache setup: until the allocator is fully up, each cache gets only a minimal per-CPU array of BOOT_CPUCACHE_ENTRIES entries and a batchcount of 1.

    cpu_cache_get(cachep)->avail = 0;
    cpu_cache_get(cachep)->limit = BOOT_CPUCACHE_ENTRIES;
    cpu_cache_get(cachep)->batchcount = 1;
    cpu_cache_get(cachep)->touched = 0;
    cachep->batchcount = 1;
    cachep->limit = BOOT_CPUCACHE_ENTRIES;
    return 0;
And this fragment is the allocation fastpath in ____cache_alloc():

    ac = cpu_cache_get(cachep);
    if (likely(ac->avail)) {
        ac->touched = 1;
        objp = ac_get_obj(cachep, ac, flags, false);

        /*
         * Allow for the possibility all avail objects are not allowed
         * by the current flags
         */
        if (objp) {
            STATS_INC_ALLOCHIT(cachep);
            goto out;
        }
        force_refill = true;
    }
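Note that ac_get_obj() can return NULL even when ac->avail is non-zero: objects taken from pfmemalloc (memory-reserve) pages are handed out only to requests that are themselves entitled to dip into reserves, which is why the caller falls back to force_refill. A rough model of that filtering follows; it is a simplification of the real __ac_get_obj() logic, and the toy_* names are illustrative only:

#include <stdbool.h>
#include <stddef.h>

struct toy_obj {
    bool from_reserve; /* models an object backed by a pfmemalloc page */
};

struct toy_ac {
    unsigned int avail;
    struct toy_obj *entry[8];
};

/*
 * Like ac_get_obj(): refuse to hand out a reserve-backed object to a
 * request that is not itself allowed to use memory reserves.
 */
static struct toy_obj *toy_get_obj(struct toy_ac *ac, bool may_use_reserve)
{
    if (!ac->avail)
        return NULL;
    if (ac->entry[ac->avail - 1]->from_reserve && !may_use_reserve)
        return NULL; /* caller reacts by setting force_refill = true */
    return ac->entry[--ac->avail];
}

int main(void)
{
    struct toy_obj reserve_obj = { .from_reserve = true };
    struct toy_ac ac = { .avail = 1, .entry = { &reserve_obj } };

    /* an ordinary request gets NULL here despite avail == 1 */
    return toy_get_obj(&ac, false) == NULL ? 0 : 1;
}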
It later turned out I simply hadn't read all of the code. Consider this function, which is called after kmem_cache_init has finished initializing:
void __init kmem_cache_init_late(void)
{
    struct kmem_cache *cachep;

    slab_state = UP;

    /* 6) resize the head arrays to their final sizes */
    mutex_lock(&slab_mutex);
    list_for_each_entry(cachep, &slab_caches, list)
        if (enable_cpucache(cachep, GFP_NOWAIT))
            BUG();
    mutex_unlock(&slab_mutex);

    /* Annotate slab for lockdep -- annotate the malloc caches */
    init_lock_keys();

    /* Done! */
    slab_state = FULL;

    /*
     * Register a cpu startup notifier callback that initializes
     * cpu_cache_get for all new cpus
     */
    register_cpu_notifier(&cpucache_notifier);

#ifdef CONFIG_NUMA
    /*
     * Register a memory hotplug callback that initializes and frees
     * nodelists.
     */
    hotplug_memory_notifier(slab_memory_callback, SLAB_CALLBACK_PRI);
#endif

    /*
     * The reap timers are started later, with a module init call: That part
     * of the kernel is not yet operational.
     */
}
/* Called with slab_mutex held always */
static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp)
{
    int err;
    int limit = 0;
    int shared = 0;
    int batchcount = 0;

    if (!is_root_cache(cachep)) {
        struct kmem_cache *root = memcg_root_cache(cachep);
        limit = root->limit;
        shared = root->shared;
        batchcount = root->batchcount;
    }

    if (limit && shared && batchcount)
        goto skip_setup;
    /*
     * The head array serves three purposes:
     * - create a LIFO ordering, i.e. return objects that are cache-warm
     * - reduce the number of spinlock operations.
     * - reduce the number of linked list operations on the slab and
     *   bufctl chains: array operations are cheaper.
     * The numbers are guessed, we should auto-tune as described by
     * Bonwick.
     */
    if (cachep->size > 131072)  /* objects larger than 128K get limit = 1 */
        limit = 1;
    else if (cachep->size > PAGE_SIZE)
        limit = 8;
    else if (cachep->size > 1024)
        limit = 24;
    else if (cachep->size > 256)
        limit = 54;
    else
        limit = 120;

    /*
     * CPU bound tasks (e.g. network routing) can exhibit cpu bound
     * allocation behaviour: Most allocs on one cpu, most free operations
     * on another cpu. For these cases, an efficient object passing between
     * cpus is necessary. This is provided by a shared array. The array
     * replaces Bonwick's magazine layer.
     * On uniprocessor, it's functionally equivalent (but less efficient)
     * to a larger limit. Thus disabled by default.
     */
    shared = 0;
    if (cachep->size <= PAGE_SIZE && num_possible_cpus() > 1) /* shared is 8 on SMP, 0 on a single core */
        shared = 8;

#if DEBUG
    /*
     * With debugging enabled, large batchcount lead to excessively long
     * periods with disabled local interrupts. Limit the batchcount
     */
    if (limit > 32)
        limit = 32;
#endif
    batchcount = (limit + 1) / 2;
skip_setup:
    err = do_tune_cpucache(cachep, limit, batchcount, shared, gfp); /* write the tuned values into the cache */
    if (err)
        printk(KERN_ERR "enable_cpucache failed for %s, error %d.\n",
               cachep->name, -err);
    return err;
}
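The heuristic is small enough to lift out and test on its own. A standalone sketch follows; toy_tune is an illustrative name and PAGE_SIZE is assumed to be 4096 here:

#include <stdio.h>

#define TOY_PAGE_SIZE 4096 /* assumed; the kernel uses the arch PAGE_SIZE */

struct toy_tunables { int limit, batchcount, shared; };

/* reproduce enable_cpucache()'s size-based table */
static struct toy_tunables toy_tune(unsigned int size, int nr_cpus)
{
    struct toy_tunables t;

    if (size > 131072)
        t.limit = 1;
    else if (size > TOY_PAGE_SIZE)
        t.limit = 8;
    else if (size > 1024)
        t.limit = 24;
    else if (size > 256)
        t.limit = 54;
    else
        t.limit = 120;

    t.shared = (size <= TOY_PAGE_SIZE && nr_cpus > 1) ? 8 : 0;
    t.batchcount = (t.limit + 1) / 2;
    return t;
}

int main(void)
{
    /* object sizes taken from the /proc/slabinfo dump shown later */
    unsigned int sizes[] = { 152, 296, 1328, 2048 };

    for (unsigned int i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++) {
        struct toy_tunables t = toy_tune(sizes[i], 2);
        printf("size %u -> tunables %d %d %d\n",
               sizes[i], t.limit, t.batchcount, t.shared);
    }
    return 0; /* prints 120 60 8, 54 27 8, 24 12 8, 24 12 8 */
}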
static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
                int batchcount, int shared, gfp_t gfp)
{
    int ret;
    struct kmem_cache *c = NULL;
    int i = 0;

    ret = __do_tune_cpucache(cachep, limit, batchcount, shared, gfp); /* tune the cache that was passed in */

    if (slab_state < FULL)
        return ret;

    if ((ret < 0) || !is_root_cache(cachep))
        return ret;

    VM_BUG_ON(!mutex_is_locked(&slab_mutex));
    for_each_memcg_cache_index(i) {
        c = cache_from_memcg(cachep, i);
        if (c)
            /* return value determined by the parent cache only */
            __do_tune_cpucache(c, limit, batchcount, shared, gfp);
    }

    return ret;
}
/* Always called with the slab_mutex held */
static int __do_tune_cpucache(struct kmem_cache *cachep, int limit,
                  int batchcount, int shared, gfp_t gfp)
{
    struct ccupdate_struct *new;
    int i;
    /*
     * A note on the structure used here:
     *
     *     struct ccupdate_struct {
     *         struct kmem_cache *cachep;
     *         struct array_cache *new[0];
     *     };
     */

    new = kzalloc(sizeof(*new) + nr_cpu_ids * sizeof(struct array_cache *),
              gfp); /* new is freed before this function returns; it only serves as a staging area */
    if (!new)
        return -ENOMEM;

    for_each_online_cpu(i) {
        new->new[i] = alloc_arraycache(cpu_to_mem(i), limit,
                           batchcount, gfp);
        if (!new->new[i]) {
            for (i--; i >= 0; i--)
                kfree(new->new[i]);
            kfree(new);
            return -ENOMEM;
        }
    }
    new->cachep = cachep;

    on_each_cpu(do_ccupdate_local, (void *)new, 1); /* key point: run do_ccupdate_local on every cpu to install the new arrays */

    check_irq_on();
    cachep->batchcount = batchcount;
    cachep->limit = limit;
    cachep->shared = shared;

    for_each_online_cpu(i) {
        struct array_cache *ccold = new->new[i];
        if (!ccold)
            continue;
        spin_lock_irq(&cachep->nodelists[cpu_to_mem(i)]->list_lock);
        free_block(cachep, ccold->entry, ccold->avail, cpu_to_mem(i)); /* return the old array's cached objects to the node lists */
        spin_unlock_irq(&cachep->nodelists[cpu_to_mem(i)]->list_lock);
        kfree(ccold);
    }
    kfree(new);
    return alloc_kmemlist(cachep, gfp);
}
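The update pattern is worth spelling out: the new arrays are allocated first, then swapped in on each CPU, and only afterwards are the old arrays drained and freed. A minimal single-threaded sketch of that exchange, with all toy_* names made up for illustration:

#include <stdlib.h>

#define TOY_NR_CPUS 4

struct toy_arraycache { int avail, limit; };

struct toy_cache {
    struct toy_arraycache *array[TOY_NR_CPUS]; /* per-"cpu" pointers */
};

struct toy_ccupdate {
    struct toy_cache *cachep;
    struct toy_arraycache *new[TOY_NR_CPUS];
};

/* models do_ccupdate_local() running on cpu `cpu`: exchange the pointers */
static void toy_ccupdate_local(struct toy_ccupdate *cc, int cpu)
{
    struct toy_arraycache *old = cc->cachep->array[cpu];

    cc->cachep->array[cpu] = cc->new[cpu]; /* install the new array */
    cc->new[cpu] = old;                    /* hand the old one back */
}

/* models __do_tune_cpucache(): allocate, swap everywhere, free the old */
static int toy_tune(struct toy_cache *cachep, int limit)
{
    struct toy_ccupdate cc = { .cachep = cachep };

    for (int cpu = 0; cpu < TOY_NR_CPUS; cpu++) {
        cc.new[cpu] = calloc(1, sizeof(*cc.new[cpu]));
        if (!cc.new[cpu])
            return -1; /* the kernel unwinds the earlier allocs here */
        cc.new[cpu]->limit = limit;
    }

    /* the kernel does this via on_each_cpu() with interrupts disabled */
    for (int cpu = 0; cpu < TOY_NR_CPUS; cpu++)
        toy_ccupdate_local(&cc, cpu);

    /* cc.new[] now holds the *old* arrays: drain and free them */
    for (int cpu = 0; cpu < TOY_NR_CPUS; cpu++)
        free(cc.new[cpu]); /* free_block() would drain ->avail first */
    return 0;
}

int main(void)
{
    struct toy_cache cache = { { NULL } };

    return toy_tune(&cache, 120);
}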
static void do_ccupdate_local(void *info)
{
    struct ccupdate_struct *new = info;
    struct array_cache *old;

    check_irq_off();
    old = cpu_cache_get(new->cachep);

    /*
     * new->cachep already points at our cache, so this redirects the
     * cache's per-cpu array to the freshly allocated one...
     */
    new->cachep->array[smp_processor_id()] = new->new[smp_processor_id()];
    /* ...and hands the old array back to the caller for draining/freeing */
    new->new[smp_processor_id()] = old;
}

The new->new[] arrays were initialized when they were allocated; see alloc_arraycache(), called from the previous function:

static struct array_cache *alloc_arraycache(int node, int entries,
                        int batchcount, gfp_t gfp)
{
    int memsize = sizeof(void *) * entries + sizeof(struct array_cache);
    struct array_cache *nc = NULL;

    nc = kmalloc_node(memsize, gfp, node);
    /*
     * The array_cache structures contain pointers to free object.
     * However, when such objects are allocated or transferred to another
     * cache the pointers are not cleared and they could be counted as
     * valid references during a kmemleak scan. Therefore, kmemleak must
     * not scan such objects.
     */
    kmemleak_no_scan(nc);
    if (nc) {
        nc->avail = 0;
        nc->limit = entries;
        nc->batchcount = batchcount;
        nc->touched = 0;
        spin_lock_init(&nc->lock);
    }
    return nc;
}
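Note why this swap is safe: do_ccupdate_local() runs on each CPU with interrupts disabled (hence check_irq_off()), and each CPU only touches its own array[smp_processor_id()] slot. Since the slab allocation and free fastpaths also run with local interrupts off, nothing on that CPU can interleave with the swap and observe a half-installed array.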
We can look at the slab information an actual running kernel reports:
cat /proc/slabinfo
slabinfo - version: 2.1
# name <active_objs> <num_objs> <objsize> <objperslab> <pagesperslab> : tunables <limit> <batchcount> <sharedfactor> : slabdata <active_slabs> <num_slabs> <sharedavail>
nf_conntrack_expect 0 0 152 26 1 : tunables 120 60 8 : slabdata 0 0 0
nf_conntrack_8050c5f0 2 26 296 13 1 : tunables 54 27 8 : slabdata 2 2 0
bridge_fdb_cache 4 78 48 78 1 : tunables 120 60 8 : slabdata 1 1 0
fib6_nodes 12 113 32 113 1 : tunables 120 60 8 : slabdata 1 1 0
ip6_dst_cache 25 57 208 19 1 : tunables 120 60 8 : slabdata 3 3 0
ip6_mrt_cache 0 0 112 35 1 : tunables 120 60 8 : slabdata 0 0 0
RAWv6 8 15 720 5 1 : tunables 54 27 8 : slabdata 3 3 0
UDPLITEv6 0 0 688 11 2 : tunables 54 27 8 : slabdata 0 0 0
UDPv6 3 22 688 11 2 : tunables 54 27 8 : slabdata 2 2 0
tw_sock_TCPv6 0 0 144 27 1 : tunables 120 60 8 : slabdata 0 0 0
request_sock_TCPv6 0 0 112 35 1 : tunables 120 60 8 : slabdata 0 0 0
TCPv6 5 6 1328 3 1 : tunables 24 12 8 : slabdata 2 2 0
ubi_wl_entry_slab 463 580 24 145 1 : tunables 120 60 8 : slabdata 4 4 0
sd_ext_cdb 2 113 32 113 1 : tunables 120 60 8 : slabdata 1 1 0
fuse_request 0 0 384 10 1 : tunables 54 27 8 : slabdata 0 0 0
fuse_inode 0 0 416 9 1 : tunables 54 27 8 : slabdata 0 0 0
jffs2_inode_cache 15 145 24 145 1 : tunables 120 60 8 : slabdata 1 1 0
jffs2_node_frag 130 290 24 145 1 : tunables 120 60 8 : slabdata 2 2 0
uid_cache 0 0 48 78 1 : tunables 120 60 8 : slabdata 0 0 0
UNIX 24 32 480 8 1 : tunables 54 27 8 : slabdata 4 4 0
ip_mrt_cache 0 0 96 40 1 : tunables 120 60 8 : slabdata 0 0 0
UDP-Lite 0 0 560 7 1 : tunables 54 27 8 : slabdata 0 0 0
tcp_bind_bucket 6 113 32 113 1 : tunables 120 60 8 : slabdata 1 1 0
inet_peer_cache 8 24 160 24 1 : tunables 120 60 8 : slabdata 1 1 0
ip_fib_trie 7 113 32 113 1 : tunables 120 60 8 : slabdata 1 1 0
ip_fib_alias 8 145 24 145 1 : tunables 120 60 8 : slabdata 1 1 0
ip_dst_cache 6 27 144 27 1 : tunables 120 60 8 : slabdata 1 1 0
PING 0 0 528 7 1 : tunables 54 27 8 : slabdata 0 0 0
RAW 4 7 544 7 1 : tunables 54 27 8 : slabdata 1 1 0
UDP 13 14 560 7 1 : tunables 54 27 8 : slabdata 2 2 0
tw_sock_TCP 0 0 112 35 1 : tunables 120 60 8 : slabdata 0 0 0
request_sock_TCP 0 0 80 48 1 : tunables 120 60 8 : slabdata 0 0 0
TCP 1 6 1184 6 2 : tunables 24 12 8 : slabdata 1 1 0
......
size-2048(DMA) 0 0 2048 2 1 : tunables 24 12 8 : slabdata 0 0 0
size-2048 192 192 2048 2 1 : tunables 24 12 8 : slabdata 96 96 0
size-1024(DMA) 0 0 1024 4 1 : tunables 54 27 8 : slabdata 0 0 0
size-1024 215 216 1024 4 1 : tunables 54 27 8 : slabdata 54 54 0
size-512(DMA) 0 0 512 8 1 : tunables 54 27 8 : slabdata 0 0 0
size-512 601 624 512 8 1 : tunables 54 27 8 : slabdata 78 78 0
size-256(DMA) 0 0 256 15 1 : tunables 120 60 8 : slabdata 0 0 0
size-256 1234 1245 256 15 1 : tunables 120 60 8 : slabdata 83 83 0
size-192(DMA) 0 0 256 15 1 : tunables 120 60 8 : slabdata 0 0 0
size-192 287 300 256 15 1 : tunables 120 60 8 : slabdata 20 20 0
size-128(DMA) 0 0 128 30 1 : tunables 120 60 8 : slabdata 0 0 0
size-128 1890 1890 128 30 1 : tunables 120 60 8 : slabdata 63 63 0
size-96(DMA) 0 0 128 30 1 : tunables 120 60 8 : slabdata 0 0 0
size-96 930 930 128 30 1 : tunables 120 60 8 : slabdata 31 31 0
size-64(DMA) 0 0 128 30 1 : tunables 120 60 8 : slabdata 0 0 0
size-32(DMA) 0 0 128 30 1 : tunables 120 60 8 : slabdata 0 0 0
size-64 1577 1650 128 30 1 : tunables 120 60 8 : slabdata 55 55 0
size-32 6213 6300 128 30 1 : tunables 120 60 8 : slabdata 210 210 0
kmem_cache 150 160 96 40 1 : tunables 120 60 8 : slabdata 4 4 0
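These numbers line up with enable_cpucache()'s table. For example, nf_conntrack_expect has objsize 152 (at most 256 bytes), so its tunables are 120 60 8; nf_conntrack_8050c5f0 (objsize 296, above 256) gets 54 27 8; TCPv6 (objsize 1328, above 1024) gets 24 12 8. In every row, batchcount is (limit + 1) / 2 and the shared factor is 8, matching an SMP system with objects no larger than a page.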
For comparison, mm/slub.c defines this late-init hook as an empty function; SLUB has no head arrays that need a final resizing pass:

void __init kmem_cache_init_late(void)
{
}
SLAB is the foundation on which SLUB and SLOB build.
SLOB targets embedded systems, mainly those with very limited memory (say, under 32MB). It pays little attention to large SMP machines, although there have recently been some small improvements on that front.
The SLUB allocator is intended to replace the slab code. By eliminating the many queues and their associated overhead and simplifying the slab structure, SLUB promises better performance and better system scalability while keeping the existing slab allocator interface.
Having covered all that, let's wrap up with a simple diagram of the slab mechanism:
[Figure: overview of the slab mechanism]