Linux內核中創建cache節點由函數kmem_cache_create()實現。
該函數的執行流程:
1,從全局cache_cache中獲得cache結構,因為全局cache_cache初始化對象的大小就是kmem_cache結構的大小,所以返回的指針正好可以轉換為cache結構;調用 kmem_cache_zalloc(&cache_cache, gfp);
2,計算每個slab可容納的對象數(cachep->num),並獲得slab中碎片(剩餘空間)的大小,由函數calculate_slab_order()實現;
3,計算並初始化cache的各種屬性;如果slab管理對象是外置式(off-slab),需要用kmem_find_general_cachep(slab_size, 0u)指定cachep->slabp_cache,用於存放slab管理對象(struct slab)和kmem_bufctl_t[]數組;
4,設置每個CPU上的本地cache,setup_cpu_cache();
5,cache創建完畢,將其加入到全局slab cache鏈表中;
一、主實現
[cpp]
- /**
- * kmem_cache_create - Create a cache.
- * @name: A string which is used in /proc/slabinfo to identify this cache.
- * @size: The size of objects to be created in this cache.
- * @align: The required alignment for the objects.
- * @flags: SLAB flags
- * @ctor: A constructor for the objects.
- *
- * Returns a ptr to the cache on success, NULL on failure.
- * Cannot be called within a int, but can be interrupted.
- * The @ctor is run when new pages are allocated by the cache.
- *
- * @name must be valid until the cache is destroyed. This implies that
- * the module calling this has to destroy the cache before getting unloaded.
- * Note that kmem_cache_name() is not guaranteed to return the same pointer,
- * therefore applications must manage it themselves.
- *
- * The flags are
- *
- * %SLAB_POISON - Poison the slab with a known test pattern (a5a5a5a5)
- * to catch references to uninitialised memory.
- *
- * %SLAB_RED_ZONE - Insert `Red' zones around the allocated memory to check
- * for buffer overruns.
- *
- * %SLAB_HWCACHE_ALIGN - Align the objects in this cache to a hardware
- * cacheline. This can be beneficial if you're counting cycles as closely
- * as davem.
- */
- /* Creates a top-level cache node of the slab system. Original note:
- right after creation the cache holds no slabs and no objects; a new slab
- is only created when an object is requested and the cache has no free
- object left. */
- struct kmem_cache *
- kmem_cache_create (const char *name, size_t size, size_t align,
- unsigned long flags, void (*ctor)(void *))
- {
- size_t left_over, slab_size, ralign;
- struct kmem_cache *cachep = NULL, *pc;
- gfp_t gfp;
-
- /*
- * Sanity checks... these are all serious usage bugs.
- *//* Argument sanity checks: any violation ends in BUG(). */
- if (!name || in_interrupt() || (size < BYTES_PER_WORD) ||
- size > KMALLOC_MAX_SIZE) {
- printk(KERN_ERR "%s: Early error in slab %s\n", __func__,
- name);
- BUG();
- }
-
- /*
- * We use cache_chain_mutex to ensure a consistent view of
- * cpu_online_mask as well. Please see cpuup_callback
- */
- /* Is the slab allocator already initialised? Original note: during early
- boot only one CPU performs slab initialisation, so no locking is needed;
- otherwise get_online_cpus() and cache_chain_mutex are taken here and
- released again at the end of the function. */
- if (slab_is_available()) {
- get_online_cpus();
- mutex_lock(&cache_chain_mutex);
- }
- /* Walk the cache chain and validate the existing entries. */
- list_for_each_entry(pc, &cache_chain, next) {
- char tmp;
- int res;
-
- /*
- * This happens when the module gets unloaded and doesn't
- * destroy its slab cache and no-one else reuses the vmalloc
- * area of the module. Print a warning.
- */
- /* Probe whether the name of this cache on the chain is still readable. */
- res = probe_kernel_address(pc->name, tmp);
- if (res) {/* name could not be read: report it and skip this entry */
- printk(KERN_ERR
- "SLAB: cache with size %d has lost its name\n",
- pc->buffer_size);
- continue;
- }
- /* Reject the request if a cache with the same name is already on the chain. */
- if (!strcmp(pc->name, name)) {
- printk(KERN_ERR
- "kmem_cache_create: duplicate cache %s\n", name);
- dump_stack();
- goto oops;
- }
- }
-
- #if DEBUG
- WARN_ON(strchr(name, ' ')); /* It confuses parsers */
- #if FORCED_DEBUG
- /*
- * Enable redzoning and last user accounting, except for caches with
- * large objects, if the increased size would increase the object size
- * above the next power of two: caches with object sizes just above a
- * power of two have a significant amount of internal fragmentation.
- */
- if (size < 4096 || fls(size - 1) == fls(size-1 + REDZONE_ALIGN +
- 2 * sizeof(unsigned long long)))
- flags |= SLAB_RED_ZONE | SLAB_STORE_USER;
- if (!(flags & SLAB_DESTROY_BY_RCU))
- flags |= SLAB_POISON;
- #endif
- if (flags & SLAB_DESTROY_BY_RCU)
- BUG_ON(flags & SLAB_POISON);
- #endif
- /*
- * Always checks flags, a caller might be expecting debug support which
- * isn't available.
- */
- BUG_ON(flags & ~CREATE_MASK);
-
- /*
- * Check that size is in terms of words. This is needed to avoid
- * unaligned accesses for some archs when redzoning is used, and makes
- * sure any on-slab bufctl's are also correctly aligned.
- */
- if (size & (BYTES_PER_WORD - 1)) {
- size += (BYTES_PER_WORD - 1);
- size &= ~(BYTES_PER_WORD - 1);
- }
-
- /* calculate the final buffer alignment: */
-
- /* 1) arch recommendation: can be overridden for debug */
- if (flags & SLAB_HWCACHE_ALIGN) {
- /*
- * Default alignment: as specified by the arch code. Except if
- * an object is really small, then squeeze multiple objects into
- * one cacheline.
- */
- ralign = cache_line_size();
- while (size <= ralign / 2)
- ralign /= 2;
- } else {
- ralign = BYTES_PER_WORD;
- }
-
- /*
- * Redzoning and user store require word alignment or possibly larger.
- * Note this will be overridden by architecture or caller mandated
- * alignment if either is greater than BYTES_PER_WORD.
- */
- if (flags & SLAB_STORE_USER)
- ralign = BYTES_PER_WORD;
-
- if (flags & SLAB_RED_ZONE) {
- ralign = REDZONE_ALIGN;
- /* If redzoning, ensure that the second redzone is suitably
- * aligned, by adjusting the object size accordingly. */
- size += REDZONE_ALIGN - 1;
- size &= ~(REDZONE_ALIGN - 1);
- }
-
- /* 2) arch mandated alignment */
- if (ralign < ARCH_SLAB_MINALIGN) {
- ralign = ARCH_SLAB_MINALIGN;
- }
- /* 3) caller mandated alignment */
- if (ralign < align) {
- ralign = align;
- }
- /* disable debug if necessary */
- if (ralign > __alignof__(unsigned long long))
- flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER);
- /*
- * 4) Store it.
- */
- align = ralign;
- /* Choose the allocation mask depending on whether the slab allocator is usable yet. */
- if (slab_is_available())
- gfp = GFP_KERNEL;
- else
- /* Before slab is fully initialised the allocation must not block.
- NOTE(review): the original annotation also claimed that GFP_NOWAIT
- restricts allocation to low memory; nothing here shows that, so
- confirm against the GFP_NOWAIT definition. */
- gfp = GFP_NOWAIT;
-
- /* Get cache's description obj. */
- /* Obtain a zeroed struct kmem_cache. The object handed out by cache_cache
- can be used as a struct kmem_cache because the global cache_cache is
- declared with buffer_size = sizeof(struct kmem_cache). */
- cachep = kmem_cache_zalloc(&cache_cache, gfp);
- if (!cachep)
- goto oops;
-
- #if DEBUG
- cachep->obj_size = size;
-
- /*
- * Both debugging options require word-alignment which is calculated
- * into align above.
- */
- if (flags & SLAB_RED_ZONE) {
- /* add space for red zone words */
- cachep->obj_offset += sizeof(unsigned long long);
- size += 2 * sizeof(unsigned long long);
- }
- if (flags & SLAB_STORE_USER) {
- /* user store requires one word storage behind the end of
- * the real object. But if the second red zone needs to be
- * aligned to 64 bits, we must allow that much space.
- */
- if (flags & SLAB_RED_ZONE)
- size += REDZONE_ALIGN;
- else
- size += BYTES_PER_WORD;
- }
- #if FORCED_DEBUG && defined(CONFIG_DEBUG_PAGEALLOC)
- if (size >= malloc_sizes[INDEX_L3 + 1].cs_size
- && cachep->obj_size > cache_line_size() && size < PAGE_SIZE) {
- cachep->obj_offset += PAGE_SIZE - size;
- size = PAGE_SIZE;
- }
- #endif
- #endif
-
- /*
- * Determine if the slab management is 'on' or 'off' slab.
- * (bootstrapping cannot cope with offslab caches so don't do
- * it too early on.)
- */
- /* Decide where the slab management object lives: on-slab or off-slab.
- Off-slab is selected when the object size is at least PAGE_SIZE >> 3
- (512 bytes with 4 KiB pages) and slab_early_init is not set, i.e. the
- early initialisation phase always stays on-slab.
- For slab_early_init see kmem_cache_init(). */
- if ((size >= (PAGE_SIZE >> 3)) && !slab_early_init)
- /*
- * Size is large, assume best to place the slab management obj
- * off-slab (should allow better packing of objs).
- */
- flags |= CFLGS_OFF_SLAB;
-
- size = ALIGN(size, align);
- /* Obtain the amount of left-over (unused) space per slab. */
- left_over = calculate_slab_order(cachep, size, align, flags);
- /* cachep->num is the number of objects per slab of this cache; 0 means no cache could be set up for this object */
- if (!cachep->num) {
- printk(KERN_ERR
- "kmem_cache_create: couldn't create cache %s.\n", name);
- kmem_cache_free(&cache_cache, cachep);
- cachep = NULL;
- goto oops;
- }
- /* Size of the slab management data: struct slab plus the kmem_bufctl_t array, rounded up to align */
- slab_size = ALIGN(cachep->num * sizeof(kmem_bufctl_t)
- + sizeof(struct slab), align);
-
- /*
- * If the slab has been placed off-slab, and we have enough space then
- * move it on-slab. This is at the expense of any extra colouring.
- */
-
- /* If the cache was marked off-slab but the left-over space is at least as
- large as the slab management data, move the management data into the
- slab, turning it back into an on-slab cache. */
- if (flags & CFLGS_OFF_SLAB && left_over >= slab_size) {
- /* clear the off-slab flag */
- flags &= ~CFLGS_OFF_SLAB;
- /* the management data now consumes part of the left-over space */
- left_over -= slab_size;
- }
-
- if (flags & CFLGS_OFF_SLAB) {
- /* really off slab. No need for manual alignment */
- /* align applies to the slab objects. Original note: when the management
- data is stored off-slab it cannot shift the position of the objects
- inside the slab the way on-slab data would, so its size is not
- rounded up to align. */
- slab_size =
- cachep->num * sizeof(kmem_bufctl_t) + sizeof(struct slab);
-
- #ifdef CONFIG_PAGE_POISONING
- /* If we're going to use the generic kernel_map_pages()
- * poisoning, then it's going to smash the contents of
- * the redzone and userword anyhow, so switch them off.
- */
- if (size % PAGE_SIZE == 0 && flags & SLAB_POISON)
- flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER);
- #endif
- }
- /* unit size of one colour step for this cache */
- cachep->colour_off = cache_line_size();
- /* Offset must be a multiple of the alignment. */
- /* so raise the colour step to align when the cache line size is smaller */
- if (cachep->colour_off < align)
- cachep->colour_off = align;
- /* number of colour steps that fit into the left-over space */
- cachep->colour = left_over / cachep->colour_off;
- /* size of the slab management data */
- cachep->slab_size = slab_size;
- cachep->flags = flags;
- cachep->gfpflags = 0;
- if (CONFIG_ZONE_DMA_FLAG && (flags & SLAB_CACHE_DMA))
- cachep->gfpflags |= GFP_DMA;
- /* final (aligned) size of each object in the slab */
- cachep->buffer_size = size;
- /* Original note: used when computing an object's index within a slab; see obj_to_index() */
- cachep->reciprocal_buffer_size = reciprocal_value(size);
-
- if (flags & CFLGS_OFF_SLAB) {
- /* Look up a general cache for slab_size bytes and record it in
- slabp_cache. Original note: when a slab is created for an off-slab
- cache, the struct slab is allocated from that cache and the rest of
- the allocation holds the kmem_bufctl_t[] array.
- For an on-slab cache this assignment is skipped, so the pointer keeps
- the NULL value left by kmem_cache_zalloc(). */
- cachep->slabp_cache = kmem_find_general_cachep(slab_size, 0u);
- /*
- * This is a possibility for one of the malloc_sizes caches.
- * But since we go off slab only for object size greater than
- * PAGE_SIZE/8, and malloc_sizes gets created in ascending order,
- * this should not happen at all.
- * But leave a BUG_ON for some lucky dude.
- */
- BUG_ON(ZERO_OR_NULL_PTR(cachep->slabp_cache));
- }
- cachep->ctor = ctor;
- cachep->name = name;
- /* Set up the per-CPU local cache; on failure destroy the half-built cache. */
- if (setup_cpu_cache(cachep, gfp)) {
- __kmem_cache_destroy(cachep);
- cachep = NULL;
- goto oops;
- }
-
- /* cache setup completed, link it into the list */
- /* i.e. add the finished cache to the global cache_chain list */
- list_add(&cachep->next, &cache_chain);
- oops:
- if (!cachep && (flags & SLAB_PANIC))
- panic("kmem_cache_create(): failed to create slab `%s'\n",
- name);
- if (slab_is_available()) {
- mutex_unlock(&cache_chain_mutex);
- put_online_cpus();
- }
- return cachep;
- }
其中,cache_cache
[cpp]
- /*
-  * cache_cache: the statically defined cache from which the cache
-  * descriptors themselves (struct kmem_cache objects) are allocated.
-  */
- static struct kmem_cache cache_cache = {
- /* identifying name of this cache */
- .name = "kmem_cache",
- /* each object is exactly one cache descriptor, hence "cache of caches" */
- .buffer_size = sizeof(struct kmem_cache),
- /* boot-time tunables */
- .limit = BOOT_CPUCACHE_ENTRIES,
- .batchcount = 1,
- .shared = 1,
- };