diff options
Diffstat (limited to 'mm/slub.c')
-rw-r--r-- | mm/slub.c | 49 |
1 files changed, 28 insertions, 21 deletions
@@ -291,6 +291,7 @@ static inline struct kmem_cache_cpu *get_cpu_slab(struct kmem_cache *s, int cpu) | |||
291 | #endif | 291 | #endif |
292 | } | 292 | } |
293 | 293 | ||
294 | /* Verify that a pointer has an address that is valid within a slab page */ | ||
294 | static inline int check_valid_pointer(struct kmem_cache *s, | 295 | static inline int check_valid_pointer(struct kmem_cache *s, |
295 | struct page *page, const void *object) | 296 | struct page *page, const void *object) |
296 | { | 297 | { |
@@ -619,7 +620,7 @@ static int check_bytes_and_report(struct kmem_cache *s, struct page *page, | |||
619 | * A. Free pointer (if we cannot overwrite object on free) | 620 | * A. Free pointer (if we cannot overwrite object on free) |
620 | * B. Tracking data for SLAB_STORE_USER | 621 | * B. Tracking data for SLAB_STORE_USER |
621 | * C. Padding to reach required alignment boundary or at minimum | 622 | * C. Padding to reach required alignment boundary or at minimum |
622 | * one word if debuggin is on to be able to detect writes | 623 | * one word if debugging is on to be able to detect writes |
623 | * before the word boundary. | 624 | * before the word boundary. |
624 | * | 625 | * |
625 | * Padding is done using 0x5a (POISON_INUSE) | 626 | * Padding is done using 0x5a (POISON_INUSE) |
@@ -1268,7 +1269,7 @@ static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags) | |||
1268 | * may return off node objects because partial slabs are obtained | 1269 | * may return off node objects because partial slabs are obtained |
1269 | * from other nodes and filled up. | 1270 | * from other nodes and filled up. |
1270 | * | 1271 | * |
1271 | * If /sys/slab/xx/defrag_ratio is set to 100 (which makes | 1272 | * If /sys/kernel/slab/xx/defrag_ratio is set to 100 (which makes |
1272 | * defrag_ratio = 1000) then every (well almost) allocation will | 1273 | * defrag_ratio = 1000) then every (well almost) allocation will |
1273 | * first attempt to defrag slab caches on other nodes. This means | 1274 | * first attempt to defrag slab caches on other nodes. This means |
1274 | * scanning over all nodes to look for partial slabs which may be | 1275 | * scanning over all nodes to look for partial slabs which may be |
@@ -1343,9 +1344,11 @@ static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail) | |||
1343 | * Adding an empty slab to the partial slabs in order | 1344 | * Adding an empty slab to the partial slabs in order |
1344 | * to avoid page allocator overhead. This slab needs | 1345 | * to avoid page allocator overhead. This slab needs |
1345 | * to come after the other slabs with objects in | 1346 | * to come after the other slabs with objects in |
1346 | * order to fill them up. That way the size of the | 1347 | * so that the others get filled first. That way the |
1347 | * partial list stays small. kmem_cache_shrink can | 1348 | * size of the partial list stays small. |
1348 | * reclaim empty slabs from the partial list. | 1349 | * |
1350 | * kmem_cache_shrink can reclaim any empty slabs from the | ||
1351 | * partial list. | ||
1349 | */ | 1352 | */ |
1350 | add_partial(n, page, 1); | 1353 | add_partial(n, page, 1); |
1351 | slab_unlock(page); | 1354 | slab_unlock(page); |
@@ -1368,7 +1371,7 @@ static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) | |||
1368 | if (c->freelist) | 1371 | if (c->freelist) |
1369 | stat(c, DEACTIVATE_REMOTE_FREES); | 1372 | stat(c, DEACTIVATE_REMOTE_FREES); |
1370 | /* | 1373 | /* |
1371 | * Merge cpu freelist into freelist. Typically we get here | 1374 | * Merge cpu freelist into slab freelist. Typically we get here |
1372 | * because both freelists are empty. So this is unlikely | 1375 | * because both freelists are empty. So this is unlikely |
1373 | * to occur. | 1376 | * to occur. |
1374 | */ | 1377 | */ |
@@ -1399,6 +1402,7 @@ static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) | |||
1399 | 1402 | ||
1400 | /* | 1403 | /* |
1401 | * Flush cpu slab. | 1404 | * Flush cpu slab. |
1405 | * | ||
1402 | * Called from IPI handler with interrupts disabled. | 1406 | * Called from IPI handler with interrupts disabled. |
1403 | */ | 1407 | */ |
1404 | static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu) | 1408 | static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu) |
@@ -1457,7 +1461,8 @@ static inline int node_match(struct kmem_cache_cpu *c, int node) | |||
1457 | * rest of the freelist to the lockless freelist. | 1461 | * rest of the freelist to the lockless freelist. |
1458 | * | 1462 | * |
1459 | * And if we were unable to get a new slab from the partial slab lists then | 1463 | * And if we were unable to get a new slab from the partial slab lists then |
1460 | * we need to allocate a new slab. This is slowest path since we may sleep. | 1464 | * we need to allocate a new slab. This is the slowest path since it involves |
1465 | * a call to the page allocator and the setup of a new slab. | ||
1461 | */ | 1466 | */ |
1462 | static void *__slab_alloc(struct kmem_cache *s, | 1467 | static void *__slab_alloc(struct kmem_cache *s, |
1463 | gfp_t gfpflags, int node, void *addr, struct kmem_cache_cpu *c) | 1468 | gfp_t gfpflags, int node, void *addr, struct kmem_cache_cpu *c) |
@@ -1471,7 +1476,9 @@ static void *__slab_alloc(struct kmem_cache *s, | |||
1471 | slab_lock(c->page); | 1476 | slab_lock(c->page); |
1472 | if (unlikely(!node_match(c, node))) | 1477 | if (unlikely(!node_match(c, node))) |
1473 | goto another_slab; | 1478 | goto another_slab; |
1479 | |||
1474 | stat(c, ALLOC_REFILL); | 1480 | stat(c, ALLOC_REFILL); |
1481 | |||
1475 | load_freelist: | 1482 | load_freelist: |
1476 | object = c->page->freelist; | 1483 | object = c->page->freelist; |
1477 | if (unlikely(!object)) | 1484 | if (unlikely(!object)) |
@@ -1616,6 +1623,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page, | |||
1616 | 1623 | ||
1617 | if (unlikely(SlabDebug(page))) | 1624 | if (unlikely(SlabDebug(page))) |
1618 | goto debug; | 1625 | goto debug; |
1626 | |||
1619 | checks_ok: | 1627 | checks_ok: |
1620 | prior = object[offset] = page->freelist; | 1628 | prior = object[offset] = page->freelist; |
1621 | page->freelist = object; | 1629 | page->freelist = object; |
@@ -1630,8 +1638,7 @@ checks_ok: | |||
1630 | goto slab_empty; | 1638 | goto slab_empty; |
1631 | 1639 | ||
1632 | /* | 1640 | /* |
1633 | * Objects left in the slab. If it | 1641 | * Objects left in the slab. If it was not on the partial list before |
1634 | * was not on the partial list before | ||
1635 | * then add it. | 1642 | * then add it. |
1636 | */ | 1643 | */ |
1637 | if (unlikely(!prior)) { | 1644 | if (unlikely(!prior)) { |
@@ -1845,13 +1852,11 @@ static unsigned long calculate_alignment(unsigned long flags, | |||
1845 | unsigned long align, unsigned long size) | 1852 | unsigned long align, unsigned long size) |
1846 | { | 1853 | { |
1847 | /* | 1854 | /* |
1848 | * If the user wants hardware cache aligned objects then | 1855 | * If the user wants hardware cache aligned objects then follow that |
1849 | * follow that suggestion if the object is sufficiently | 1856 | * suggestion if the object is sufficiently large. |
1850 | * large. | ||
1851 | * | 1857 | * |
1852 | * The hardware cache alignment cannot override the | 1858 | * The hardware cache alignment cannot override the specified |
1853 | * specified alignment though. If that is greater | 1859 | * alignment though. If that is greater then use it. |
1854 | * then use it. | ||
1855 | */ | 1860 | */ |
1856 | if ((flags & SLAB_HWCACHE_ALIGN) && | 1861 | if ((flags & SLAB_HWCACHE_ALIGN) && |
1857 | size > cache_line_size() / 2) | 1862 | size > cache_line_size() / 2) |
@@ -2049,6 +2054,7 @@ static struct kmem_cache_node *early_kmem_cache_node_alloc(gfp_t gfpflags, | |||
2049 | #endif | 2054 | #endif |
2050 | init_kmem_cache_node(n); | 2055 | init_kmem_cache_node(n); |
2051 | atomic_long_inc(&n->nr_slabs); | 2056 | atomic_long_inc(&n->nr_slabs); |
2057 | |||
2052 | /* | 2058 | /* |
2053 | * lockdep requires consistent irq usage for each lock | 2059 | * lockdep requires consistent irq usage for each lock |
2054 | * so even though there cannot be a race this early in | 2060 | * so even though there cannot be a race this early in |
@@ -2301,7 +2307,7 @@ int kmem_ptr_validate(struct kmem_cache *s, const void *object) | |||
2301 | /* | 2307 | /* |
2302 | * We could also check if the object is on the slab's freelist. | 2308 | * We could also check if the object is on the slab's freelist. |
2303 | * But this would be too expensive and it seems that the main | 2309 | * But this would be too expensive and it seems that the main |
2304 | * purpose of kmem_ptr_valid is to check if the object belongs | 2310 | * purpose of kmem_ptr_validate() is to check if the object belongs |
2305 | * to a certain slab. | 2311 | * to a certain slab. |
2306 | */ | 2312 | */ |
2307 | return 1; | 2313 | return 1; |
@@ -2913,7 +2919,7 @@ void __init kmem_cache_init(void) | |||
2913 | /* | 2919 | /* |
2914 | * Patch up the size_index table if we have strange large alignment | 2920 | * Patch up the size_index table if we have strange large alignment |
2915 | * requirements for the kmalloc array. This is only the case for | 2921 | * requirements for the kmalloc array. This is only the case for |
2916 | * mips it seems. The standard arches will not generate any code here. | 2922 | * MIPS it seems. The standard arches will not generate any code here. |
2917 | * | 2923 | * |
2918 | * Largest permitted alignment is 256 bytes due to the way we | 2924 | * Largest permitted alignment is 256 bytes due to the way we |
2919 | * handle the index determination for the smaller caches. | 2925 | * handle the index determination for the smaller caches. |
@@ -2942,7 +2948,6 @@ void __init kmem_cache_init(void) | |||
2942 | kmem_size = sizeof(struct kmem_cache); | 2948 | kmem_size = sizeof(struct kmem_cache); |
2943 | #endif | 2949 | #endif |
2944 | 2950 | ||
2945 | |||
2946 | printk(KERN_INFO | 2951 | printk(KERN_INFO |
2947 | "SLUB: Genslabs=%d, HWalign=%d, Order=%d-%d, MinObjects=%d," | 2952 | "SLUB: Genslabs=%d, HWalign=%d, Order=%d-%d, MinObjects=%d," |
2948 | " CPUs=%d, Nodes=%d\n", | 2953 | " CPUs=%d, Nodes=%d\n", |
@@ -3039,12 +3044,15 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size, | |||
3039 | */ | 3044 | */ |
3040 | for_each_online_cpu(cpu) | 3045 | for_each_online_cpu(cpu) |
3041 | get_cpu_slab(s, cpu)->objsize = s->objsize; | 3046 | get_cpu_slab(s, cpu)->objsize = s->objsize; |
3047 | |||
3042 | s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *))); | 3048 | s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *))); |
3043 | up_write(&slub_lock); | 3049 | up_write(&slub_lock); |
3050 | |||
3044 | if (sysfs_slab_alias(s, name)) | 3051 | if (sysfs_slab_alias(s, name)) |
3045 | goto err; | 3052 | goto err; |
3046 | return s; | 3053 | return s; |
3047 | } | 3054 | } |
3055 | |||
3048 | s = kmalloc(kmem_size, GFP_KERNEL); | 3056 | s = kmalloc(kmem_size, GFP_KERNEL); |
3049 | if (s) { | 3057 | if (s) { |
3050 | if (kmem_cache_open(s, GFP_KERNEL, name, | 3058 | if (kmem_cache_open(s, GFP_KERNEL, name, |
@@ -3927,7 +3935,6 @@ SLAB_ATTR(remote_node_defrag_ratio); | |||
3927 | #endif | 3935 | #endif |
3928 | 3936 | ||
3929 | #ifdef CONFIG_SLUB_STATS | 3937 | #ifdef CONFIG_SLUB_STATS |
3930 | |||
3931 | static int show_stat(struct kmem_cache *s, char *buf, enum stat_item si) | 3938 | static int show_stat(struct kmem_cache *s, char *buf, enum stat_item si) |
3932 | { | 3939 | { |
3933 | unsigned long sum = 0; | 3940 | unsigned long sum = 0; |
@@ -4111,8 +4118,8 @@ static struct kset *slab_kset; | |||
4111 | #define ID_STR_LENGTH 64 | 4118 | #define ID_STR_LENGTH 64 |
4112 | 4119 | ||
4113 | /* Create a unique string id for a slab cache: | 4120 | /* Create a unique string id for a slab cache: |
4114 | * format | 4121 | * |
4115 | * :[flags-]size:[memory address of kmemcache] | 4122 | * Format :[flags-]size |
4116 | */ | 4123 | */ |
4117 | static char *create_unique_id(struct kmem_cache *s) | 4124 | static char *create_unique_id(struct kmem_cache *s) |
4118 | { | 4125 | { |