diff options
Diffstat (limited to 'mm')
-rw-r--r-- | mm/slub.c | 326 |
1 files changed, 275 insertions, 51 deletions
@@ -149,6 +149,13 @@ static inline void ClearSlabDebug(struct page *page) | |||
149 | /* Enable to test recovery from slab corruption on boot */ | 149 | /* Enable to test recovery from slab corruption on boot */ |
150 | #undef SLUB_RESILIENCY_TEST | 150 | #undef SLUB_RESILIENCY_TEST |
151 | 151 | ||
152 | /* | ||
153 | * Currently fastpath is not supported if preemption is enabled. | ||
154 | */ | ||
155 | #if defined(CONFIG_FAST_CMPXCHG_LOCAL) && !defined(CONFIG_PREEMPT) | ||
156 | #define SLUB_FASTPATH | ||
157 | #endif | ||
158 | |||
152 | #if PAGE_SHIFT <= 12 | 159 | #if PAGE_SHIFT <= 12 |
153 | 160 | ||
154 | /* | 161 | /* |
@@ -243,6 +250,7 @@ enum track_item { TRACK_ALLOC, TRACK_FREE }; | |||
243 | static int sysfs_slab_add(struct kmem_cache *); | 250 | static int sysfs_slab_add(struct kmem_cache *); |
244 | static int sysfs_slab_alias(struct kmem_cache *, const char *); | 251 | static int sysfs_slab_alias(struct kmem_cache *, const char *); |
245 | static void sysfs_slab_remove(struct kmem_cache *); | 252 | static void sysfs_slab_remove(struct kmem_cache *); |
253 | |||
246 | #else | 254 | #else |
247 | static inline int sysfs_slab_add(struct kmem_cache *s) { return 0; } | 255 | static inline int sysfs_slab_add(struct kmem_cache *s) { return 0; } |
248 | static inline int sysfs_slab_alias(struct kmem_cache *s, const char *p) | 256 | static inline int sysfs_slab_alias(struct kmem_cache *s, const char *p) |
@@ -251,8 +259,16 @@ static inline void sysfs_slab_remove(struct kmem_cache *s) | |||
251 | { | 259 | { |
252 | kfree(s); | 260 | kfree(s); |
253 | } | 261 | } |
262 | |||
254 | #endif | 263 | #endif |
255 | 264 | ||
265 | static inline void stat(struct kmem_cache_cpu *c, enum stat_item si) | ||
266 | { | ||
267 | #ifdef CONFIG_SLUB_STATS | ||
268 | c->stat[si]++; | ||
269 | #endif | ||
270 | } | ||
271 | |||
256 | /******************************************************************** | 272 | /******************************************************************** |
257 | * Core slab cache functions | 273 | * Core slab cache functions |
258 | *******************************************************************/ | 274 | *******************************************************************/ |
@@ -280,15 +296,32 @@ static inline struct kmem_cache_cpu *get_cpu_slab(struct kmem_cache *s, int cpu) | |||
280 | #endif | 296 | #endif |
281 | } | 297 | } |
282 | 298 | ||
299 | /* | ||
300 | * The end pointer in a slab is special. It points to the first object in the | ||
301 | * slab but has bit 0 set to mark it. | ||
302 | * | ||
303 | * Note that SLUB relies on page_mapping returning NULL for pages with bit 0 | ||
304 | * in the mapping set. | ||
305 | */ | ||
306 | static inline int is_end(void *addr) | ||
307 | { | ||
308 | return (unsigned long)addr & PAGE_MAPPING_ANON; | ||
309 | } | ||
310 | |||
311 | void *slab_address(struct page *page) | ||
312 | { | ||
313 | return page->end - PAGE_MAPPING_ANON; | ||
314 | } | ||
315 | |||
283 | static inline int check_valid_pointer(struct kmem_cache *s, | 316 | static inline int check_valid_pointer(struct kmem_cache *s, |
284 | struct page *page, const void *object) | 317 | struct page *page, const void *object) |
285 | { | 318 | { |
286 | void *base; | 319 | void *base; |
287 | 320 | ||
288 | if (!object) | 321 | if (object == page->end) |
289 | return 1; | 322 | return 1; |
290 | 323 | ||
291 | base = page_address(page); | 324 | base = slab_address(page); |
292 | if (object < base || object >= base + s->objects * s->size || | 325 | if (object < base || object >= base + s->objects * s->size || |
293 | (object - base) % s->size) { | 326 | (object - base) % s->size) { |
294 | return 0; | 327 | return 0; |
@@ -321,7 +354,8 @@ static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp) | |||
321 | 354 | ||
322 | /* Scan freelist */ | 355 | /* Scan freelist */ |
323 | #define for_each_free_object(__p, __s, __free) \ | 356 | #define for_each_free_object(__p, __s, __free) \ |
324 | for (__p = (__free); __p; __p = get_freepointer((__s), __p)) | 357 | for (__p = (__free); (__p) != page->end; __p = get_freepointer((__s),\ |
358 | __p)) | ||
325 | 359 | ||
326 | /* Determine object index from a given position */ | 360 | /* Determine object index from a given position */ |
327 | static inline int slab_index(void *p, struct kmem_cache *s, void *addr) | 361 | static inline int slab_index(void *p, struct kmem_cache *s, void *addr) |
@@ -473,7 +507,7 @@ static void slab_fix(struct kmem_cache *s, char *fmt, ...) | |||
473 | static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p) | 507 | static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p) |
474 | { | 508 | { |
475 | unsigned int off; /* Offset of last byte */ | 509 | unsigned int off; /* Offset of last byte */ |
476 | u8 *addr = page_address(page); | 510 | u8 *addr = slab_address(page); |
477 | 511 | ||
478 | print_tracking(s, p); | 512 | print_tracking(s, p); |
479 | 513 | ||
@@ -651,7 +685,7 @@ static int slab_pad_check(struct kmem_cache *s, struct page *page) | |||
651 | if (!(s->flags & SLAB_POISON)) | 685 | if (!(s->flags & SLAB_POISON)) |
652 | return 1; | 686 | return 1; |
653 | 687 | ||
654 | start = page_address(page); | 688 | start = slab_address(page); |
655 | end = start + (PAGE_SIZE << s->order); | 689 | end = start + (PAGE_SIZE << s->order); |
656 | length = s->objects * s->size; | 690 | length = s->objects * s->size; |
657 | remainder = end - (start + length); | 691 | remainder = end - (start + length); |
@@ -685,9 +719,10 @@ static int check_object(struct kmem_cache *s, struct page *page, | |||
685 | endobject, red, s->inuse - s->objsize)) | 719 | endobject, red, s->inuse - s->objsize)) |
686 | return 0; | 720 | return 0; |
687 | } else { | 721 | } else { |
688 | if ((s->flags & SLAB_POISON) && s->objsize < s->inuse) | 722 | if ((s->flags & SLAB_POISON) && s->objsize < s->inuse) { |
689 | check_bytes_and_report(s, page, p, "Alignment padding", endobject, | 723 | check_bytes_and_report(s, page, p, "Alignment padding", |
690 | POISON_INUSE, s->inuse - s->objsize); | 724 | endobject, POISON_INUSE, s->inuse - s->objsize); |
725 | } | ||
691 | } | 726 | } |
692 | 727 | ||
693 | if (s->flags & SLAB_POISON) { | 728 | if (s->flags & SLAB_POISON) { |
@@ -718,7 +753,7 @@ static int check_object(struct kmem_cache *s, struct page *page, | |||
718 | * of the free objects in this slab. May cause | 753 | * of the free objects in this slab. May cause |
719 | * another error because the object count is now wrong. | 754 | * another error because the object count is now wrong. |
720 | */ | 755 | */ |
721 | set_freepointer(s, p, NULL); | 756 | set_freepointer(s, p, page->end); |
722 | return 0; | 757 | return 0; |
723 | } | 758 | } |
724 | return 1; | 759 | return 1; |
@@ -752,18 +787,18 @@ static int on_freelist(struct kmem_cache *s, struct page *page, void *search) | |||
752 | void *fp = page->freelist; | 787 | void *fp = page->freelist; |
753 | void *object = NULL; | 788 | void *object = NULL; |
754 | 789 | ||
755 | while (fp && nr <= s->objects) { | 790 | while (fp != page->end && nr <= s->objects) { |
756 | if (fp == search) | 791 | if (fp == search) |
757 | return 1; | 792 | return 1; |
758 | if (!check_valid_pointer(s, page, fp)) { | 793 | if (!check_valid_pointer(s, page, fp)) { |
759 | if (object) { | 794 | if (object) { |
760 | object_err(s, page, object, | 795 | object_err(s, page, object, |
761 | "Freechain corrupt"); | 796 | "Freechain corrupt"); |
762 | set_freepointer(s, object, NULL); | 797 | set_freepointer(s, object, page->end); |
763 | break; | 798 | break; |
764 | } else { | 799 | } else { |
765 | slab_err(s, page, "Freepointer corrupt"); | 800 | slab_err(s, page, "Freepointer corrupt"); |
766 | page->freelist = NULL; | 801 | page->freelist = page->end; |
767 | page->inuse = s->objects; | 802 | page->inuse = s->objects; |
768 | slab_fix(s, "Freelist cleared"); | 803 | slab_fix(s, "Freelist cleared"); |
769 | return 0; | 804 | return 0; |
@@ -869,7 +904,7 @@ bad: | |||
869 | */ | 904 | */ |
870 | slab_fix(s, "Marking all objects used"); | 905 | slab_fix(s, "Marking all objects used"); |
871 | page->inuse = s->objects; | 906 | page->inuse = s->objects; |
872 | page->freelist = NULL; | 907 | page->freelist = page->end; |
873 | } | 908 | } |
874 | return 0; | 909 | return 0; |
875 | } | 910 | } |
@@ -894,11 +929,10 @@ static int free_debug_processing(struct kmem_cache *s, struct page *page, | |||
894 | return 0; | 929 | return 0; |
895 | 930 | ||
896 | if (unlikely(s != page->slab)) { | 931 | if (unlikely(s != page->slab)) { |
897 | if (!PageSlab(page)) | 932 | if (!PageSlab(page)) { |
898 | slab_err(s, page, "Attempt to free object(0x%p) " | 933 | slab_err(s, page, "Attempt to free object(0x%p) " |
899 | "outside of slab", object); | 934 | "outside of slab", object); |
900 | else | 935 | } else if (!page->slab) { |
901 | if (!page->slab) { | ||
902 | printk(KERN_ERR | 936 | printk(KERN_ERR |
903 | "SLUB <none>: no slab for object 0x%p.\n", | 937 | "SLUB <none>: no slab for object 0x%p.\n", |
904 | object); | 938 | object); |
@@ -910,7 +944,7 @@ static int free_debug_processing(struct kmem_cache *s, struct page *page, | |||
910 | } | 944 | } |
911 | 945 | ||
912 | /* Special debug activities for freeing objects */ | 946 | /* Special debug activities for freeing objects */ |
913 | if (!SlabFrozen(page) && !page->freelist) | 947 | if (!SlabFrozen(page) && page->freelist == page->end) |
914 | remove_full(s, page); | 948 | remove_full(s, page); |
915 | if (s->flags & SLAB_STORE_USER) | 949 | if (s->flags & SLAB_STORE_USER) |
916 | set_track(s, object, TRACK_FREE, addr); | 950 | set_track(s, object, TRACK_FREE, addr); |
@@ -1007,7 +1041,7 @@ static unsigned long kmem_cache_flags(unsigned long objsize, | |||
1007 | */ | 1041 | */ |
1008 | if (slub_debug && (!slub_debug_slabs || | 1042 | if (slub_debug && (!slub_debug_slabs || |
1009 | strncmp(slub_debug_slabs, name, | 1043 | strncmp(slub_debug_slabs, name, |
1010 | strlen(slub_debug_slabs)) == 0)) | 1044 | strlen(slub_debug_slabs)) == 0)) |
1011 | flags |= slub_debug; | 1045 | flags |= slub_debug; |
1012 | } | 1046 | } |
1013 | 1047 | ||
@@ -1102,6 +1136,7 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node) | |||
1102 | SetSlabDebug(page); | 1136 | SetSlabDebug(page); |
1103 | 1137 | ||
1104 | start = page_address(page); | 1138 | start = page_address(page); |
1139 | page->end = start + 1; | ||
1105 | 1140 | ||
1106 | if (unlikely(s->flags & SLAB_POISON)) | 1141 | if (unlikely(s->flags & SLAB_POISON)) |
1107 | memset(start, POISON_INUSE, PAGE_SIZE << s->order); | 1142 | memset(start, POISON_INUSE, PAGE_SIZE << s->order); |
@@ -1113,7 +1148,7 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node) | |||
1113 | last = p; | 1148 | last = p; |
1114 | } | 1149 | } |
1115 | setup_object(s, page, last); | 1150 | setup_object(s, page, last); |
1116 | set_freepointer(s, last, NULL); | 1151 | set_freepointer(s, last, page->end); |
1117 | 1152 | ||
1118 | page->freelist = start; | 1153 | page->freelist = start; |
1119 | page->inuse = 0; | 1154 | page->inuse = 0; |
@@ -1129,7 +1164,7 @@ static void __free_slab(struct kmem_cache *s, struct page *page) | |||
1129 | void *p; | 1164 | void *p; |
1130 | 1165 | ||
1131 | slab_pad_check(s, page); | 1166 | slab_pad_check(s, page); |
1132 | for_each_object(p, s, page_address(page)) | 1167 | for_each_object(p, s, slab_address(page)) |
1133 | check_object(s, page, p, 0); | 1168 | check_object(s, page, p, 0); |
1134 | ClearSlabDebug(page); | 1169 | ClearSlabDebug(page); |
1135 | } | 1170 | } |
@@ -1139,6 +1174,7 @@ static void __free_slab(struct kmem_cache *s, struct page *page) | |||
1139 | NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE, | 1174 | NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE, |
1140 | -pages); | 1175 | -pages); |
1141 | 1176 | ||
1177 | page->mapping = NULL; | ||
1142 | __free_pages(page, s->order); | 1178 | __free_pages(page, s->order); |
1143 | } | 1179 | } |
1144 | 1180 | ||
@@ -1183,7 +1219,7 @@ static __always_inline void slab_lock(struct page *page) | |||
1183 | 1219 | ||
1184 | static __always_inline void slab_unlock(struct page *page) | 1220 | static __always_inline void slab_unlock(struct page *page) |
1185 | { | 1221 | { |
1186 | bit_spin_unlock(PG_locked, &page->flags); | 1222 | __bit_spin_unlock(PG_locked, &page->flags); |
1187 | } | 1223 | } |
1188 | 1224 | ||
1189 | static __always_inline int slab_trylock(struct page *page) | 1225 | static __always_inline int slab_trylock(struct page *page) |
@@ -1294,8 +1330,8 @@ static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags) | |||
1294 | get_cycles() % 1024 > s->remote_node_defrag_ratio) | 1330 | get_cycles() % 1024 > s->remote_node_defrag_ratio) |
1295 | return NULL; | 1331 | return NULL; |
1296 | 1332 | ||
1297 | zonelist = &NODE_DATA(slab_node(current->mempolicy)) | 1333 | zonelist = &NODE_DATA( |
1298 | ->node_zonelists[gfp_zone(flags)]; | 1334 | slab_node(current->mempolicy))->node_zonelists[gfp_zone(flags)]; |
1299 | for (z = zonelist->zones; *z; z++) { | 1335 | for (z = zonelist->zones; *z; z++) { |
1300 | struct kmem_cache_node *n; | 1336 | struct kmem_cache_node *n; |
1301 | 1337 | ||
@@ -1337,17 +1373,22 @@ static struct page *get_partial(struct kmem_cache *s, gfp_t flags, int node) | |||
1337 | static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail) | 1373 | static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail) |
1338 | { | 1374 | { |
1339 | struct kmem_cache_node *n = get_node(s, page_to_nid(page)); | 1375 | struct kmem_cache_node *n = get_node(s, page_to_nid(page)); |
1376 | struct kmem_cache_cpu *c = get_cpu_slab(s, smp_processor_id()); | ||
1340 | 1377 | ||
1341 | ClearSlabFrozen(page); | 1378 | ClearSlabFrozen(page); |
1342 | if (page->inuse) { | 1379 | if (page->inuse) { |
1343 | 1380 | ||
1344 | if (page->freelist) | 1381 | if (page->freelist != page->end) { |
1345 | add_partial(n, page, tail); | 1382 | add_partial(n, page, tail); |
1346 | else if (SlabDebug(page) && (s->flags & SLAB_STORE_USER)) | 1383 | stat(c, tail ? DEACTIVATE_TO_TAIL : DEACTIVATE_TO_HEAD); |
1347 | add_full(n, page); | 1384 | } else { |
1385 | stat(c, DEACTIVATE_FULL); | ||
1386 | if (SlabDebug(page) && (s->flags & SLAB_STORE_USER)) | ||
1387 | add_full(n, page); | ||
1388 | } | ||
1348 | slab_unlock(page); | 1389 | slab_unlock(page); |
1349 | |||
1350 | } else { | 1390 | } else { |
1391 | stat(c, DEACTIVATE_EMPTY); | ||
1351 | if (n->nr_partial < MIN_PARTIAL) { | 1392 | if (n->nr_partial < MIN_PARTIAL) { |
1352 | /* | 1393 | /* |
1353 | * Adding an empty slab to the partial slabs in order | 1394 | * Adding an empty slab to the partial slabs in order |
@@ -1361,6 +1402,7 @@ static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail) | |||
1361 | slab_unlock(page); | 1402 | slab_unlock(page); |
1362 | } else { | 1403 | } else { |
1363 | slab_unlock(page); | 1404 | slab_unlock(page); |
1405 | stat(get_cpu_slab(s, raw_smp_processor_id()), FREE_SLAB); | ||
1364 | discard_slab(s, page); | 1406 | discard_slab(s, page); |
1365 | } | 1407 | } |
1366 | } | 1408 | } |
@@ -1373,12 +1415,19 @@ static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) | |||
1373 | { | 1415 | { |
1374 | struct page *page = c->page; | 1416 | struct page *page = c->page; |
1375 | int tail = 1; | 1417 | int tail = 1; |
1418 | |||
1419 | if (c->freelist) | ||
1420 | stat(c, DEACTIVATE_REMOTE_FREES); | ||
1376 | /* | 1421 | /* |
1377 | * Merge cpu freelist into freelist. Typically we get here | 1422 | * Merge cpu freelist into freelist. Typically we get here |
1378 | * because both freelists are empty. So this is unlikely | 1423 | * because both freelists are empty. So this is unlikely |
1379 | * to occur. | 1424 | * to occur. |
1425 | * | ||
1426 | * We need to use is_end() here because deactivate_slab() may | ||
1427 | * be called for a debug slab. Then c->freelist may contain | ||
1428 | * a dummy pointer. | ||
1380 | */ | 1429 | */ |
1381 | while (unlikely(c->freelist)) { | 1430 | while (unlikely(!is_end(c->freelist))) { |
1382 | void **object; | 1431 | void **object; |
1383 | 1432 | ||
1384 | tail = 0; /* Hot objects. Put the slab first */ | 1433 | tail = 0; /* Hot objects. Put the slab first */ |
@@ -1398,6 +1447,7 @@ static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) | |||
1398 | 1447 | ||
1399 | static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) | 1448 | static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) |
1400 | { | 1449 | { |
1450 | stat(c, CPUSLAB_FLUSH); | ||
1401 | slab_lock(c->page); | 1451 | slab_lock(c->page); |
1402 | deactivate_slab(s, c); | 1452 | deactivate_slab(s, c); |
1403 | } | 1453 | } |
@@ -1469,16 +1519,21 @@ static void *__slab_alloc(struct kmem_cache *s, | |||
1469 | { | 1519 | { |
1470 | void **object; | 1520 | void **object; |
1471 | struct page *new; | 1521 | struct page *new; |
1522 | #ifdef SLUB_FASTPATH | ||
1523 | unsigned long flags; | ||
1472 | 1524 | ||
1525 | local_irq_save(flags); | ||
1526 | #endif | ||
1473 | if (!c->page) | 1527 | if (!c->page) |
1474 | goto new_slab; | 1528 | goto new_slab; |
1475 | 1529 | ||
1476 | slab_lock(c->page); | 1530 | slab_lock(c->page); |
1477 | if (unlikely(!node_match(c, node))) | 1531 | if (unlikely(!node_match(c, node))) |
1478 | goto another_slab; | 1532 | goto another_slab; |
1533 | stat(c, ALLOC_REFILL); | ||
1479 | load_freelist: | 1534 | load_freelist: |
1480 | object = c->page->freelist; | 1535 | object = c->page->freelist; |
1481 | if (unlikely(!object)) | 1536 | if (unlikely(object == c->page->end)) |
1482 | goto another_slab; | 1537 | goto another_slab; |
1483 | if (unlikely(SlabDebug(c->page))) | 1538 | if (unlikely(SlabDebug(c->page))) |
1484 | goto debug; | 1539 | goto debug; |
@@ -1486,9 +1541,15 @@ load_freelist: | |||
1486 | object = c->page->freelist; | 1541 | object = c->page->freelist; |
1487 | c->freelist = object[c->offset]; | 1542 | c->freelist = object[c->offset]; |
1488 | c->page->inuse = s->objects; | 1543 | c->page->inuse = s->objects; |
1489 | c->page->freelist = NULL; | 1544 | c->page->freelist = c->page->end; |
1490 | c->node = page_to_nid(c->page); | 1545 | c->node = page_to_nid(c->page); |
1546 | unlock_out: | ||
1491 | slab_unlock(c->page); | 1547 | slab_unlock(c->page); |
1548 | stat(c, ALLOC_SLOWPATH); | ||
1549 | out: | ||
1550 | #ifdef SLUB_FASTPATH | ||
1551 | local_irq_restore(flags); | ||
1552 | #endif | ||
1492 | return object; | 1553 | return object; |
1493 | 1554 | ||
1494 | another_slab: | 1555 | another_slab: |
@@ -1498,6 +1559,7 @@ new_slab: | |||
1498 | new = get_partial(s, gfpflags, node); | 1559 | new = get_partial(s, gfpflags, node); |
1499 | if (new) { | 1560 | if (new) { |
1500 | c->page = new; | 1561 | c->page = new; |
1562 | stat(c, ALLOC_FROM_PARTIAL); | ||
1501 | goto load_freelist; | 1563 | goto load_freelist; |
1502 | } | 1564 | } |
1503 | 1565 | ||
@@ -1511,6 +1573,7 @@ new_slab: | |||
1511 | 1573 | ||
1512 | if (new) { | 1574 | if (new) { |
1513 | c = get_cpu_slab(s, smp_processor_id()); | 1575 | c = get_cpu_slab(s, smp_processor_id()); |
1576 | stat(c, ALLOC_SLAB); | ||
1514 | if (c->page) | 1577 | if (c->page) |
1515 | flush_slab(s, c); | 1578 | flush_slab(s, c); |
1516 | slab_lock(new); | 1579 | slab_lock(new); |
@@ -1518,7 +1581,8 @@ new_slab: | |||
1518 | c->page = new; | 1581 | c->page = new; |
1519 | goto load_freelist; | 1582 | goto load_freelist; |
1520 | } | 1583 | } |
1521 | return NULL; | 1584 | object = NULL; |
1585 | goto out; | ||
1522 | debug: | 1586 | debug: |
1523 | object = c->page->freelist; | 1587 | object = c->page->freelist; |
1524 | if (!alloc_debug_processing(s, c->page, object, addr)) | 1588 | if (!alloc_debug_processing(s, c->page, object, addr)) |
@@ -1527,8 +1591,7 @@ debug: | |||
1527 | c->page->inuse++; | 1591 | c->page->inuse++; |
1528 | c->page->freelist = object[c->offset]; | 1592 | c->page->freelist = object[c->offset]; |
1529 | c->node = -1; | 1593 | c->node = -1; |
1530 | slab_unlock(c->page); | 1594 | goto unlock_out; |
1531 | return object; | ||
1532 | } | 1595 | } |
1533 | 1596 | ||
1534 | /* | 1597 | /* |
@@ -1545,20 +1608,50 @@ static __always_inline void *slab_alloc(struct kmem_cache *s, | |||
1545 | gfp_t gfpflags, int node, void *addr) | 1608 | gfp_t gfpflags, int node, void *addr) |
1546 | { | 1609 | { |
1547 | void **object; | 1610 | void **object; |
1548 | unsigned long flags; | ||
1549 | struct kmem_cache_cpu *c; | 1611 | struct kmem_cache_cpu *c; |
1550 | 1612 | ||
1613 | /* | ||
1614 | * The SLUB_FASTPATH path is provisional and is currently disabled if the | ||
1615 | * kernel is compiled with preemption or if the arch does not support | ||
1616 | * fast cmpxchg operations. There are a couple of coming changes that will | ||
1617 | * simplify matters and allow preemption. Ultimately we may end up making | ||
1618 | * SLUB_FASTPATH the default. | ||
1619 | * | ||
1620 | * 1. The introduction of the per cpu allocator will avoid array lookups | ||
1621 | * through get_cpu_slab(). A special register can be used instead. | ||
1622 | * | ||
1623 | * 2. The introduction of per cpu atomic operations (cpu_ops) means that | ||
1624 | * we can realize the logic here entirely with per cpu atomics. The | ||
1625 | * per cpu atomic ops will take care of the preemption issues. | ||
1626 | */ | ||
1627 | |||
1628 | #ifdef SLUB_FASTPATH | ||
1629 | c = get_cpu_slab(s, raw_smp_processor_id()); | ||
1630 | do { | ||
1631 | object = c->freelist; | ||
1632 | if (unlikely(is_end(object) || !node_match(c, node))) { | ||
1633 | object = __slab_alloc(s, gfpflags, node, addr, c); | ||
1634 | break; | ||
1635 | } | ||
1636 | stat(c, ALLOC_FASTPATH); | ||
1637 | } while (cmpxchg_local(&c->freelist, object, object[c->offset]) | ||
1638 | != object); | ||
1639 | #else | ||
1640 | unsigned long flags; | ||
1641 | |||
1551 | local_irq_save(flags); | 1642 | local_irq_save(flags); |
1552 | c = get_cpu_slab(s, smp_processor_id()); | 1643 | c = get_cpu_slab(s, smp_processor_id()); |
1553 | if (unlikely(!c->freelist || !node_match(c, node))) | 1644 | if (unlikely(is_end(c->freelist) || !node_match(c, node))) |
1554 | 1645 | ||
1555 | object = __slab_alloc(s, gfpflags, node, addr, c); | 1646 | object = __slab_alloc(s, gfpflags, node, addr, c); |
1556 | 1647 | ||
1557 | else { | 1648 | else { |
1558 | object = c->freelist; | 1649 | object = c->freelist; |
1559 | c->freelist = object[c->offset]; | 1650 | c->freelist = object[c->offset]; |
1651 | stat(c, ALLOC_FASTPATH); | ||
1560 | } | 1652 | } |
1561 | local_irq_restore(flags); | 1653 | local_irq_restore(flags); |
1654 | #endif | ||
1562 | 1655 | ||
1563 | if (unlikely((gfpflags & __GFP_ZERO) && object)) | 1656 | if (unlikely((gfpflags & __GFP_ZERO) && object)) |
1564 | memset(object, 0, c->objsize); | 1657 | memset(object, 0, c->objsize); |
@@ -1593,7 +1686,15 @@ static void __slab_free(struct kmem_cache *s, struct page *page, | |||
1593 | { | 1686 | { |
1594 | void *prior; | 1687 | void *prior; |
1595 | void **object = (void *)x; | 1688 | void **object = (void *)x; |
1689 | struct kmem_cache_cpu *c; | ||
1690 | |||
1691 | #ifdef SLUB_FASTPATH | ||
1692 | unsigned long flags; | ||
1596 | 1693 | ||
1694 | local_irq_save(flags); | ||
1695 | #endif | ||
1696 | c = get_cpu_slab(s, raw_smp_processor_id()); | ||
1697 | stat(c, FREE_SLOWPATH); | ||
1597 | slab_lock(page); | 1698 | slab_lock(page); |
1598 | 1699 | ||
1599 | if (unlikely(SlabDebug(page))) | 1700 | if (unlikely(SlabDebug(page))) |
@@ -1603,8 +1704,10 @@ checks_ok: | |||
1603 | page->freelist = object; | 1704 | page->freelist = object; |
1604 | page->inuse--; | 1705 | page->inuse--; |
1605 | 1706 | ||
1606 | if (unlikely(SlabFrozen(page))) | 1707 | if (unlikely(SlabFrozen(page))) { |
1708 | stat(c, FREE_FROZEN); | ||
1607 | goto out_unlock; | 1709 | goto out_unlock; |
1710 | } | ||
1608 | 1711 | ||
1609 | if (unlikely(!page->inuse)) | 1712 | if (unlikely(!page->inuse)) |
1610 | goto slab_empty; | 1713 | goto slab_empty; |
@@ -1614,21 +1717,31 @@ checks_ok: | |||
1614 | * was not on the partial list before | 1717 | * was not on the partial list before |
1615 | * then add it. | 1718 | * then add it. |
1616 | */ | 1719 | */ |
1617 | if (unlikely(!prior)) | 1720 | if (unlikely(prior == page->end)) { |
1618 | add_partial(get_node(s, page_to_nid(page)), page, 1); | 1721 | add_partial(get_node(s, page_to_nid(page)), page, 1); |
1722 | stat(c, FREE_ADD_PARTIAL); | ||
1723 | } | ||
1619 | 1724 | ||
1620 | out_unlock: | 1725 | out_unlock: |
1621 | slab_unlock(page); | 1726 | slab_unlock(page); |
1727 | #ifdef SLUB_FASTPATH | ||
1728 | local_irq_restore(flags); | ||
1729 | #endif | ||
1622 | return; | 1730 | return; |
1623 | 1731 | ||
1624 | slab_empty: | 1732 | slab_empty: |
1625 | if (prior) | 1733 | if (prior != page->end) { |
1626 | /* | 1734 | /* |
1627 | * Slab still on the partial list. | 1735 | * Slab still on the partial list. |
1628 | */ | 1736 | */ |
1629 | remove_partial(s, page); | 1737 | remove_partial(s, page); |
1630 | 1738 | stat(c, FREE_REMOVE_PARTIAL); | |
1739 | } | ||
1631 | slab_unlock(page); | 1740 | slab_unlock(page); |
1741 | stat(c, FREE_SLAB); | ||
1742 | #ifdef SLUB_FASTPATH | ||
1743 | local_irq_restore(flags); | ||
1744 | #endif | ||
1632 | discard_slab(s, page); | 1745 | discard_slab(s, page); |
1633 | return; | 1746 | return; |
1634 | 1747 | ||
@@ -1653,19 +1766,49 @@ static __always_inline void slab_free(struct kmem_cache *s, | |||
1653 | struct page *page, void *x, void *addr) | 1766 | struct page *page, void *x, void *addr) |
1654 | { | 1767 | { |
1655 | void **object = (void *)x; | 1768 | void **object = (void *)x; |
1656 | unsigned long flags; | ||
1657 | struct kmem_cache_cpu *c; | 1769 | struct kmem_cache_cpu *c; |
1658 | 1770 | ||
1771 | #ifdef SLUB_FASTPATH | ||
1772 | void **freelist; | ||
1773 | |||
1774 | c = get_cpu_slab(s, raw_smp_processor_id()); | ||
1775 | debug_check_no_locks_freed(object, s->objsize); | ||
1776 | do { | ||
1777 | freelist = c->freelist; | ||
1778 | barrier(); | ||
1779 | /* | ||
1780 | * If the compiler were to reorder the retrieval of c->page to | ||
1781 | * come before c->freelist then an interrupt could | ||
1782 | * change the cpu slab before we retrieve c->freelist. We | ||
1783 | * could be matching on a page no longer active and put the | ||
1784 | * object onto the freelist of the wrong slab. | ||
1785 | * | ||
1786 | * On the other hand: If we already have the freelist pointer | ||
1787 | * then any change of cpu_slab will cause the cmpxchg to fail | ||
1788 | * since the freelist pointers are unique per slab. | ||
1789 | */ | ||
1790 | if (unlikely(page != c->page || c->node < 0)) { | ||
1791 | __slab_free(s, page, x, addr, c->offset); | ||
1792 | break; | ||
1793 | } | ||
1794 | object[c->offset] = freelist; | ||
1795 | stat(c, FREE_FASTPATH); | ||
1796 | } while (cmpxchg_local(&c->freelist, freelist, object) != freelist); | ||
1797 | #else | ||
1798 | unsigned long flags; | ||
1799 | |||
1659 | local_irq_save(flags); | 1800 | local_irq_save(flags); |
1660 | debug_check_no_locks_freed(object, s->objsize); | 1801 | debug_check_no_locks_freed(object, s->objsize); |
1661 | c = get_cpu_slab(s, smp_processor_id()); | 1802 | c = get_cpu_slab(s, smp_processor_id()); |
1662 | if (likely(page == c->page && c->node >= 0)) { | 1803 | if (likely(page == c->page && c->node >= 0)) { |
1663 | object[c->offset] = c->freelist; | 1804 | object[c->offset] = c->freelist; |
1664 | c->freelist = object; | 1805 | c->freelist = object; |
1806 | stat(c, FREE_FASTPATH); | ||
1665 | } else | 1807 | } else |
1666 | __slab_free(s, page, x, addr, c->offset); | 1808 | __slab_free(s, page, x, addr, c->offset); |
1667 | 1809 | ||
1668 | local_irq_restore(flags); | 1810 | local_irq_restore(flags); |
1811 | #endif | ||
1669 | } | 1812 | } |
1670 | 1813 | ||
1671 | void kmem_cache_free(struct kmem_cache *s, void *x) | 1814 | void kmem_cache_free(struct kmem_cache *s, void *x) |
@@ -1842,7 +1985,7 @@ static void init_kmem_cache_cpu(struct kmem_cache *s, | |||
1842 | struct kmem_cache_cpu *c) | 1985 | struct kmem_cache_cpu *c) |
1843 | { | 1986 | { |
1844 | c->page = NULL; | 1987 | c->page = NULL; |
1845 | c->freelist = NULL; | 1988 | c->freelist = (void *)PAGE_MAPPING_ANON; |
1846 | c->node = 0; | 1989 | c->node = 0; |
1847 | c->offset = s->offset / sizeof(void *); | 1990 | c->offset = s->offset / sizeof(void *); |
1848 | c->objsize = s->objsize; | 1991 | c->objsize = s->objsize; |
@@ -2446,7 +2589,8 @@ static noinline struct kmem_cache *dma_kmalloc_cache(int index, gfp_t flags) | |||
2446 | goto unlock_out; | 2589 | goto unlock_out; |
2447 | 2590 | ||
2448 | realsize = kmalloc_caches[index].objsize; | 2591 | realsize = kmalloc_caches[index].objsize; |
2449 | text = kasprintf(flags & ~SLUB_DMA, "kmalloc_dma-%d", (unsigned int)realsize), | 2592 | text = kasprintf(flags & ~SLUB_DMA, "kmalloc_dma-%d", |
2593 | (unsigned int)realsize); | ||
2450 | s = kmalloc(kmem_size, flags & ~SLUB_DMA); | 2594 | s = kmalloc(kmem_size, flags & ~SLUB_DMA); |
2451 | 2595 | ||
2452 | if (!s || !text || !kmem_cache_open(s, flags, text, | 2596 | if (!s || !text || !kmem_cache_open(s, flags, text, |
@@ -2601,6 +2745,7 @@ EXPORT_SYMBOL(ksize); | |||
2601 | void kfree(const void *x) | 2745 | void kfree(const void *x) |
2602 | { | 2746 | { |
2603 | struct page *page; | 2747 | struct page *page; |
2748 | void *object = (void *)x; | ||
2604 | 2749 | ||
2605 | if (unlikely(ZERO_OR_NULL_PTR(x))) | 2750 | if (unlikely(ZERO_OR_NULL_PTR(x))) |
2606 | return; | 2751 | return; |
@@ -2610,7 +2755,7 @@ void kfree(const void *x) | |||
2610 | put_page(page); | 2755 | put_page(page); |
2611 | return; | 2756 | return; |
2612 | } | 2757 | } |
2613 | slab_free(page->slab, page, (void *)x, __builtin_return_address(0)); | 2758 | slab_free(page->slab, page, object, __builtin_return_address(0)); |
2614 | } | 2759 | } |
2615 | EXPORT_SYMBOL(kfree); | 2760 | EXPORT_SYMBOL(kfree); |
2616 | 2761 | ||
@@ -2896,7 +3041,8 @@ void __init kmem_cache_init(void) | |||
2896 | #endif | 3041 | #endif |
2897 | 3042 | ||
2898 | 3043 | ||
2899 | printk(KERN_INFO "SLUB: Genslabs=%d, HWalign=%d, Order=%d-%d, MinObjects=%d," | 3044 | printk(KERN_INFO |
3045 | "SLUB: Genslabs=%d, HWalign=%d, Order=%d-%d, MinObjects=%d," | ||
2900 | " CPUs=%d, Nodes=%d\n", | 3046 | " CPUs=%d, Nodes=%d\n", |
2901 | caches, cache_line_size(), | 3047 | caches, cache_line_size(), |
2902 | slub_min_order, slub_max_order, slub_min_objects, | 3048 | slub_min_order, slub_max_order, slub_min_objects, |
@@ -3063,7 +3209,7 @@ static int __cpuinit slab_cpuup_callback(struct notifier_block *nfb, | |||
3063 | } | 3209 | } |
3064 | 3210 | ||
3065 | static struct notifier_block __cpuinitdata slab_notifier = { | 3211 | static struct notifier_block __cpuinitdata slab_notifier = { |
3066 | &slab_cpuup_callback, NULL, 0 | 3212 | .notifier_call = slab_cpuup_callback |
3067 | }; | 3213 | }; |
3068 | 3214 | ||
3069 | #endif | 3215 | #endif |
@@ -3104,7 +3250,7 @@ static int validate_slab(struct kmem_cache *s, struct page *page, | |||
3104 | unsigned long *map) | 3250 | unsigned long *map) |
3105 | { | 3251 | { |
3106 | void *p; | 3252 | void *p; |
3107 | void *addr = page_address(page); | 3253 | void *addr = slab_address(page); |
3108 | 3254 | ||
3109 | if (!check_slab(s, page) || | 3255 | if (!check_slab(s, page) || |
3110 | !on_freelist(s, page, NULL)) | 3256 | !on_freelist(s, page, NULL)) |
@@ -3221,8 +3367,9 @@ static void resiliency_test(void) | |||
3221 | p = kzalloc(32, GFP_KERNEL); | 3367 | p = kzalloc(32, GFP_KERNEL); |
3222 | p[32 + sizeof(void *)] = 0x34; | 3368 | p[32 + sizeof(void *)] = 0x34; |
3223 | printk(KERN_ERR "\n2. kmalloc-32: Clobber next pointer/next slab" | 3369 | printk(KERN_ERR "\n2. kmalloc-32: Clobber next pointer/next slab" |
3224 | " 0x34 -> -0x%p\n", p); | 3370 | " 0x34 -> -0x%p\n", p); |
3225 | printk(KERN_ERR "If allocated object is overwritten then not detectable\n\n"); | 3371 | printk(KERN_ERR |
3372 | "If allocated object is overwritten then not detectable\n\n"); | ||
3226 | 3373 | ||
3227 | validate_slab_cache(kmalloc_caches + 5); | 3374 | validate_slab_cache(kmalloc_caches + 5); |
3228 | p = kzalloc(64, GFP_KERNEL); | 3375 | p = kzalloc(64, GFP_KERNEL); |
@@ -3230,7 +3377,8 @@ static void resiliency_test(void) | |||
3230 | *p = 0x56; | 3377 | *p = 0x56; |
3231 | printk(KERN_ERR "\n3. kmalloc-64: corrupting random byte 0x56->0x%p\n", | 3378 | printk(KERN_ERR "\n3. kmalloc-64: corrupting random byte 0x56->0x%p\n", |
3232 | p); | 3379 | p); |
3233 | printk(KERN_ERR "If allocated object is overwritten then not detectable\n\n"); | 3380 | printk(KERN_ERR |
3381 | "If allocated object is overwritten then not detectable\n\n"); | ||
3234 | validate_slab_cache(kmalloc_caches + 6); | 3382 | validate_slab_cache(kmalloc_caches + 6); |
3235 | 3383 | ||
3236 | printk(KERN_ERR "\nB. Corruption after free\n"); | 3384 | printk(KERN_ERR "\nB. Corruption after free\n"); |
@@ -3243,7 +3391,8 @@ static void resiliency_test(void) | |||
3243 | p = kzalloc(256, GFP_KERNEL); | 3391 | p = kzalloc(256, GFP_KERNEL); |
3244 | kfree(p); | 3392 | kfree(p); |
3245 | p[50] = 0x9a; | 3393 | p[50] = 0x9a; |
3246 | printk(KERN_ERR "\n2. kmalloc-256: Clobber 50th byte 0x9a->0x%p\n\n", p); | 3394 | printk(KERN_ERR "\n2. kmalloc-256: Clobber 50th byte 0x9a->0x%p\n\n", |
3395 | p); | ||
3247 | validate_slab_cache(kmalloc_caches + 8); | 3396 | validate_slab_cache(kmalloc_caches + 8); |
3248 | 3397 | ||
3249 | p = kzalloc(512, GFP_KERNEL); | 3398 | p = kzalloc(512, GFP_KERNEL); |
@@ -3384,7 +3533,7 @@ static int add_location(struct loc_track *t, struct kmem_cache *s, | |||
3384 | static void process_slab(struct loc_track *t, struct kmem_cache *s, | 3533 | static void process_slab(struct loc_track *t, struct kmem_cache *s, |
3385 | struct page *page, enum track_item alloc) | 3534 | struct page *page, enum track_item alloc) |
3386 | { | 3535 | { |
3387 | void *addr = page_address(page); | 3536 | void *addr = slab_address(page); |
3388 | DECLARE_BITMAP(map, s->objects); | 3537 | DECLARE_BITMAP(map, s->objects); |
3389 | void *p; | 3538 | void *p; |
3390 | 3539 | ||
@@ -3872,6 +4021,62 @@ static ssize_t remote_node_defrag_ratio_store(struct kmem_cache *s, | |||
3872 | SLAB_ATTR(remote_node_defrag_ratio); | 4021 | SLAB_ATTR(remote_node_defrag_ratio); |
3873 | #endif | 4022 | #endif |
3874 | 4023 | ||
4024 | #ifdef CONFIG_SLUB_STATS | ||
4025 | |||
4026 | static int show_stat(struct kmem_cache *s, char *buf, enum stat_item si) | ||
4027 | { | ||
4028 | unsigned long sum = 0; | ||
4029 | int cpu; | ||
4030 | int len; | ||
4031 | int *data = kmalloc(nr_cpu_ids * sizeof(int), GFP_KERNEL); | ||
4032 | |||
4033 | if (!data) | ||
4034 | return -ENOMEM; | ||
4035 | |||
4036 | for_each_online_cpu(cpu) { | ||
4037 | unsigned x = get_cpu_slab(s, cpu)->stat[si]; | ||
4038 | |||
4039 | data[cpu] = x; | ||
4040 | sum += x; | ||
4041 | } | ||
4042 | |||
4043 | len = sprintf(buf, "%lu", sum); | ||
4044 | |||
4045 | for_each_online_cpu(cpu) { | ||
4046 | if (data[cpu] && len < PAGE_SIZE - 20) | ||
4047 | len += sprintf(buf + len, " c%d=%u", cpu, data[cpu]); | ||
4048 | } | ||
4049 | kfree(data); | ||
4050 | return len + sprintf(buf + len, "\n"); | ||
4051 | } | ||
4052 | |||
4053 | #define STAT_ATTR(si, text) \ | ||
4054 | static ssize_t text##_show(struct kmem_cache *s, char *buf) \ | ||
4055 | { \ | ||
4056 | return show_stat(s, buf, si); \ | ||
4057 | } \ | ||
4058 | SLAB_ATTR_RO(text); \ | ||
4059 | |||
4060 | STAT_ATTR(ALLOC_FASTPATH, alloc_fastpath); | ||
4061 | STAT_ATTR(ALLOC_SLOWPATH, alloc_slowpath); | ||
4062 | STAT_ATTR(FREE_FASTPATH, free_fastpath); | ||
4063 | STAT_ATTR(FREE_SLOWPATH, free_slowpath); | ||
4064 | STAT_ATTR(FREE_FROZEN, free_frozen); | ||
4065 | STAT_ATTR(FREE_ADD_PARTIAL, free_add_partial); | ||
4066 | STAT_ATTR(FREE_REMOVE_PARTIAL, free_remove_partial); | ||
4067 | STAT_ATTR(ALLOC_FROM_PARTIAL, alloc_from_partial); | ||
4068 | STAT_ATTR(ALLOC_SLAB, alloc_slab); | ||
4069 | STAT_ATTR(ALLOC_REFILL, alloc_refill); | ||
4070 | STAT_ATTR(FREE_SLAB, free_slab); | ||
4071 | STAT_ATTR(CPUSLAB_FLUSH, cpuslab_flush); | ||
4072 | STAT_ATTR(DEACTIVATE_FULL, deactivate_full); | ||
4073 | STAT_ATTR(DEACTIVATE_EMPTY, deactivate_empty); | ||
4074 | STAT_ATTR(DEACTIVATE_TO_HEAD, deactivate_to_head); | ||
4075 | STAT_ATTR(DEACTIVATE_TO_TAIL, deactivate_to_tail); | ||
4076 | STAT_ATTR(DEACTIVATE_REMOTE_FREES, deactivate_remote_frees); | ||
4077 | |||
4078 | #endif | ||
4079 | |||
3875 | static struct attribute *slab_attrs[] = { | 4080 | static struct attribute *slab_attrs[] = { |
3876 | &slab_size_attr.attr, | 4081 | &slab_size_attr.attr, |
3877 | &object_size_attr.attr, | 4082 | &object_size_attr.attr, |
@@ -3902,6 +4107,25 @@ static struct attribute *slab_attrs[] = { | |||
3902 | #ifdef CONFIG_NUMA | 4107 | #ifdef CONFIG_NUMA |
3903 | &remote_node_defrag_ratio_attr.attr, | 4108 | &remote_node_defrag_ratio_attr.attr, |
3904 | #endif | 4109 | #endif |
4110 | #ifdef CONFIG_SLUB_STATS | ||
4111 | &alloc_fastpath_attr.attr, | ||
4112 | &alloc_slowpath_attr.attr, | ||
4113 | &free_fastpath_attr.attr, | ||
4114 | &free_slowpath_attr.attr, | ||
4115 | &free_frozen_attr.attr, | ||
4116 | &free_add_partial_attr.attr, | ||
4117 | &free_remove_partial_attr.attr, | ||
4118 | &alloc_from_partial_attr.attr, | ||
4119 | &alloc_slab_attr.attr, | ||
4120 | &alloc_refill_attr.attr, | ||
4121 | &free_slab_attr.attr, | ||
4122 | &cpuslab_flush_attr.attr, | ||
4123 | &deactivate_full_attr.attr, | ||
4124 | &deactivate_empty_attr.attr, | ||
4125 | &deactivate_to_head_attr.attr, | ||
4126 | &deactivate_to_tail_attr.attr, | ||
4127 | &deactivate_remote_frees_attr.attr, | ||
4128 | #endif | ||
3905 | NULL | 4129 | NULL |
3906 | }; | 4130 | }; |
3907 | 4131 | ||