Diffstat (limited to 'mm/slub.c')
-rw-r--r-- | mm/slub.c | 217 |
1 file changed, 97 insertions, 120 deletions
@@ -291,32 +291,16 @@ static inline struct kmem_cache_cpu *get_cpu_slab(struct kmem_cache *s, int cpu) | |||
291 | #endif | 291 | #endif |
292 | } | 292 | } |
293 | 293 | ||
294 | /* | 294 | /* Verify that a pointer has an address that is valid within a slab page */ |
295 | * The end pointer in a slab is special. It points to the first object in the | ||
296 | * slab but has bit 0 set to mark it. | ||
297 | * | ||
298 | * Note that SLUB relies on page_mapping returning NULL for pages with bit 0 | ||
299 | * in the mapping set. | ||
300 | */ | ||
301 | static inline int is_end(void *addr) | ||
302 | { | ||
303 | return (unsigned long)addr & PAGE_MAPPING_ANON; | ||
304 | } | ||
305 | |||
306 | static void *slab_address(struct page *page) | ||
307 | { | ||
308 | return page->end - PAGE_MAPPING_ANON; | ||
309 | } | ||
310 | |||
311 | static inline int check_valid_pointer(struct kmem_cache *s, | 295 | static inline int check_valid_pointer(struct kmem_cache *s, |
312 | struct page *page, const void *object) | 296 | struct page *page, const void *object) |
313 | { | 297 | { |
314 | void *base; | 298 | void *base; |
315 | 299 | ||
316 | if (object == page->end) | 300 | if (!object) |
317 | return 1; | 301 | return 1; |
318 | 302 | ||
319 | base = slab_address(page); | 303 | base = page_address(page); |
320 | if (object < base || object >= base + s->objects * s->size || | 304 | if (object < base || object >= base + s->objects * s->size || |
321 | (object - base) % s->size) { | 305 | (object - base) % s->size) { |
322 | return 0; | 306 | return 0; |
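This hunk is the core of the change: the deleted helpers encoded the freelist terminator as the slab's base address with bit 0 (PAGE_MAPPING_ANON) set, stored in page->end. A before/after sketch of the termination test, reconstructed from the deleted lines:

	/* Before: the terminator was a tagged pointer, so every freelist
	 * walk needed a helper to recognize it (and page_mapping() had to
	 * return NULL for it, since page->end overlays page->mapping). */
	static inline int is_end(void *addr)
	{
		return (unsigned long)addr & PAGE_MAPPING_ANON;
	}

	/* After: a plain NULL ends the chain, so the test collapses to an
	 * ordinary pointer check and page->end can be removed entirely. */
	if (!object)
		return 1;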
@@ -349,8 +333,7 @@ static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp) | |||
349 | 333 | ||
350 | /* Scan freelist */ | 334 | /* Scan freelist */ |
351 | #define for_each_free_object(__p, __s, __free) \ | 335 | #define for_each_free_object(__p, __s, __free) \ |
352 | for (__p = (__free); (__p) != page->end; __p = get_freepointer((__s),\ | 336 | for (__p = (__free); __p; __p = get_freepointer((__s), __p)) |
353 | __p)) | ||
354 | 337 | ||
355 | /* Determine object index from a given position */ | 338 | /* Determine object index from a given position */ |
356 | static inline int slab_index(void *p, struct kmem_cache *s, void *addr) | 339 | static inline int slab_index(void *p, struct kmem_cache *s, void *addr) |
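For illustration, a hypothetical walk using the simplified macro; p, s, and page are stand-in names for a cursor, a cache, and one of its locked slabs:

	void *p;
	int nr_free = 0;

	/* Count the free objects on one slab; iteration now stops on
	 * NULL rather than on the old page->end sentinel. */
	for_each_free_object(p, s, page->freelist)
		nr_free++;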
@@ -502,7 +485,7 @@ static void slab_fix(struct kmem_cache *s, char *fmt, ...) | |||
502 | static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p) | 485 | static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p) |
503 | { | 486 | { |
504 | unsigned int off; /* Offset of last byte */ | 487 | unsigned int off; /* Offset of last byte */ |
505 | u8 *addr = slab_address(page); | 488 | u8 *addr = page_address(page); |
506 | 489 | ||
507 | print_tracking(s, p); | 490 | print_tracking(s, p); |
508 | 491 | ||
@@ -637,7 +620,7 @@ static int check_bytes_and_report(struct kmem_cache *s, struct page *page, | |||
637 | * A. Free pointer (if we cannot overwrite object on free) | 620 | * A. Free pointer (if we cannot overwrite object on free) |
638 | * B. Tracking data for SLAB_STORE_USER | 621 | * B. Tracking data for SLAB_STORE_USER |
639 | * C. Padding to reach required alignment boundary or at minimum | 622 | * C. Padding to reach required alignment boundary or at minimum |
640 | * one word if debuggin is on to be able to detect writes | 623 | * one word if debugging is on to be able to detect writes |
641 | * before the word boundary. | 624 | * before the word boundary. |
642 | * | 625 | * |
643 | * Padding is done using 0x5a (POISON_INUSE) | 626 | * Padding is done using 0x5a (POISON_INUSE) |
@@ -680,7 +663,7 @@ static int slab_pad_check(struct kmem_cache *s, struct page *page) | |||
680 | if (!(s->flags & SLAB_POISON)) | 663 | if (!(s->flags & SLAB_POISON)) |
681 | return 1; | 664 | return 1; |
682 | 665 | ||
683 | start = slab_address(page); | 666 | start = page_address(page); |
684 | end = start + (PAGE_SIZE << s->order); | 667 | end = start + (PAGE_SIZE << s->order); |
685 | length = s->objects * s->size; | 668 | length = s->objects * s->size; |
686 | remainder = end - (start + length); | 669 | remainder = end - (start + length); |
@@ -748,7 +731,7 @@ static int check_object(struct kmem_cache *s, struct page *page, | |||
748 | * of the free objects in this slab. May cause | 731 | * of the free objects in this slab. May cause |
749 | * another error because the object count is now wrong. | 732 | * another error because the object count is now wrong. |
750 | */ | 733 | */ |
751 | set_freepointer(s, p, page->end); | 734 | set_freepointer(s, p, NULL); |
752 | return 0; | 735 | return 0; |
753 | } | 736 | } |
754 | return 1; | 737 | return 1; |
@@ -782,18 +765,18 @@ static int on_freelist(struct kmem_cache *s, struct page *page, void *search) | |||
782 | void *fp = page->freelist; | 765 | void *fp = page->freelist; |
783 | void *object = NULL; | 766 | void *object = NULL; |
784 | 767 | ||
785 | while (fp != page->end && nr <= s->objects) { | 768 | while (fp && nr <= s->objects) { |
786 | if (fp == search) | 769 | if (fp == search) |
787 | return 1; | 770 | return 1; |
788 | if (!check_valid_pointer(s, page, fp)) { | 771 | if (!check_valid_pointer(s, page, fp)) { |
789 | if (object) { | 772 | if (object) { |
790 | object_err(s, page, object, | 773 | object_err(s, page, object, |
791 | "Freechain corrupt"); | 774 | "Freechain corrupt"); |
792 | set_freepointer(s, object, page->end); | 775 | set_freepointer(s, object, NULL); |
793 | break; | 776 | break; |
794 | } else { | 777 | } else { |
795 | slab_err(s, page, "Freepointer corrupt"); | 778 | slab_err(s, page, "Freepointer corrupt"); |
796 | page->freelist = page->end; | 779 | page->freelist = NULL; |
797 | page->inuse = s->objects; | 780 | page->inuse = s->objects; |
798 | slab_fix(s, "Freelist cleared"); | 781 | slab_fix(s, "Freelist cleared"); |
799 | return 0; | 782 | return 0; |
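The recovery policy here is unchanged; only the terminator written back differs. A sketch of the two corruption cases the hunk handles:

	/*
	 *   page->freelist -> objA -> objB -> <garbage>
	 *                              ^ last object that validated
	 *
	 * Mid-chain corruption: set_freepointer(s, objB, NULL) truncates
	 * the list after the last good object and the scan stops there.
	 *
	 * Corrupt head: page->freelist = NULL plus page->inuse =
	 * s->objects writes the whole slab off as allocated, so nothing
	 * further is handed out from it.
	 */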
@@ -870,7 +853,7 @@ static int alloc_debug_processing(struct kmem_cache *s, struct page *page, | |||
870 | if (!check_slab(s, page)) | 853 | if (!check_slab(s, page)) |
871 | goto bad; | 854 | goto bad; |
872 | 855 | ||
873 | if (object && !on_freelist(s, page, object)) { | 856 | if (!on_freelist(s, page, object)) { |
874 | object_err(s, page, object, "Object already allocated"); | 857 | object_err(s, page, object, "Object already allocated"); |
875 | goto bad; | 858 | goto bad; |
876 | } | 859 | } |
@@ -880,7 +863,7 @@ static int alloc_debug_processing(struct kmem_cache *s, struct page *page, | |||
880 | goto bad; | 863 | goto bad; |
881 | } | 864 | } |
882 | 865 | ||
883 | if (object && !check_object(s, page, object, 0)) | 866 | if (!check_object(s, page, object, 0)) |
884 | goto bad; | 867 | goto bad; |
885 | 868 | ||
886 | /* Success perform special debug activities for allocs */ | 869 | /* Success perform special debug activities for allocs */ |
@@ -899,7 +882,7 @@ bad: | |||
899 | */ | 882 | */ |
900 | slab_fix(s, "Marking all objects used"); | 883 | slab_fix(s, "Marking all objects used"); |
901 | page->inuse = s->objects; | 884 | page->inuse = s->objects; |
902 | page->freelist = page->end; | 885 | page->freelist = NULL; |
903 | } | 886 | } |
904 | return 0; | 887 | return 0; |
905 | } | 888 | } |
@@ -939,7 +922,7 @@ static int free_debug_processing(struct kmem_cache *s, struct page *page, | |||
939 | } | 922 | } |
940 | 923 | ||
941 | /* Special debug activities for freeing objects */ | 924 | /* Special debug activities for freeing objects */ |
942 | if (!SlabFrozen(page) && page->freelist == page->end) | 925 | if (!SlabFrozen(page) && !page->freelist) |
943 | remove_full(s, page); | 926 | remove_full(s, page); |
944 | if (s->flags & SLAB_STORE_USER) | 927 | if (s->flags & SLAB_STORE_USER) |
945 | set_track(s, object, TRACK_FREE, addr); | 928 | set_track(s, object, TRACK_FREE, addr); |
@@ -1015,30 +998,11 @@ static unsigned long kmem_cache_flags(unsigned long objsize, | |||
1015 | void (*ctor)(struct kmem_cache *, void *)) | 998 | void (*ctor)(struct kmem_cache *, void *)) |
1016 | { | 999 | { |
1017 | /* | 1000 | /* |
1018 | * The page->offset field is only 16 bit wide. This is an offset | 1001 | * Enable debugging if selected on the kernel commandline. |
1019 | * in units of words from the beginning of an object. If the slab | ||
1020 | * size is bigger then we cannot move the free pointer behind the | ||
1021 | * object anymore. | ||
1022 | * | ||
1023 | * On 32 bit platforms the limit is 256k. On 64bit platforms | ||
1024 | * the limit is 512k. | ||
1025 | * | ||
1026 | * Debugging or ctor may create a need to move the free | ||
1027 | * pointer. Fail if this happens. | ||
1028 | */ | 1002 | */ |
1029 | if (objsize >= 65535 * sizeof(void *)) { | 1003 | if (slub_debug && (!slub_debug_slabs || |
1030 | BUG_ON(flags & (SLAB_RED_ZONE | SLAB_POISON | | 1004 | strncmp(slub_debug_slabs, name, strlen(slub_debug_slabs)) == 0)) |
1031 | SLAB_STORE_USER | SLAB_DESTROY_BY_RCU)); | 1005 | flags |= slub_debug; |
1032 | BUG_ON(ctor); | ||
1033 | } else { | ||
1034 | /* | ||
1035 | * Enable debugging if selected on the kernel commandline. | ||
1036 | */ | ||
1037 | if (slub_debug && (!slub_debug_slabs || | ||
1038 | strncmp(slub_debug_slabs, name, | ||
1039 | strlen(slub_debug_slabs)) == 0)) | ||
1040 | flags |= slub_debug; | ||
1041 | } | ||
1042 | 1006 | ||
1043 | return flags; | 1007 | return flags; |
1044 | } | 1008 | } |
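The deleted block guarded the old 16-bit free-pointer offset encoding, which no longer exists after the freelist rework, so only the commandline matching survives. An illustration of that prefix match (the cache names are made up):

	/* With slub_debug_slabs = "kmalloc", a cache named "kmalloc-96"
	 * matches, since strncmp("kmalloc", "kmalloc-96", 7) == 0, and it
	 * inherits the slub_debug flags; a cache named "dentry" does not. */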
@@ -1124,7 +1088,6 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node) | |||
1124 | SetSlabDebug(page); | 1088 | SetSlabDebug(page); |
1125 | 1089 | ||
1126 | start = page_address(page); | 1090 | start = page_address(page); |
1127 | page->end = start + 1; | ||
1128 | 1091 | ||
1129 | if (unlikely(s->flags & SLAB_POISON)) | 1092 | if (unlikely(s->flags & SLAB_POISON)) |
1130 | memset(start, POISON_INUSE, PAGE_SIZE << s->order); | 1093 | memset(start, POISON_INUSE, PAGE_SIZE << s->order); |
@@ -1136,7 +1099,7 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node) | |||
1136 | last = p; | 1099 | last = p; |
1137 | } | 1100 | } |
1138 | setup_object(s, page, last); | 1101 | setup_object(s, page, last); |
1139 | set_freepointer(s, last, page->end); | 1102 | set_freepointer(s, last, NULL); |
1140 | 1103 | ||
1141 | page->freelist = start; | 1104 | page->freelist = start; |
1142 | page->inuse = 0; | 1105 | page->inuse = 0; |
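The resulting freelist layout, sketched for a slab with three objects (the object count is illustrative):

	/*
	 *   page->freelist --> obj0 --> obj1 --> obj2 --> NULL
	 *
	 * Each arrow is the free pointer stored at s->offset inside the
	 * object; the final pointer is now NULL instead of the tagged
	 * page->end value, so start no longer needs the "start + 1"
	 * marker that the deleted line set up.
	 */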
@@ -1152,7 +1115,7 @@ static void __free_slab(struct kmem_cache *s, struct page *page) | |||
1152 | void *p; | 1115 | void *p; |
1153 | 1116 | ||
1154 | slab_pad_check(s, page); | 1117 | slab_pad_check(s, page); |
1155 | for_each_object(p, s, slab_address(page)) | 1118 | for_each_object(p, s, page_address(page)) |
1156 | check_object(s, page, p, 0); | 1119 | check_object(s, page, p, 0); |
1157 | ClearSlabDebug(page); | 1120 | ClearSlabDebug(page); |
1158 | } | 1121 | } |
@@ -1162,7 +1125,6 @@ static void __free_slab(struct kmem_cache *s, struct page *page) | |||
1162 | NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE, | 1125 | NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE, |
1163 | -pages); | 1126 | -pages); |
1164 | 1127 | ||
1165 | page->mapping = NULL; | ||
1166 | __free_pages(page, s->order); | 1128 | __free_pages(page, s->order); |
1167 | } | 1129 | } |
1168 | 1130 | ||
@@ -1307,7 +1269,7 @@ static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags) | |||
1307 | * may return off node objects because partial slabs are obtained | 1269 | * may return off node objects because partial slabs are obtained |
1308 | * from other nodes and filled up. | 1270 | * from other nodes and filled up. |
1309 | * | 1271 | * |
1310 | * If /sys/slab/xx/defrag_ratio is set to 100 (which makes | 1272 | * If /sys/kernel/slab/xx/defrag_ratio is set to 100 (which makes |
1311 | * defrag_ratio = 1000) then every (well almost) allocation will | 1273 | * defrag_ratio = 1000) then every (well almost) allocation will |
1312 | * first attempt to defrag slab caches on other nodes. This means | 1274 | * first attempt to defrag slab caches on other nodes. This means |
1313 | * scanning over all nodes to look for partial slabs which may be | 1275 | * scanning over all nodes to look for partial slabs which may be |
@@ -1366,7 +1328,7 @@ static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail) | |||
1366 | ClearSlabFrozen(page); | 1328 | ClearSlabFrozen(page); |
1367 | if (page->inuse) { | 1329 | if (page->inuse) { |
1368 | 1330 | ||
1369 | if (page->freelist != page->end) { | 1331 | if (page->freelist) { |
1370 | add_partial(n, page, tail); | 1332 | add_partial(n, page, tail); |
1371 | stat(c, tail ? DEACTIVATE_TO_TAIL : DEACTIVATE_TO_HEAD); | 1333 | stat(c, tail ? DEACTIVATE_TO_TAIL : DEACTIVATE_TO_HEAD); |
1372 | } else { | 1334 | } else { |
@@ -1382,9 +1344,11 @@ static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail) | |||
1382 | * Adding an empty slab to the partial slabs in order | 1344 | * Adding an empty slab to the partial slabs in order |
1383 | * to avoid page allocator overhead. This slab needs | 1345 | * to avoid page allocator overhead. This slab needs |
1384 | * to come after the other slabs with objects in | 1346 | * to come after the other slabs with objects in |
1385 | * order to fill them up. That way the size of the | 1347 | * so that the others get filled first. That way the |
1386 | * partial list stays small. kmem_cache_shrink can | 1348 | * size of the partial list stays small. |
1387 | * reclaim empty slabs from the partial list. | 1349 | * |
1350 | * kmem_cache_shrink can reclaim any empty slabs from the | ||
1351 | * partial list. | ||
1388 | */ | 1352 | */ |
1389 | add_partial(n, page, 1); | 1353 | add_partial(n, page, 1); |
1390 | slab_unlock(page); | 1354 | slab_unlock(page); |
@@ -1404,18 +1368,14 @@ static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) | |||
1404 | struct page *page = c->page; | 1368 | struct page *page = c->page; |
1405 | int tail = 1; | 1369 | int tail = 1; |
1406 | 1370 | ||
1407 | if (c->freelist) | 1371 | if (page->freelist) |
1408 | stat(c, DEACTIVATE_REMOTE_FREES); | 1372 | stat(c, DEACTIVATE_REMOTE_FREES); |
1409 | /* | 1373 | /* |
1410 | * Merge cpu freelist into freelist. Typically we get here | 1374 | * Merge cpu freelist into slab freelist. Typically we get here |
1411 | * because both freelists are empty. So this is unlikely | 1375 | * because both freelists are empty. So this is unlikely |
1412 | * to occur. | 1376 | * to occur. |
1413 | * | ||
1414 | * We need to use _is_end here because deactivate slab may | ||
1415 | * be called for a debug slab. Then c->freelist may contain | ||
1416 | * a dummy pointer. | ||
1417 | */ | 1377 | */ |
1418 | while (unlikely(!is_end(c->freelist))) { | 1378 | while (unlikely(c->freelist)) { |
1419 | void **object; | 1379 | void **object; |
1420 | 1380 | ||
1421 | tail = 0; /* Hot objects. Put the slab first */ | 1381 | tail = 0; /* Hot objects. Put the slab first */ |
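The hunk cuts off inside the merge loop; for context, the body continues roughly as follows (a sketch reconstructed around the lines shown, not verbatim):

	while (unlikely(c->freelist)) {
		void **object;

		tail = 0;	/* Hot objects. Put the slab first */

		/* Pop the head off the per-cpu lockless list ... */
		object = c->freelist;
		c->freelist = c->freelist[c->offset];

		/* ... and push it onto the slab's own list. */
		object[c->offset] = page->freelist;
		page->freelist = object;
		page->inuse--;
	}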
@@ -1442,6 +1402,7 @@ static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) | |||
1442 | 1402 | ||
1443 | /* | 1403 | /* |
1444 | * Flush cpu slab. | 1404 | * Flush cpu slab. |
1405 | * | ||
1445 | * Called from IPI handler with interrupts disabled. | 1406 | * Called from IPI handler with interrupts disabled. |
1446 | */ | 1407 | */ |
1447 | static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu) | 1408 | static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu) |
@@ -1500,7 +1461,8 @@ static inline int node_match(struct kmem_cache_cpu *c, int node) | |||
1500 | * rest of the freelist to the lockless freelist. | 1461 | * rest of the freelist to the lockless freelist. |
1501 | * | 1462 | * |
1502 | * And if we were unable to get a new slab from the partial slab lists then | 1463 | * And if we were unable to get a new slab from the partial slab lists then |
1503 | * we need to allocate a new slab. This is slowest path since we may sleep. | 1464 | * we need to allocate a new slab. This is the slowest path since it involves |
1465 | * a call to the page allocator and the setup of a new slab. | ||
1504 | */ | 1466 | */ |
1505 | static void *__slab_alloc(struct kmem_cache *s, | 1467 | static void *__slab_alloc(struct kmem_cache *s, |
1506 | gfp_t gfpflags, int node, void *addr, struct kmem_cache_cpu *c) | 1468 | gfp_t gfpflags, int node, void *addr, struct kmem_cache_cpu *c) |
@@ -1514,18 +1476,19 @@ static void *__slab_alloc(struct kmem_cache *s, | |||
1514 | slab_lock(c->page); | 1476 | slab_lock(c->page); |
1515 | if (unlikely(!node_match(c, node))) | 1477 | if (unlikely(!node_match(c, node))) |
1516 | goto another_slab; | 1478 | goto another_slab; |
1479 | |||
1517 | stat(c, ALLOC_REFILL); | 1480 | stat(c, ALLOC_REFILL); |
1481 | |||
1518 | load_freelist: | 1482 | load_freelist: |
1519 | object = c->page->freelist; | 1483 | object = c->page->freelist; |
1520 | if (unlikely(object == c->page->end)) | 1484 | if (unlikely(!object)) |
1521 | goto another_slab; | 1485 | goto another_slab; |
1522 | if (unlikely(SlabDebug(c->page))) | 1486 | if (unlikely(SlabDebug(c->page))) |
1523 | goto debug; | 1487 | goto debug; |
1524 | 1488 | ||
1525 | object = c->page->freelist; | ||
1526 | c->freelist = object[c->offset]; | 1489 | c->freelist = object[c->offset]; |
1527 | c->page->inuse = s->objects; | 1490 | c->page->inuse = s->objects; |
1528 | c->page->freelist = c->page->end; | 1491 | c->page->freelist = NULL; |
1529 | c->node = page_to_nid(c->page); | 1492 | c->node = page_to_nid(c->page); |
1530 | unlock_out: | 1493 | unlock_out: |
1531 | slab_unlock(c->page); | 1494 | slab_unlock(c->page); |
@@ -1578,7 +1541,6 @@ new_slab: | |||
1578 | 1541 | ||
1579 | return NULL; | 1542 | return NULL; |
1580 | debug: | 1543 | debug: |
1581 | object = c->page->freelist; | ||
1582 | if (!alloc_debug_processing(s, c->page, object, addr)) | 1544 | if (!alloc_debug_processing(s, c->page, object, addr)) |
1583 | goto another_slab; | 1545 | goto another_slab; |
1584 | 1546 | ||
@@ -1607,7 +1569,7 @@ static __always_inline void *slab_alloc(struct kmem_cache *s, | |||
1607 | 1569 | ||
1608 | local_irq_save(flags); | 1570 | local_irq_save(flags); |
1609 | c = get_cpu_slab(s, smp_processor_id()); | 1571 | c = get_cpu_slab(s, smp_processor_id()); |
1610 | if (unlikely(is_end(c->freelist) || !node_match(c, node))) | 1572 | if (unlikely(!c->freelist || !node_match(c, node))) |
1611 | 1573 | ||
1612 | object = __slab_alloc(s, gfpflags, node, addr, c); | 1574 | object = __slab_alloc(s, gfpflags, node, addr, c); |
1613 | 1575 | ||
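When the unlikely() test is false — the cpu slab has free objects and matches the requested node — the elided else branch takes the lockless fastpath, roughly (a sketch, not verbatim):

	else {
		object = c->freelist;
		c->freelist = object[c->offset];
		stat(c, ALLOC_FASTPATH);
	}

Replacing is_end(c->freelist) with a plain !c->freelist check is what keeps this path free of sentinel arithmetic.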
@@ -1659,6 +1621,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page, | |||
1659 | 1621 | ||
1660 | if (unlikely(SlabDebug(page))) | 1622 | if (unlikely(SlabDebug(page))) |
1661 | goto debug; | 1623 | goto debug; |
1624 | |||
1662 | checks_ok: | 1625 | checks_ok: |
1663 | prior = object[offset] = page->freelist; | 1626 | prior = object[offset] = page->freelist; |
1664 | page->freelist = object; | 1627 | page->freelist = object; |
@@ -1673,11 +1636,10 @@ checks_ok: | |||
1673 | goto slab_empty; | 1636 | goto slab_empty; |
1674 | 1637 | ||
1675 | /* | 1638 | /* |
1676 | * Objects left in the slab. If it | 1639 | * Objects left in the slab. If it was not on the partial list before |
1677 | * was not on the partial list before | ||
1678 | * then add it. | 1640 | * then add it. |
1679 | */ | 1641 | */ |
1680 | if (unlikely(prior == page->end)) { | 1642 | if (unlikely(!prior)) { |
1681 | add_partial(get_node(s, page_to_nid(page)), page, 1); | 1643 | add_partial(get_node(s, page_to_nid(page)), page, 1); |
1682 | stat(c, FREE_ADD_PARTIAL); | 1644 | stat(c, FREE_ADD_PARTIAL); |
1683 | } | 1645 | } |
@@ -1687,7 +1649,7 @@ out_unlock: | |||
1687 | return; | 1649 | return; |
1688 | 1650 | ||
1689 | slab_empty: | 1651 | slab_empty: |
1690 | if (prior != page->end) { | 1652 | if (prior) { |
1691 | /* | 1653 | /* |
1692 | * Slab still on the partial list. | 1654 | * Slab still on the partial list. |
1693 | */ | 1655 | */ |
@@ -1724,8 +1686,8 @@ static __always_inline void slab_free(struct kmem_cache *s, | |||
1724 | unsigned long flags; | 1686 | unsigned long flags; |
1725 | 1687 | ||
1726 | local_irq_save(flags); | 1688 | local_irq_save(flags); |
1727 | debug_check_no_locks_freed(object, s->objsize); | ||
1728 | c = get_cpu_slab(s, smp_processor_id()); | 1689 | c = get_cpu_slab(s, smp_processor_id()); |
1690 | debug_check_no_locks_freed(object, c->objsize); | ||
1729 | if (likely(page == c->page && c->node >= 0)) { | 1691 | if (likely(page == c->page && c->node >= 0)) { |
1730 | object[c->offset] = c->freelist; | 1692 | object[c->offset] = c->freelist; |
1731 | c->freelist = object; | 1693 | c->freelist = object; |
@@ -1888,20 +1850,21 @@ static unsigned long calculate_alignment(unsigned long flags, | |||
1888 | unsigned long align, unsigned long size) | 1850 | unsigned long align, unsigned long size) |
1889 | { | 1851 | { |
1890 | /* | 1852 | /* |
1891 | * If the user wants hardware cache aligned objects then | 1853 | * If the user wants hardware cache aligned objects then follow that |
1892 | * follow that suggestion if the object is sufficiently | 1854 | * suggestion if the object is sufficiently large. |
1893 | * large. | ||
1894 | * | 1855 | * |
1895 | * The hardware cache alignment cannot override the | 1856 | * The hardware cache alignment cannot override the specified |
1896 | * specified alignment though. If that is greater | 1857 | * alignment though. If that is greater then use it. |
1897 | * then use it. | ||
1898 | */ | 1858 | */ |
1899 | if ((flags & SLAB_HWCACHE_ALIGN) && | 1859 | if (flags & SLAB_HWCACHE_ALIGN) { |
1900 | size > cache_line_size() / 2) | 1860 | unsigned long ralign = cache_line_size(); |
1901 | return max_t(unsigned long, align, cache_line_size()); | 1861 | while (size <= ralign / 2) |
1862 | ralign /= 2; | ||
1863 | align = max(align, ralign); | ||
1864 | } | ||
1902 | 1865 | ||
1903 | if (align < ARCH_SLAB_MINALIGN) | 1866 | if (align < ARCH_SLAB_MINALIGN) |
1904 | return ARCH_SLAB_MINALIGN; | 1867 | align = ARCH_SLAB_MINALIGN; |
1905 | 1868 | ||
1906 | return ALIGN(align, sizeof(void *)); | 1869 | return ALIGN(align, sizeof(void *)); |
1907 | } | 1870 | } |
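A worked example of the new halving loop, assuming 64-byte cache lines: a 12-byte object with SLAB_HWCACHE_ALIGN now gets graduated alignment instead of the old all-or-nothing test.

	/* size = 12, cache_line_size() = 64 (assumed):
	 *   ralign = 64;  12 <= 32 -> ralign = 32
	 *                 12 <= 16 -> ralign = 16
	 *                 12 >   8 -> stop
	 * align = max(align, 16): 16-byte alignment, where the old
	 * "size > cache_line_size() / 2" test would have applied no
	 * hardware alignment at all. */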
@@ -1910,7 +1873,7 @@ static void init_kmem_cache_cpu(struct kmem_cache *s, | |||
1910 | struct kmem_cache_cpu *c) | 1873 | struct kmem_cache_cpu *c) |
1911 | { | 1874 | { |
1912 | c->page = NULL; | 1875 | c->page = NULL; |
1913 | c->freelist = (void *)PAGE_MAPPING_ANON; | 1876 | c->freelist = NULL; |
1914 | c->node = 0; | 1877 | c->node = 0; |
1915 | c->offset = s->offset / sizeof(void *); | 1878 | c->offset = s->offset / sizeof(void *); |
1916 | c->objsize = s->objsize; | 1879 | c->objsize = s->objsize; |
@@ -2092,6 +2055,7 @@ static struct kmem_cache_node *early_kmem_cache_node_alloc(gfp_t gfpflags, | |||
2092 | #endif | 2055 | #endif |
2093 | init_kmem_cache_node(n); | 2056 | init_kmem_cache_node(n); |
2094 | atomic_long_inc(&n->nr_slabs); | 2057 | atomic_long_inc(&n->nr_slabs); |
2058 | |||
2095 | /* | 2059 | /* |
2096 | * lockdep requires consistent irq usage for each lock | 2060 | * lockdep requires consistent irq usage for each lock |
2097 | * so even though there cannot be a race this early in | 2061 | * so even though there cannot be a race this early in |
@@ -2173,6 +2137,14 @@ static int calculate_sizes(struct kmem_cache *s) | |||
2173 | unsigned long align = s->align; | 2137 | unsigned long align = s->align; |
2174 | 2138 | ||
2175 | /* | 2139 | /* |
2140 | * Round up object size to the next word boundary. We can only | ||
2141 | * place the free pointer at word boundaries and this determines | ||
2142 | * the possible location of the free pointer. | ||
2143 | */ | ||
2144 | size = ALIGN(size, sizeof(void *)); | ||
2145 | |||
2146 | #ifdef CONFIG_SLUB_DEBUG | ||
2147 | /* | ||
2176 | * Determine if we can poison the object itself. If the user of | 2148 | * Determine if we can poison the object itself. If the user of |
2177 | * the slab may touch the object after free or before allocation | 2149 | * the slab may touch the object after free or before allocation |
2178 | * then we should never poison the object itself. | 2150 | * then we should never poison the object itself. |
@@ -2183,14 +2155,7 @@ static int calculate_sizes(struct kmem_cache *s) | |||
2183 | else | 2155 | else |
2184 | s->flags &= ~__OBJECT_POISON; | 2156 | s->flags &= ~__OBJECT_POISON; |
2185 | 2157 | ||
2186 | /* | ||
2187 | * Round up object size to the next word boundary. We can only | ||
2188 | * place the free pointer at word boundaries and this determines | ||
2189 | * the possible location of the free pointer. | ||
2190 | */ | ||
2191 | size = ALIGN(size, sizeof(void *)); | ||
2192 | 2158 | ||
2193 | #ifdef CONFIG_SLUB_DEBUG | ||
2194 | /* | 2159 | /* |
2195 | * If we are Redzoning then check if there is some space between the | 2160 | * If we are Redzoning then check if there is some space between the |
2196 | * end of the object and the free pointer. If not then add an | 2161 | * end of the object and the free pointer. If not then add an |
@@ -2343,7 +2308,7 @@ int kmem_ptr_validate(struct kmem_cache *s, const void *object) | |||
2343 | /* | 2308 | /* |
2344 | * We could also check if the object is on the slabs freelist. | 2309 | * We could also check if the object is on the slabs freelist. |
2345 | * But this would be too expensive and it seems that the main | 2310 | * But this would be too expensive and it seems that the main |
2346 | * purpose of kmem_ptr_valid is to check if the object belongs | 2311 | * purpose of kmem_ptr_valid() is to check if the object belongs |
2347 | * to a certain slab. | 2312 | * to a certain slab. |
2348 | */ | 2313 | */ |
2349 | return 1; | 2314 | return 1; |
@@ -2630,13 +2595,24 @@ void *__kmalloc(size_t size, gfp_t flags) | |||
2630 | } | 2595 | } |
2631 | EXPORT_SYMBOL(__kmalloc); | 2596 | EXPORT_SYMBOL(__kmalloc); |
2632 | 2597 | ||
2598 | static void *kmalloc_large_node(size_t size, gfp_t flags, int node) | ||
2599 | { | ||
2600 | struct page *page = alloc_pages_node(node, flags | __GFP_COMP, | ||
2601 | get_order(size)); | ||
2602 | |||
2603 | if (page) | ||
2604 | return page_address(page); | ||
2605 | else | ||
2606 | return NULL; | ||
2607 | } | ||
2608 | |||
2633 | #ifdef CONFIG_NUMA | 2609 | #ifdef CONFIG_NUMA |
2634 | void *__kmalloc_node(size_t size, gfp_t flags, int node) | 2610 | void *__kmalloc_node(size_t size, gfp_t flags, int node) |
2635 | { | 2611 | { |
2636 | struct kmem_cache *s; | 2612 | struct kmem_cache *s; |
2637 | 2613 | ||
2638 | if (unlikely(size > PAGE_SIZE)) | 2614 | if (unlikely(size > PAGE_SIZE)) |
2639 | return kmalloc_large(size, flags); | 2615 | return kmalloc_large_node(size, flags, node); |
2640 | 2616 | ||
2641 | s = get_slab(size, flags); | 2617 | s = get_slab(size, flags); |
2642 | 2618 | ||
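The __GFP_COMP in kmalloc_large_node() is what makes the later size lookup work: ksize() (next hunk) computes the size of such an allocation as PAGE_SIZE << compound_order(virt_to_head_page(object)), which is only correct when the multi-page allocation is marked compound. A usage sketch (hypothetical call site):

	/* A request larger than PAGE_SIZE bypasses the kmalloc caches and
	 * comes straight from the page allocator on the requested node. */
	void *buf = __kmalloc_node(2 * PAGE_SIZE, GFP_KERNEL, 0);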
@@ -2653,19 +2629,17 @@ size_t ksize(const void *object) | |||
2653 | struct page *page; | 2629 | struct page *page; |
2654 | struct kmem_cache *s; | 2630 | struct kmem_cache *s; |
2655 | 2631 | ||
2656 | BUG_ON(!object); | ||
2657 | if (unlikely(object == ZERO_SIZE_PTR)) | 2632 | if (unlikely(object == ZERO_SIZE_PTR)) |
2658 | return 0; | 2633 | return 0; |
2659 | 2634 | ||
2660 | page = virt_to_head_page(object); | 2635 | page = virt_to_head_page(object); |
2661 | BUG_ON(!page); | ||
2662 | 2636 | ||
2663 | if (unlikely(!PageSlab(page))) | 2637 | if (unlikely(!PageSlab(page))) |
2664 | return PAGE_SIZE << compound_order(page); | 2638 | return PAGE_SIZE << compound_order(page); |
2665 | 2639 | ||
2666 | s = page->slab; | 2640 | s = page->slab; |
2667 | BUG_ON(!s); | ||
2668 | 2641 | ||
2642 | #ifdef CONFIG_SLUB_DEBUG | ||
2669 | /* | 2643 | /* |
2670 | * Debugging requires use of the padding between object | 2644 | * Debugging requires use of the padding between object |
2671 | * and whatever may come after it. | 2645 | * and whatever may come after it. |
@@ -2673,6 +2647,7 @@ size_t ksize(const void *object) | |||
2673 | if (s->flags & (SLAB_RED_ZONE | SLAB_POISON)) | 2647 | if (s->flags & (SLAB_RED_ZONE | SLAB_POISON)) |
2674 | return s->objsize; | 2648 | return s->objsize; |
2675 | 2649 | ||
2650 | #endif | ||
2676 | /* | 2651 | /* |
2677 | * If we have the need to store the freelist pointer | 2652 | * If we have the need to store the freelist pointer |
2678 | * back there or track user information then we can | 2653 | * back there or track user information then we can |
@@ -2680,7 +2655,6 @@ size_t ksize(const void *object) | |||
2680 | */ | 2655 | */ |
2681 | if (s->flags & (SLAB_DESTROY_BY_RCU | SLAB_STORE_USER)) | 2656 | if (s->flags & (SLAB_DESTROY_BY_RCU | SLAB_STORE_USER)) |
2682 | return s->inuse; | 2657 | return s->inuse; |
2683 | |||
2684 | /* | 2658 | /* |
2685 | * Else we can use all the padding etc for the allocation | 2659 | * Else we can use all the padding etc for the allocation |
2686 | */ | 2660 | */ |
@@ -2957,7 +2931,7 @@ void __init kmem_cache_init(void) | |||
2957 | /* | 2931 | /* |
2958 | * Patch up the size_index table if we have strange large alignment | 2932 | * Patch up the size_index table if we have strange large alignment |
2959 | * requirements for the kmalloc array. This is only the case for | 2933 | * requirements for the kmalloc array. This is only the case for |
2960 | * mips it seems. The standard arches will not generate any code here. | 2934 | * MIPS it seems. The standard arches will not generate any code here. |
2961 | * | 2935 | * |
2962 | * Largest permitted alignment is 256 bytes due to the way we | 2936 | * Largest permitted alignment is 256 bytes due to the way we |
2963 | * handle the index determination for the smaller caches. | 2937 | * handle the index determination for the smaller caches. |
@@ -2986,7 +2960,6 @@ void __init kmem_cache_init(void) | |||
2986 | kmem_size = sizeof(struct kmem_cache); | 2960 | kmem_size = sizeof(struct kmem_cache); |
2987 | #endif | 2961 | #endif |
2988 | 2962 | ||
2989 | |||
2990 | printk(KERN_INFO | 2963 | printk(KERN_INFO |
2991 | "SLUB: Genslabs=%d, HWalign=%d, Order=%d-%d, MinObjects=%d," | 2964 | "SLUB: Genslabs=%d, HWalign=%d, Order=%d-%d, MinObjects=%d," |
2992 | " CPUs=%d, Nodes=%d\n", | 2965 | " CPUs=%d, Nodes=%d\n", |
@@ -3083,12 +3056,15 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size, | |||
3083 | */ | 3056 | */ |
3084 | for_each_online_cpu(cpu) | 3057 | for_each_online_cpu(cpu) |
3085 | get_cpu_slab(s, cpu)->objsize = s->objsize; | 3058 | get_cpu_slab(s, cpu)->objsize = s->objsize; |
3059 | |||
3086 | s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *))); | 3060 | s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *))); |
3087 | up_write(&slub_lock); | 3061 | up_write(&slub_lock); |
3062 | |||
3088 | if (sysfs_slab_alias(s, name)) | 3063 | if (sysfs_slab_alias(s, name)) |
3089 | goto err; | 3064 | goto err; |
3090 | return s; | 3065 | return s; |
3091 | } | 3066 | } |
3067 | |||
3092 | s = kmalloc(kmem_size, GFP_KERNEL); | 3068 | s = kmalloc(kmem_size, GFP_KERNEL); |
3093 | if (s) { | 3069 | if (s) { |
3094 | if (kmem_cache_open(s, GFP_KERNEL, name, | 3070 | if (kmem_cache_open(s, GFP_KERNEL, name, |
@@ -3184,7 +3160,7 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags, | |||
3184 | struct kmem_cache *s; | 3160 | struct kmem_cache *s; |
3185 | 3161 | ||
3186 | if (unlikely(size > PAGE_SIZE)) | 3162 | if (unlikely(size > PAGE_SIZE)) |
3187 | return kmalloc_large(size, gfpflags); | 3163 | return kmalloc_large_node(size, gfpflags, node); |
3188 | 3164 | ||
3189 | s = get_slab(size, gfpflags); | 3165 | s = get_slab(size, gfpflags); |
3190 | 3166 | ||
@@ -3199,7 +3175,7 @@ static int validate_slab(struct kmem_cache *s, struct page *page, | |||
3199 | unsigned long *map) | 3175 | unsigned long *map) |
3200 | { | 3176 | { |
3201 | void *p; | 3177 | void *p; |
3202 | void *addr = slab_address(page); | 3178 | void *addr = page_address(page); |
3203 | 3179 | ||
3204 | if (!check_slab(s, page) || | 3180 | if (!check_slab(s, page) || |
3205 | !on_freelist(s, page, NULL)) | 3181 | !on_freelist(s, page, NULL)) |
@@ -3482,7 +3458,7 @@ static int add_location(struct loc_track *t, struct kmem_cache *s, | |||
3482 | static void process_slab(struct loc_track *t, struct kmem_cache *s, | 3458 | static void process_slab(struct loc_track *t, struct kmem_cache *s, |
3483 | struct page *page, enum track_item alloc) | 3459 | struct page *page, enum track_item alloc) |
3484 | { | 3460 | { |
3485 | void *addr = slab_address(page); | 3461 | void *addr = page_address(page); |
3486 | DECLARE_BITMAP(map, s->objects); | 3462 | DECLARE_BITMAP(map, s->objects); |
3487 | void *p; | 3463 | void *p; |
3488 | 3464 | ||
@@ -3591,8 +3567,8 @@ enum slab_stat_type { | |||
3591 | #define SO_CPU (1 << SL_CPU) | 3567 | #define SO_CPU (1 << SL_CPU) |
3592 | #define SO_OBJECTS (1 << SL_OBJECTS) | 3568 | #define SO_OBJECTS (1 << SL_OBJECTS) |
3593 | 3569 | ||
3594 | static unsigned long slab_objects(struct kmem_cache *s, | 3570 | static ssize_t show_slab_objects(struct kmem_cache *s, |
3595 | char *buf, unsigned long flags) | 3571 | char *buf, unsigned long flags) |
3596 | { | 3572 | { |
3597 | unsigned long total = 0; | 3573 | unsigned long total = 0; |
3598 | int cpu; | 3574 | int cpu; |
@@ -3602,6 +3578,8 @@ static unsigned long slab_objects(struct kmem_cache *s, | |||
3602 | unsigned long *per_cpu; | 3578 | unsigned long *per_cpu; |
3603 | 3579 | ||
3604 | nodes = kzalloc(2 * sizeof(unsigned long) * nr_node_ids, GFP_KERNEL); | 3580 | nodes = kzalloc(2 * sizeof(unsigned long) * nr_node_ids, GFP_KERNEL); |
3581 | if (!nodes) | ||
3582 | return -ENOMEM; | ||
3605 | per_cpu = nodes + nr_node_ids; | 3583 | per_cpu = nodes + nr_node_ids; |
3606 | 3584 | ||
3607 | for_each_possible_cpu(cpu) { | 3585 | for_each_possible_cpu(cpu) { |
@@ -3754,25 +3732,25 @@ SLAB_ATTR_RO(aliases); | |||
3754 | 3732 | ||
3755 | static ssize_t slabs_show(struct kmem_cache *s, char *buf) | 3733 | static ssize_t slabs_show(struct kmem_cache *s, char *buf) |
3756 | { | 3734 | { |
3757 | return slab_objects(s, buf, SO_FULL|SO_PARTIAL|SO_CPU); | 3735 | return show_slab_objects(s, buf, SO_FULL|SO_PARTIAL|SO_CPU); |
3758 | } | 3736 | } |
3759 | SLAB_ATTR_RO(slabs); | 3737 | SLAB_ATTR_RO(slabs); |
3760 | 3738 | ||
3761 | static ssize_t partial_show(struct kmem_cache *s, char *buf) | 3739 | static ssize_t partial_show(struct kmem_cache *s, char *buf) |
3762 | { | 3740 | { |
3763 | return slab_objects(s, buf, SO_PARTIAL); | 3741 | return show_slab_objects(s, buf, SO_PARTIAL); |
3764 | } | 3742 | } |
3765 | SLAB_ATTR_RO(partial); | 3743 | SLAB_ATTR_RO(partial); |
3766 | 3744 | ||
3767 | static ssize_t cpu_slabs_show(struct kmem_cache *s, char *buf) | 3745 | static ssize_t cpu_slabs_show(struct kmem_cache *s, char *buf) |
3768 | { | 3746 | { |
3769 | return slab_objects(s, buf, SO_CPU); | 3747 | return show_slab_objects(s, buf, SO_CPU); |
3770 | } | 3748 | } |
3771 | SLAB_ATTR_RO(cpu_slabs); | 3749 | SLAB_ATTR_RO(cpu_slabs); |
3772 | 3750 | ||
3773 | static ssize_t objects_show(struct kmem_cache *s, char *buf) | 3751 | static ssize_t objects_show(struct kmem_cache *s, char *buf) |
3774 | { | 3752 | { |
3775 | return slab_objects(s, buf, SO_FULL|SO_PARTIAL|SO_CPU|SO_OBJECTS); | 3753 | return show_slab_objects(s, buf, SO_FULL|SO_PARTIAL|SO_CPU|SO_OBJECTS); |
3776 | } | 3754 | } |
3777 | SLAB_ATTR_RO(objects); | 3755 | SLAB_ATTR_RO(objects); |
3778 | 3756 | ||
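The rename from slab_objects() to show_slab_objects() tracks the return-type change to ssize_t: the sysfs *_show() handlers above can now propagate the -ENOMEM from the kzalloc() check added in the earlier hunk, instead of folding it into an unsigned count.

	/* Assumed effect at the sysfs layer: a failed allocation in
	 * show_slab_objects() surfaces as an error from reading
	 * /sys/kernel/slab/<cache>/objects rather than bogus output. */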
@@ -3971,7 +3949,6 @@ SLAB_ATTR(remote_node_defrag_ratio); | |||
3971 | #endif | 3949 | #endif |
3972 | 3950 | ||
3973 | #ifdef CONFIG_SLUB_STATS | 3951 | #ifdef CONFIG_SLUB_STATS |
3974 | |||
3975 | static int show_stat(struct kmem_cache *s, char *buf, enum stat_item si) | 3952 | static int show_stat(struct kmem_cache *s, char *buf, enum stat_item si) |
3976 | { | 3953 | { |
3977 | unsigned long sum = 0; | 3954 | unsigned long sum = 0; |
@@ -4155,8 +4132,8 @@ static struct kset *slab_kset; | |||
4155 | #define ID_STR_LENGTH 64 | 4132 | #define ID_STR_LENGTH 64 |
4156 | 4133 | ||
4157 | /* Create a unique string id for a slab cache: | 4134 | /* Create a unique string id for a slab cache: |
4158 | * format | 4135 | * |
4159 | * :[flags-]size:[memory address of kmemcache] | 4136 | * Format :[flags-]size |
4160 | */ | 4137 | */ |
4161 | static char *create_unique_id(struct kmem_cache *s) | 4138 | static char *create_unique_id(struct kmem_cache *s) |
4162 | { | 4139 | { |