diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2011-10-26 15:46:18 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2011-10-26 15:46:18 -0400 |
commit | 138c4ae9cfda8fdcf9e137457853b09ef8cf8f77 (patch) | |
tree | 704c363de6d5868b08e9ae31a436ff04d423f625 | |
parent | 3b3dd79d6a8b3debd0291465fc8cd9caf765d545 (diff) | |
parent | e182a345d40deba7c3165a2857812bf403818319 (diff) |
Merge branch 'slab/for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/penberg/linux
* 'slab/for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/penberg/linux:
tools, slub: Fix off-by-one buffer corruption after readlink() call
slub: Discard slab page when node partial > minimum partial number
slub: correct comments error for per cpu partial
mm: restrict access to slab files under procfs and sysfs
slub: Code optimization in get_partial_node()
slub: doc: update the slabinfo.c file path
slub: explicitly document position of inserting slab to partial list
slub: update slabinfo tools to report per cpu partial list statistics
slub: per cpu cache for partial pages
slub: return object pointer from get_partial() / new_slab().
slub: pass kmem_cache_cpu pointer to get_partial()
slub: Prepare inuse field in new_slab()
slub: Remove useless statements in __slab_alloc
slub: free slabs without holding locks
slub: use print_hex_dump
slab: use print_hex_dump
-rw-r--r-- | Documentation/vm/00-INDEX | 2 | ||||
-rw-r--r-- | include/linux/mm_types.h | 14 | ||||
-rw-r--r-- | include/linux/slub_def.h | 4 | ||||
-rw-r--r-- | mm/slab.c | 19 | ||||
-rw-r--r-- | mm/slub.c | 558 | ||||
-rw-r--r-- | tools/slub/slabinfo.c | 10 |
6 files changed, 425 insertions, 182 deletions
diff --git a/Documentation/vm/00-INDEX b/Documentation/vm/00-INDEX index dca82d7c83d8..5481c8ba3412 100644 --- a/Documentation/vm/00-INDEX +++ b/Documentation/vm/00-INDEX | |||
@@ -30,8 +30,6 @@ page_migration | |||
30 | - description of page migration in NUMA systems. | 30 | - description of page migration in NUMA systems. |
31 | pagemap.txt | 31 | pagemap.txt |
32 | - pagemap, from the userspace perspective | 32 | - pagemap, from the userspace perspective |
33 | slabinfo.c | ||
34 | - source code for a tool to get reports about slabs. | ||
35 | slub.txt | 33 | slub.txt |
36 | - a short users guide for SLUB. | 34 | - a short users guide for SLUB. |
37 | unevictable-lru.txt | 35 | unevictable-lru.txt |
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 29971a589ff2..c93d00a6e95d 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h | |||
@@ -79,9 +79,21 @@ struct page { | |||
79 | }; | 79 | }; |
80 | 80 | ||
81 | /* Third double word block */ | 81 | /* Third double word block */ |
82 | struct list_head lru; /* Pageout list, eg. active_list | 82 | union { |
83 | struct list_head lru; /* Pageout list, eg. active_list | ||
83 | * protected by zone->lru_lock ! | 84 | * protected by zone->lru_lock ! |
84 | */ | 85 | */ |
86 | struct { /* slub per cpu partial pages */ | ||
87 | struct page *next; /* Next partial slab */ | ||
88 | #ifdef CONFIG_64BIT | ||
89 | int pages; /* Nr of partial slabs left */ | ||
90 | int pobjects; /* Approximate # of objects */ | ||
91 | #else | ||
92 | short int pages; | ||
93 | short int pobjects; | ||
94 | #endif | ||
95 | }; | ||
96 | }; | ||
85 | 97 | ||
86 | /* Remainder is not double word aligned */ | 98 | /* Remainder is not double word aligned */ |
87 | union { | 99 | union { |
diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index f58d6413d230..a32bcfdc7834 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h | |||
@@ -36,12 +36,15 @@ enum stat_item { | |||
36 | ORDER_FALLBACK, /* Number of times fallback was necessary */ | 36 | ORDER_FALLBACK, /* Number of times fallback was necessary */ |
37 | CMPXCHG_DOUBLE_CPU_FAIL,/* Failure of this_cpu_cmpxchg_double */ | 37 | CMPXCHG_DOUBLE_CPU_FAIL,/* Failure of this_cpu_cmpxchg_double */ |
38 | CMPXCHG_DOUBLE_FAIL, /* Number of times that cmpxchg double did not match */ | 38 | CMPXCHG_DOUBLE_FAIL, /* Number of times that cmpxchg double did not match */ |
39 | CPU_PARTIAL_ALLOC, /* Used cpu partial on alloc */ | ||
40 | CPU_PARTIAL_FREE, /* USed cpu partial on free */ | ||
39 | NR_SLUB_STAT_ITEMS }; | 41 | NR_SLUB_STAT_ITEMS }; |
40 | 42 | ||
41 | struct kmem_cache_cpu { | 43 | struct kmem_cache_cpu { |
42 | void **freelist; /* Pointer to next available object */ | 44 | void **freelist; /* Pointer to next available object */ |
43 | unsigned long tid; /* Globally unique transaction id */ | 45 | unsigned long tid; /* Globally unique transaction id */ |
44 | struct page *page; /* The slab from which we are allocating */ | 46 | struct page *page; /* The slab from which we are allocating */ |
47 | struct page *partial; /* Partially allocated frozen slabs */ | ||
45 | int node; /* The node of the page (or -1 for debug) */ | 48 | int node; /* The node of the page (or -1 for debug) */ |
46 | #ifdef CONFIG_SLUB_STATS | 49 | #ifdef CONFIG_SLUB_STATS |
47 | unsigned stat[NR_SLUB_STAT_ITEMS]; | 50 | unsigned stat[NR_SLUB_STAT_ITEMS]; |
@@ -79,6 +82,7 @@ struct kmem_cache { | |||
79 | int size; /* The size of an object including meta data */ | 82 | int size; /* The size of an object including meta data */ |
80 | int objsize; /* The size of an object without meta data */ | 83 | int objsize; /* The size of an object without meta data */ |
81 | int offset; /* Free pointer offset. */ | 84 | int offset; /* Free pointer offset. */ |
85 | int cpu_partial; /* Number of per cpu partial objects to keep around */ | ||
82 | struct kmem_cache_order_objects oo; | 86 | struct kmem_cache_order_objects oo; |
83 | 87 | ||
84 | /* Allocation and freeing of slabs */ | 88 | /* Allocation and freeing of slabs */ |
diff --git a/mm/slab.c b/mm/slab.c --- a/mm/slab.c +++ b/mm/slab.c | |||
@@ -1851,15 +1851,15 @@ static void dump_line(char *data, int offset, int limit) | |||
1851 | unsigned char error = 0; | 1851 | unsigned char error = 0; |
1852 | int bad_count = 0; | 1852 | int bad_count = 0; |
1853 | 1853 | ||
1854 | printk(KERN_ERR "%03x:", offset); | 1854 | printk(KERN_ERR "%03x: ", offset); |
1855 | for (i = 0; i < limit; i++) { | 1855 | for (i = 0; i < limit; i++) { |
1856 | if (data[offset + i] != POISON_FREE) { | 1856 | if (data[offset + i] != POISON_FREE) { |
1857 | error = data[offset + i]; | 1857 | error = data[offset + i]; |
1858 | bad_count++; | 1858 | bad_count++; |
1859 | } | 1859 | } |
1860 | printk(" %02x", (unsigned char)data[offset + i]); | ||
1861 | } | 1860 | } |
1862 | printk("\n"); | 1861 | print_hex_dump(KERN_CONT, "", 0, 16, 1, |
1862 | &data[offset], limit, 1); | ||
1863 | 1863 | ||
1864 | if (bad_count == 1) { | 1864 | if (bad_count == 1) { |
1865 | error ^= POISON_FREE; | 1865 | error ^= POISON_FREE; |
@@ -3039,14 +3039,9 @@ bad: | |||
3039 | printk(KERN_ERR "slab: Internal list corruption detected in " | 3039 | printk(KERN_ERR "slab: Internal list corruption detected in " |
3040 | "cache '%s'(%d), slabp %p(%d). Hexdump:\n", | 3040 | "cache '%s'(%d), slabp %p(%d). Hexdump:\n", |
3041 | cachep->name, cachep->num, slabp, slabp->inuse); | 3041 | cachep->name, cachep->num, slabp, slabp->inuse); |
3042 | for (i = 0; | 3042 | print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 16, 1, slabp, |
3043 | i < sizeof(*slabp) + cachep->num * sizeof(kmem_bufctl_t); | 3043 | sizeof(*slabp) + cachep->num * sizeof(kmem_bufctl_t), |
3044 | i++) { | 3044 | 1); |
3045 | if (i % 16 == 0) | ||
3046 | printk("\n%03x:", i); | ||
3047 | printk(" %02x", ((unsigned char *)slabp)[i]); | ||
3048 | } | ||
3049 | printk("\n"); | ||
3050 | BUG(); | 3045 | BUG(); |
3051 | } | 3046 | } |
3052 | } | 3047 | } |
@@ -4584,7 +4579,7 @@ static const struct file_operations proc_slabstats_operations = { | |||
4584 | 4579 | ||
4585 | static int __init slab_proc_init(void) | 4580 | static int __init slab_proc_init(void) |
4586 | { | 4581 | { |
4587 | proc_create("slabinfo",S_IWUSR|S_IRUGO,NULL,&proc_slabinfo_operations); | 4582 | proc_create("slabinfo",S_IWUSR|S_IRUSR,NULL,&proc_slabinfo_operations); |
4588 | #ifdef CONFIG_DEBUG_SLAB_LEAK | 4583 | #ifdef CONFIG_DEBUG_SLAB_LEAK |
4589 | proc_create("slab_allocators", 0, NULL, &proc_slabstats_operations); | 4584 | proc_create("slab_allocators", 0, NULL, &proc_slabstats_operations); |
4590 | #endif | 4585 | #endif |
diff --git a/mm/slub.c b/mm/slub.c --- a/mm/slub.c +++ b/mm/slub.c | |||
@@ -467,34 +467,8 @@ static int disable_higher_order_debug; | |||
467 | */ | 467 | */ |
468 | static void print_section(char *text, u8 *addr, unsigned int length) | 468 | static void print_section(char *text, u8 *addr, unsigned int length) |
469 | { | 469 | { |
470 | int i, offset; | 470 | print_hex_dump(KERN_ERR, text, DUMP_PREFIX_ADDRESS, 16, 1, addr, |
471 | int newline = 1; | 471 | length, 1); |
472 | char ascii[17]; | ||
473 | |||
474 | ascii[16] = 0; | ||
475 | |||
476 | for (i = 0; i < length; i++) { | ||
477 | if (newline) { | ||
478 | printk(KERN_ERR "%8s 0x%p: ", text, addr + i); | ||
479 | newline = 0; | ||
480 | } | ||
481 | printk(KERN_CONT " %02x", addr[i]); | ||
482 | offset = i % 16; | ||
483 | ascii[offset] = isgraph(addr[i]) ? addr[i] : '.'; | ||
484 | if (offset == 15) { | ||
485 | printk(KERN_CONT " %s\n", ascii); | ||
486 | newline = 1; | ||
487 | } | ||
488 | } | ||
489 | if (!newline) { | ||
490 | i %= 16; | ||
491 | while (i < 16) { | ||
492 | printk(KERN_CONT " "); | ||
493 | ascii[i] = ' '; | ||
494 | i++; | ||
495 | } | ||
496 | printk(KERN_CONT " %s\n", ascii); | ||
497 | } | ||
498 | } | 472 | } |
499 | 473 | ||
500 | static struct track *get_track(struct kmem_cache *s, void *object, | 474 | static struct track *get_track(struct kmem_cache *s, void *object, |
@@ -625,12 +599,12 @@ static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p) | |||
625 | p, p - addr, get_freepointer(s, p)); | 599 | p, p - addr, get_freepointer(s, p)); |
626 | 600 | ||
627 | if (p > addr + 16) | 601 | if (p > addr + 16) |
628 | print_section("Bytes b4", p - 16, 16); | 602 | print_section("Bytes b4 ", p - 16, 16); |
629 | |||
630 | print_section("Object", p, min_t(unsigned long, s->objsize, PAGE_SIZE)); | ||
631 | 603 | ||
604 | print_section("Object ", p, min_t(unsigned long, s->objsize, | ||
605 | PAGE_SIZE)); | ||
632 | if (s->flags & SLAB_RED_ZONE) | 606 | if (s->flags & SLAB_RED_ZONE) |
633 | print_section("Redzone", p + s->objsize, | 607 | print_section("Redzone ", p + s->objsize, |
634 | s->inuse - s->objsize); | 608 | s->inuse - s->objsize); |
635 | 609 | ||
636 | if (s->offset) | 610 | if (s->offset) |
@@ -643,7 +617,7 @@ static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p) | |||
643 | 617 | ||
644 | if (off != s->size) | 618 | if (off != s->size) |
645 | /* Beginning of the filler is the free pointer */ | 619 | /* Beginning of the filler is the free pointer */ |
646 | print_section("Padding", p + off, s->size - off); | 620 | print_section("Padding ", p + off, s->size - off); |
647 | 621 | ||
648 | dump_stack(); | 622 | dump_stack(); |
649 | } | 623 | } |
@@ -838,7 +812,7 @@ static int slab_pad_check(struct kmem_cache *s, struct page *page) | |||
838 | end--; | 812 | end--; |
839 | 813 | ||
840 | slab_err(s, page, "Padding overwritten. 0x%p-0x%p", fault, end - 1); | 814 | slab_err(s, page, "Padding overwritten. 0x%p-0x%p", fault, end - 1); |
841 | print_section("Padding", end - remainder, remainder); | 815 | print_section("Padding ", end - remainder, remainder); |
842 | 816 | ||
843 | restore_bytes(s, "slab padding", POISON_INUSE, end - remainder, end); | 817 | restore_bytes(s, "slab padding", POISON_INUSE, end - remainder, end); |
844 | return 0; | 818 | return 0; |
@@ -987,7 +961,7 @@ static void trace(struct kmem_cache *s, struct page *page, void *object, | |||
987 | page->freelist); | 961 | page->freelist); |
988 | 962 | ||
989 | if (!alloc) | 963 | if (!alloc) |
990 | print_section("Object", (void *)object, s->objsize); | 964 | print_section("Object ", (void *)object, s->objsize); |
991 | 965 | ||
992 | dump_stack(); | 966 | dump_stack(); |
993 | } | 967 | } |
@@ -1447,7 +1421,7 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node) | |||
1447 | set_freepointer(s, last, NULL); | 1421 | set_freepointer(s, last, NULL); |
1448 | 1422 | ||
1449 | page->freelist = start; | 1423 | page->freelist = start; |
1450 | page->inuse = 0; | 1424 | page->inuse = page->objects; |
1451 | page->frozen = 1; | 1425 | page->frozen = 1; |
1452 | out: | 1426 | out: |
1453 | return page; | 1427 | return page; |
@@ -1534,7 +1508,7 @@ static inline void add_partial(struct kmem_cache_node *n, | |||
1534 | struct page *page, int tail) | 1508 | struct page *page, int tail) |
1535 | { | 1509 | { |
1536 | n->nr_partial++; | 1510 | n->nr_partial++; |
1537 | if (tail) | 1511 | if (tail == DEACTIVATE_TO_TAIL) |
1538 | list_add_tail(&page->lru, &n->partial); | 1512 | list_add_tail(&page->lru, &n->partial); |
1539 | else | 1513 | else |
1540 | list_add(&page->lru, &n->partial); | 1514 | list_add(&page->lru, &n->partial); |
@@ -1554,10 +1528,13 @@ static inline void remove_partial(struct kmem_cache_node *n, | |||
1554 | * Lock slab, remove from the partial list and put the object into the | 1528 | * Lock slab, remove from the partial list and put the object into the |
1555 | * per cpu freelist. | 1529 | * per cpu freelist. |
1556 | * | 1530 | * |
1531 | * Returns a list of objects or NULL if it fails. | ||
1532 | * | ||
1557 | * Must hold list_lock. | 1533 | * Must hold list_lock. |
1558 | */ | 1534 | */ |
1559 | static inline int acquire_slab(struct kmem_cache *s, | 1535 | static inline void *acquire_slab(struct kmem_cache *s, |
1560 | struct kmem_cache_node *n, struct page *page) | 1536 | struct kmem_cache_node *n, struct page *page, |
1537 | int mode) | ||
1561 | { | 1538 | { |
1562 | void *freelist; | 1539 | void *freelist; |
1563 | unsigned long counters; | 1540 | unsigned long counters; |
@@ -1572,7 +1549,8 @@ static inline int acquire_slab(struct kmem_cache *s, | |||
1572 | freelist = page->freelist; | 1549 | freelist = page->freelist; |
1573 | counters = page->counters; | 1550 | counters = page->counters; |
1574 | new.counters = counters; | 1551 | new.counters = counters; |
1575 | new.inuse = page->objects; | 1552 | if (mode) |
1553 | new.inuse = page->objects; | ||
1576 | 1554 | ||
1577 | VM_BUG_ON(new.frozen); | 1555 | VM_BUG_ON(new.frozen); |
1578 | new.frozen = 1; | 1556 | new.frozen = 1; |
@@ -1583,32 +1561,19 @@ static inline int acquire_slab(struct kmem_cache *s, | |||
1583 | "lock and freeze")); | 1561 | "lock and freeze")); |
1584 | 1562 | ||
1585 | remove_partial(n, page); | 1563 | remove_partial(n, page); |
1586 | 1564 | return freelist; | |
1587 | if (freelist) { | ||
1588 | /* Populate the per cpu freelist */ | ||
1589 | this_cpu_write(s->cpu_slab->freelist, freelist); | ||
1590 | this_cpu_write(s->cpu_slab->page, page); | ||
1591 | this_cpu_write(s->cpu_slab->node, page_to_nid(page)); | ||
1592 | return 1; | ||
1593 | } else { | ||
1594 | /* | ||
1595 | * Slab page came from the wrong list. No object to allocate | ||
1596 | * from. Put it onto the correct list and continue partial | ||
1597 | * scan. | ||
1598 | */ | ||
1599 | printk(KERN_ERR "SLUB: %s : Page without available objects on" | ||
1600 | " partial list\n", s->name); | ||
1601 | return 0; | ||
1602 | } | ||
1603 | } | 1565 | } |
1604 | 1566 | ||
1567 | static int put_cpu_partial(struct kmem_cache *s, struct page *page, int drain); | ||
1568 | |||
1605 | /* | 1569 | /* |
1606 | * Try to allocate a partial slab from a specific node. | 1570 | * Try to allocate a partial slab from a specific node. |
1607 | */ | 1571 | */ |
1608 | static struct page *get_partial_node(struct kmem_cache *s, | 1572 | static void *get_partial_node(struct kmem_cache *s, |
1609 | struct kmem_cache_node *n) | 1573 | struct kmem_cache_node *n, struct kmem_cache_cpu *c) |
1610 | { | 1574 | { |
1611 | struct page *page; | 1575 | struct page *page, *page2; |
1576 | void *object = NULL; | ||
1612 | 1577 | ||
1613 | /* | 1578 | /* |
1614 | * Racy check. If we mistakenly see no partial slabs then we | 1579 | * Racy check. If we mistakenly see no partial slabs then we |
@@ -1620,26 +1585,43 @@ static struct page *get_partial_node(struct kmem_cache *s, | |||
1620 | return NULL; | 1585 | return NULL; |
1621 | 1586 | ||
1622 | spin_lock(&n->list_lock); | 1587 | spin_lock(&n->list_lock); |
1623 | list_for_each_entry(page, &n->partial, lru) | 1588 | list_for_each_entry_safe(page, page2, &n->partial, lru) { |
1624 | if (acquire_slab(s, n, page)) | 1589 | void *t = acquire_slab(s, n, page, object == NULL); |
1625 | goto out; | 1590 | int available; |
1626 | page = NULL; | 1591 | |
1627 | out: | 1592 | if (!t) |
1593 | break; | ||
1594 | |||
1595 | if (!object) { | ||
1596 | c->page = page; | ||
1597 | c->node = page_to_nid(page); | ||
1598 | stat(s, ALLOC_FROM_PARTIAL); | ||
1599 | object = t; | ||
1600 | available = page->objects - page->inuse; | ||
1601 | } else { | ||
1602 | page->freelist = t; | ||
1603 | available = put_cpu_partial(s, page, 0); | ||
1604 | } | ||
1605 | if (kmem_cache_debug(s) || available > s->cpu_partial / 2) | ||
1606 | break; | ||
1607 | |||
1608 | } | ||
1628 | spin_unlock(&n->list_lock); | 1609 | spin_unlock(&n->list_lock); |
1629 | return page; | 1610 | return object; |
1630 | } | 1611 | } |
1631 | 1612 | ||
1632 | /* | 1613 | /* |
1633 | * Get a page from somewhere. Search in increasing NUMA distances. | 1614 | * Get a page from somewhere. Search in increasing NUMA distances. |
1634 | */ | 1615 | */ |
1635 | static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags) | 1616 | static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags, |
1617 | struct kmem_cache_cpu *c) | ||
1636 | { | 1618 | { |
1637 | #ifdef CONFIG_NUMA | 1619 | #ifdef CONFIG_NUMA |
1638 | struct zonelist *zonelist; | 1620 | struct zonelist *zonelist; |
1639 | struct zoneref *z; | 1621 | struct zoneref *z; |
1640 | struct zone *zone; | 1622 | struct zone *zone; |
1641 | enum zone_type high_zoneidx = gfp_zone(flags); | 1623 | enum zone_type high_zoneidx = gfp_zone(flags); |
1642 | struct page *page; | 1624 | void *object; |
1643 | 1625 | ||
1644 | /* | 1626 | /* |
1645 | * The defrag ratio allows a configuration of the tradeoffs between | 1627 | * The defrag ratio allows a configuration of the tradeoffs between |
@@ -1672,10 +1654,10 @@ static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags) | |||
1672 | 1654 | ||
1673 | if (n && cpuset_zone_allowed_hardwall(zone, flags) && | 1655 | if (n && cpuset_zone_allowed_hardwall(zone, flags) && |
1674 | n->nr_partial > s->min_partial) { | 1656 | n->nr_partial > s->min_partial) { |
1675 | page = get_partial_node(s, n); | 1657 | object = get_partial_node(s, n, c); |
1676 | if (page) { | 1658 | if (object) { |
1677 | put_mems_allowed(); | 1659 | put_mems_allowed(); |
1678 | return page; | 1660 | return object; |
1679 | } | 1661 | } |
1680 | } | 1662 | } |
1681 | } | 1663 | } |
@@ -1687,16 +1669,17 @@ static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags) | |||
1687 | /* | 1669 | /* |
1688 | * Get a partial page, lock it and return it. | 1670 | * Get a partial page, lock it and return it. |
1689 | */ | 1671 | */ |
1690 | static struct page *get_partial(struct kmem_cache *s, gfp_t flags, int node) | 1672 | static void *get_partial(struct kmem_cache *s, gfp_t flags, int node, |
1673 | struct kmem_cache_cpu *c) | ||
1691 | { | 1674 | { |
1692 | struct page *page; | 1675 | void *object; |
1693 | int searchnode = (node == NUMA_NO_NODE) ? numa_node_id() : node; | 1676 | int searchnode = (node == NUMA_NO_NODE) ? numa_node_id() : node; |
1694 | 1677 | ||
1695 | page = get_partial_node(s, get_node(s, searchnode)); | 1678 | object = get_partial_node(s, get_node(s, searchnode), c); |
1696 | if (page || node != NUMA_NO_NODE) | 1679 | if (object || node != NUMA_NO_NODE) |
1697 | return page; | 1680 | return object; |
1698 | 1681 | ||
1699 | return get_any_partial(s, flags); | 1682 | return get_any_partial(s, flags, c); |
1700 | } | 1683 | } |
1701 | 1684 | ||
1702 | #ifdef CONFIG_PREEMPT | 1685 | #ifdef CONFIG_PREEMPT |
@@ -1765,9 +1748,6 @@ void init_kmem_cache_cpus(struct kmem_cache *s) | |||
1765 | for_each_possible_cpu(cpu) | 1748 | for_each_possible_cpu(cpu) |
1766 | per_cpu_ptr(s->cpu_slab, cpu)->tid = init_tid(cpu); | 1749 | per_cpu_ptr(s->cpu_slab, cpu)->tid = init_tid(cpu); |
1767 | } | 1750 | } |
1768 | /* | ||
1769 | * Remove the cpu slab | ||
1770 | */ | ||
1771 | 1751 | ||
1772 | /* | 1752 | /* |
1773 | * Remove the cpu slab | 1753 | * Remove the cpu slab |
@@ -1781,13 +1761,13 @@ static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) | |||
1781 | enum slab_modes l = M_NONE, m = M_NONE; | 1761 | enum slab_modes l = M_NONE, m = M_NONE; |
1782 | void *freelist; | 1762 | void *freelist; |
1783 | void *nextfree; | 1763 | void *nextfree; |
1784 | int tail = 0; | 1764 | int tail = DEACTIVATE_TO_HEAD; |
1785 | struct page new; | 1765 | struct page new; |
1786 | struct page old; | 1766 | struct page old; |
1787 | 1767 | ||
1788 | if (page->freelist) { | 1768 | if (page->freelist) { |
1789 | stat(s, DEACTIVATE_REMOTE_FREES); | 1769 | stat(s, DEACTIVATE_REMOTE_FREES); |
1790 | tail = 1; | 1770 | tail = DEACTIVATE_TO_TAIL; |
1791 | } | 1771 | } |
1792 | 1772 | ||
1793 | c->tid = next_tid(c->tid); | 1773 | c->tid = next_tid(c->tid); |
@@ -1893,7 +1873,7 @@ redo: | |||
1893 | if (m == M_PARTIAL) { | 1873 | if (m == M_PARTIAL) { |
1894 | 1874 | ||
1895 | add_partial(n, page, tail); | 1875 | add_partial(n, page, tail); |
1896 | stat(s, tail ? DEACTIVATE_TO_TAIL : DEACTIVATE_TO_HEAD); | 1876 | stat(s, tail); |
1897 | 1877 | ||
1898 | } else if (m == M_FULL) { | 1878 | } else if (m == M_FULL) { |
1899 | 1879 | ||
@@ -1920,6 +1900,123 @@ redo: | |||
1920 | } | 1900 | } |
1921 | } | 1901 | } |
1922 | 1902 | ||
1903 | /* Unfreeze all the cpu partial slabs */ | ||
1904 | static void unfreeze_partials(struct kmem_cache *s) | ||
1905 | { | ||
1906 | struct kmem_cache_node *n = NULL; | ||
1907 | struct kmem_cache_cpu *c = this_cpu_ptr(s->cpu_slab); | ||
1908 | struct page *page; | ||
1909 | |||
1910 | while ((page = c->partial)) { | ||
1911 | enum slab_modes { M_PARTIAL, M_FREE }; | ||
1912 | enum slab_modes l, m; | ||
1913 | struct page new; | ||
1914 | struct page old; | ||
1915 | |||
1916 | c->partial = page->next; | ||
1917 | l = M_FREE; | ||
1918 | |||
1919 | do { | ||
1920 | |||
1921 | old.freelist = page->freelist; | ||
1922 | old.counters = page->counters; | ||
1923 | VM_BUG_ON(!old.frozen); | ||
1924 | |||
1925 | new.counters = old.counters; | ||
1926 | new.freelist = old.freelist; | ||
1927 | |||
1928 | new.frozen = 0; | ||
1929 | |||
1930 | if (!new.inuse && (!n || n->nr_partial > s->min_partial)) | ||
1931 | m = M_FREE; | ||
1932 | else { | ||
1933 | struct kmem_cache_node *n2 = get_node(s, | ||
1934 | page_to_nid(page)); | ||
1935 | |||
1936 | m = M_PARTIAL; | ||
1937 | if (n != n2) { | ||
1938 | if (n) | ||
1939 | spin_unlock(&n->list_lock); | ||
1940 | |||
1941 | n = n2; | ||
1942 | spin_lock(&n->list_lock); | ||
1943 | } | ||
1944 | } | ||
1945 | |||
1946 | if (l != m) { | ||
1947 | if (l == M_PARTIAL) | ||
1948 | remove_partial(n, page); | ||
1949 | else | ||
1950 | add_partial(n, page, 1); | ||
1951 | |||
1952 | l = m; | ||
1953 | } | ||
1954 | |||
1955 | } while (!cmpxchg_double_slab(s, page, | ||
1956 | old.freelist, old.counters, | ||
1957 | new.freelist, new.counters, | ||
1958 | "unfreezing slab")); | ||
1959 | |||
1960 | if (m == M_FREE) { | ||
1961 | stat(s, DEACTIVATE_EMPTY); | ||
1962 | discard_slab(s, page); | ||
1963 | stat(s, FREE_SLAB); | ||
1964 | } | ||
1965 | } | ||
1966 | |||
1967 | if (n) | ||
1968 | spin_unlock(&n->list_lock); | ||
1969 | } | ||
1970 | |||
1971 | /* | ||
1972 | * Put a page that was just frozen (in __slab_free) into a partial page | ||
1973 | * slot if available. This is done without interrupts disabled and without | ||
1974 | * preemption disabled. The cmpxchg is racy and may put the partial page | ||
1975 | * onto a random cpus partial slot. | ||
1976 | * | ||
1977 | * If we did not find a slot then simply move all the partials to the | ||
1978 | * per node partial list. | ||
1979 | */ | ||
1980 | int put_cpu_partial(struct kmem_cache *s, struct page *page, int drain) | ||
1981 | { | ||
1982 | struct page *oldpage; | ||
1983 | int pages; | ||
1984 | int pobjects; | ||
1985 | |||
1986 | do { | ||
1987 | pages = 0; | ||
1988 | pobjects = 0; | ||
1989 | oldpage = this_cpu_read(s->cpu_slab->partial); | ||
1990 | |||
1991 | if (oldpage) { | ||
1992 | pobjects = oldpage->pobjects; | ||
1993 | pages = oldpage->pages; | ||
1994 | if (drain && pobjects > s->cpu_partial) { | ||
1995 | unsigned long flags; | ||
1996 | /* | ||
1997 | * partial array is full. Move the existing | ||
1998 | * set to the per node partial list. | ||
1999 | */ | ||
2000 | local_irq_save(flags); | ||
2001 | unfreeze_partials(s); | ||
2002 | local_irq_restore(flags); | ||
2003 | pobjects = 0; | ||
2004 | pages = 0; | ||
2005 | } | ||
2006 | } | ||
2007 | |||
2008 | pages++; | ||
2009 | pobjects += page->objects - page->inuse; | ||
2010 | |||
2011 | page->pages = pages; | ||
2012 | page->pobjects = pobjects; | ||
2013 | page->next = oldpage; | ||
2014 | |||
2015 | } while (this_cpu_cmpxchg(s->cpu_slab->partial, oldpage, page) != oldpage); | ||
2016 | stat(s, CPU_PARTIAL_FREE); | ||
2017 | return pobjects; | ||
2018 | } | ||
2019 | |||
1923 | static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) | 2020 | static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) |
1924 | { | 2021 | { |
1925 | stat(s, CPUSLAB_FLUSH); | 2022 | stat(s, CPUSLAB_FLUSH); |
@@ -1935,8 +2032,12 @@ static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu) | |||
1935 | { | 2032 | { |
1936 | struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu); | 2033 | struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu); |
1937 | 2034 | ||
1938 | if (likely(c && c->page)) | 2035 | if (likely(c)) { |
1939 | flush_slab(s, c); | 2036 | if (c->page) |
2037 | flush_slab(s, c); | ||
2038 | |||
2039 | unfreeze_partials(s); | ||
2040 | } | ||
1940 | } | 2041 | } |
1941 | 2042 | ||
1942 | static void flush_cpu_slab(void *d) | 2043 | static void flush_cpu_slab(void *d) |
@@ -2027,12 +2128,39 @@ slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid) | |||
2027 | } | 2128 | } |
2028 | } | 2129 | } |
2029 | 2130 | ||
2131 | static inline void *new_slab_objects(struct kmem_cache *s, gfp_t flags, | ||
2132 | int node, struct kmem_cache_cpu **pc) | ||
2133 | { | ||
2134 | void *object; | ||
2135 | struct kmem_cache_cpu *c; | ||
2136 | struct page *page = new_slab(s, flags, node); | ||
2137 | |||
2138 | if (page) { | ||
2139 | c = __this_cpu_ptr(s->cpu_slab); | ||
2140 | if (c->page) | ||
2141 | flush_slab(s, c); | ||
2142 | |||
2143 | /* | ||
2144 | * No other reference to the page yet so we can | ||
2145 | * muck around with it freely without cmpxchg | ||
2146 | */ | ||
2147 | object = page->freelist; | ||
2148 | page->freelist = NULL; | ||
2149 | |||
2150 | stat(s, ALLOC_SLAB); | ||
2151 | c->node = page_to_nid(page); | ||
2152 | c->page = page; | ||
2153 | *pc = c; | ||
2154 | } else | ||
2155 | object = NULL; | ||
2156 | |||
2157 | return object; | ||
2158 | } | ||
2159 | |||
2030 | /* | 2160 | /* |
2031 | * Slow path. The lockless freelist is empty or we need to perform | 2161 | * Slow path. The lockless freelist is empty or we need to perform |
2032 | * debugging duties. | 2162 | * debugging duties. |
2033 | * | 2163 | * |
2034 | * Interrupts are disabled. | ||
2035 | * | ||
2036 | * Processing is still very fast if new objects have been freed to the | 2164 | * Processing is still very fast if new objects have been freed to the |
2037 | * regular freelist. In that case we simply take over the regular freelist | 2165 | * regular freelist. In that case we simply take over the regular freelist |
2038 | * as the lockless freelist and zap the regular freelist. | 2166 | * as the lockless freelist and zap the regular freelist. |
@@ -2049,7 +2177,6 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, | |||
2049 | unsigned long addr, struct kmem_cache_cpu *c) | 2177 | unsigned long addr, struct kmem_cache_cpu *c) |
2050 | { | 2178 | { |
2051 | void **object; | 2179 | void **object; |
2052 | struct page *page; | ||
2053 | unsigned long flags; | 2180 | unsigned long flags; |
2054 | struct page new; | 2181 | struct page new; |
2055 | unsigned long counters; | 2182 | unsigned long counters; |
@@ -2064,13 +2191,9 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, | |||
2064 | c = this_cpu_ptr(s->cpu_slab); | 2191 | c = this_cpu_ptr(s->cpu_slab); |
2065 | #endif | 2192 | #endif |
2066 | 2193 | ||
2067 | /* We handle __GFP_ZERO in the caller */ | 2194 | if (!c->page) |
2068 | gfpflags &= ~__GFP_ZERO; | ||
2069 | |||
2070 | page = c->page; | ||
2071 | if (!page) | ||
2072 | goto new_slab; | 2195 | goto new_slab; |
2073 | 2196 | redo: | |
2074 | if (unlikely(!node_match(c, node))) { | 2197 | if (unlikely(!node_match(c, node))) { |
2075 | stat(s, ALLOC_NODE_MISMATCH); | 2198 | stat(s, ALLOC_NODE_MISMATCH); |
2076 | deactivate_slab(s, c); | 2199 | deactivate_slab(s, c); |
@@ -2080,8 +2203,8 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, | |||
2080 | stat(s, ALLOC_SLOWPATH); | 2203 | stat(s, ALLOC_SLOWPATH); |
2081 | 2204 | ||
2082 | do { | 2205 | do { |
2083 | object = page->freelist; | 2206 | object = c->page->freelist; |
2084 | counters = page->counters; | 2207 | counters = c->page->counters; |
2085 | new.counters = counters; | 2208 | new.counters = counters; |
2086 | VM_BUG_ON(!new.frozen); | 2209 | VM_BUG_ON(!new.frozen); |
2087 | 2210 | ||
@@ -2093,17 +2216,17 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, | |||
2093 | * | 2216 | * |
2094 | * If there are objects left then we retrieve them | 2217 | * If there are objects left then we retrieve them |
2095 | * and use them to refill the per cpu queue. | 2218 | * and use them to refill the per cpu queue. |
2096 | */ | 2219 | */ |
2097 | 2220 | ||
2098 | new.inuse = page->objects; | 2221 | new.inuse = c->page->objects; |
2099 | new.frozen = object != NULL; | 2222 | new.frozen = object != NULL; |
2100 | 2223 | ||
2101 | } while (!__cmpxchg_double_slab(s, page, | 2224 | } while (!__cmpxchg_double_slab(s, c->page, |
2102 | object, counters, | 2225 | object, counters, |
2103 | NULL, new.counters, | 2226 | NULL, new.counters, |
2104 | "__slab_alloc")); | 2227 | "__slab_alloc")); |
2105 | 2228 | ||
2106 | if (unlikely(!object)) { | 2229 | if (!object) { |
2107 | c->page = NULL; | 2230 | c->page = NULL; |
2108 | stat(s, DEACTIVATE_BYPASS); | 2231 | stat(s, DEACTIVATE_BYPASS); |
2109 | goto new_slab; | 2232 | goto new_slab; |
@@ -2112,58 +2235,47 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, | |||
2112 | stat(s, ALLOC_REFILL); | 2235 | stat(s, ALLOC_REFILL); |
2113 | 2236 | ||
2114 | load_freelist: | 2237 | load_freelist: |
2115 | VM_BUG_ON(!page->frozen); | ||
2116 | c->freelist = get_freepointer(s, object); | 2238 | c->freelist = get_freepointer(s, object); |
2117 | c->tid = next_tid(c->tid); | 2239 | c->tid = next_tid(c->tid); |
2118 | local_irq_restore(flags); | 2240 | local_irq_restore(flags); |
2119 | return object; | 2241 | return object; |
2120 | 2242 | ||
2121 | new_slab: | 2243 | new_slab: |
2122 | page = get_partial(s, gfpflags, node); | ||
2123 | if (page) { | ||
2124 | stat(s, ALLOC_FROM_PARTIAL); | ||
2125 | object = c->freelist; | ||
2126 | 2244 | ||
2127 | if (kmem_cache_debug(s)) | 2245 | if (c->partial) { |
2128 | goto debug; | 2246 | c->page = c->partial; |
2129 | goto load_freelist; | 2247 | c->partial = c->page->next; |
2248 | c->node = page_to_nid(c->page); | ||
2249 | stat(s, CPU_PARTIAL_ALLOC); | ||
2250 | c->freelist = NULL; | ||
2251 | goto redo; | ||
2130 | } | 2252 | } |
2131 | 2253 | ||
2132 | page = new_slab(s, gfpflags, node); | 2254 | /* Then do expensive stuff like retrieving pages from the partial lists */ |
2255 | object = get_partial(s, gfpflags, node, c); | ||
2133 | 2256 | ||
2134 | if (page) { | 2257 | if (unlikely(!object)) { |
2135 | c = __this_cpu_ptr(s->cpu_slab); | ||
2136 | if (c->page) | ||
2137 | flush_slab(s, c); | ||
2138 | 2258 | ||
2139 | /* | 2259 | object = new_slab_objects(s, gfpflags, node, &c); |
2140 | * No other reference to the page yet so we can | ||
2141 | * muck around with it freely without cmpxchg | ||
2142 | */ | ||
2143 | object = page->freelist; | ||
2144 | page->freelist = NULL; | ||
2145 | page->inuse = page->objects; | ||
2146 | 2260 | ||
2147 | stat(s, ALLOC_SLAB); | 2261 | if (unlikely(!object)) { |
2148 | c->node = page_to_nid(page); | 2262 | if (!(gfpflags & __GFP_NOWARN) && printk_ratelimit()) |
2149 | c->page = page; | 2263 | slab_out_of_memory(s, gfpflags, node); |
2150 | 2264 | ||
2151 | if (kmem_cache_debug(s)) | 2265 | local_irq_restore(flags); |
2152 | goto debug; | 2266 | return NULL; |
2153 | goto load_freelist; | 2267 | } |
2154 | } | 2268 | } |
2155 | if (!(gfpflags & __GFP_NOWARN) && printk_ratelimit()) | ||
2156 | slab_out_of_memory(s, gfpflags, node); | ||
2157 | local_irq_restore(flags); | ||
2158 | return NULL; | ||
2159 | 2269 | ||
2160 | debug: | 2270 | if (likely(!kmem_cache_debug(s))) |
2161 | if (!object || !alloc_debug_processing(s, page, object, addr)) | 2271 | goto load_freelist; |
2162 | goto new_slab; | 2272 | |
2273 | /* Only entered in the debug case */ | ||
2274 | if (!alloc_debug_processing(s, c->page, object, addr)) | ||
2275 | goto new_slab; /* Slab failed checks. Next slab needed */ | ||
2163 | 2276 | ||
2164 | c->freelist = get_freepointer(s, object); | 2277 | c->freelist = get_freepointer(s, object); |
2165 | deactivate_slab(s, c); | 2278 | deactivate_slab(s, c); |
2166 | c->page = NULL; | ||
2167 | c->node = NUMA_NO_NODE; | 2279 | c->node = NUMA_NO_NODE; |
2168 | local_irq_restore(flags); | 2280 | local_irq_restore(flags); |
2169 | return object; | 2281 | return object; |
@@ -2333,16 +2445,29 @@ static void __slab_free(struct kmem_cache *s, struct page *page, | |||
2333 | was_frozen = new.frozen; | 2445 | was_frozen = new.frozen; |
2334 | new.inuse--; | 2446 | new.inuse--; |
2335 | if ((!new.inuse || !prior) && !was_frozen && !n) { | 2447 | if ((!new.inuse || !prior) && !was_frozen && !n) { |
2336 | n = get_node(s, page_to_nid(page)); | 2448 | |
2337 | /* | 2449 | if (!kmem_cache_debug(s) && !prior) |
2338 | * Speculatively acquire the list_lock. | 2450 | |
2339 | * If the cmpxchg does not succeed then we may | 2451 | /* |
2340 | * drop the list_lock without any processing. | 2452 | * Slab was on no list before and will be partially empty |
2341 | * | 2453 | * We can defer the list move and instead freeze it. |
2342 | * Otherwise the list_lock will synchronize with | 2454 | */ |
2343 | * other processors updating the list of slabs. | 2455 | new.frozen = 1; |
2344 | */ | 2456 | |
2345 | spin_lock_irqsave(&n->list_lock, flags); | 2457 | else { /* Needs to be taken off a list */ |
2458 | |||
2459 | n = get_node(s, page_to_nid(page)); | ||
2460 | /* | ||
2461 | * Speculatively acquire the list_lock. | ||
2462 | * If the cmpxchg does not succeed then we may | ||
2463 | * drop the list_lock without any processing. | ||
2464 | * | ||
2465 | * Otherwise the list_lock will synchronize with | ||
2466 | * other processors updating the list of slabs. | ||
2467 | */ | ||
2468 | spin_lock_irqsave(&n->list_lock, flags); | ||
2469 | |||
2470 | } | ||
2346 | } | 2471 | } |
2347 | inuse = new.inuse; | 2472 | inuse = new.inuse; |
2348 | 2473 | ||
@@ -2352,7 +2477,15 @@ static void __slab_free(struct kmem_cache *s, struct page *page, | |||
2352 | "__slab_free")); | 2477 | "__slab_free")); |
2353 | 2478 | ||
2354 | if (likely(!n)) { | 2479 | if (likely(!n)) { |
2355 | /* | 2480 | |
2481 | /* | ||
2482 | * If we just froze the page then put it onto the | ||
2483 | * per cpu partial list. | ||
2484 | */ | ||
2485 | if (new.frozen && !was_frozen) | ||
2486 | put_cpu_partial(s, page, 1); | ||
2487 | |||
2488 | /* | ||
2356 | * The list lock was not taken therefore no list | 2489 | * The list lock was not taken therefore no list |
2357 | * activity can be necessary. | 2490 | * activity can be necessary. |
2358 | */ | 2491 | */ |
@@ -2377,7 +2510,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page, | |||
2377 | */ | 2510 | */ |
2378 | if (unlikely(!prior)) { | 2511 | if (unlikely(!prior)) { |
2379 | remove_full(s, page); | 2512 | remove_full(s, page); |
2380 | add_partial(n, page, 1); | 2513 | add_partial(n, page, DEACTIVATE_TO_TAIL); |
2381 | stat(s, FREE_ADD_PARTIAL); | 2514 | stat(s, FREE_ADD_PARTIAL); |
2382 | } | 2515 | } |
2383 | } | 2516 | } |
@@ -2421,7 +2554,6 @@ static __always_inline void slab_free(struct kmem_cache *s, | |||
2421 | slab_free_hook(s, x); | 2554 | slab_free_hook(s, x); |
2422 | 2555 | ||
2423 | redo: | 2556 | redo: |
2424 | |||
2425 | /* | 2557 | /* |
2426 | * Determine the currently cpus per cpu slab. | 2558 | * Determine the currently cpus per cpu slab. |
2427 | * The cpu may change afterward. However that does not matter since | 2559 | * The cpu may change afterward. However that does not matter since |
@@ -2685,7 +2817,7 @@ static void early_kmem_cache_node_alloc(int node) | |||
2685 | n = page->freelist; | 2817 | n = page->freelist; |
2686 | BUG_ON(!n); | 2818 | BUG_ON(!n); |
2687 | page->freelist = get_freepointer(kmem_cache_node, n); | 2819 | page->freelist = get_freepointer(kmem_cache_node, n); |
2688 | page->inuse++; | 2820 | page->inuse = 1; |
2689 | page->frozen = 0; | 2821 | page->frozen = 0; |
2690 | kmem_cache_node->node[node] = n; | 2822 | kmem_cache_node->node[node] = n; |
2691 | #ifdef CONFIG_SLUB_DEBUG | 2823 | #ifdef CONFIG_SLUB_DEBUG |
@@ -2695,7 +2827,7 @@ static void early_kmem_cache_node_alloc(int node) | |||
2695 | init_kmem_cache_node(n, kmem_cache_node); | 2827 | init_kmem_cache_node(n, kmem_cache_node); |
2696 | inc_slabs_node(kmem_cache_node, node, page->objects); | 2828 | inc_slabs_node(kmem_cache_node, node, page->objects); |
2697 | 2829 | ||
2698 | add_partial(n, page, 0); | 2830 | add_partial(n, page, DEACTIVATE_TO_HEAD); |
2699 | } | 2831 | } |
2700 | 2832 | ||
2701 | static void free_kmem_cache_nodes(struct kmem_cache *s) | 2833 | static void free_kmem_cache_nodes(struct kmem_cache *s) |
@@ -2911,7 +3043,34 @@ static int kmem_cache_open(struct kmem_cache *s, | |||
2911 | * The larger the object size is, the more pages we want on the partial | 3043 | * The larger the object size is, the more pages we want on the partial |
2912 | * list to avoid pounding the page allocator excessively. | 3044 | * list to avoid pounding the page allocator excessively. |
2913 | */ | 3045 | */ |
2914 | set_min_partial(s, ilog2(s->size)); | 3046 | set_min_partial(s, ilog2(s->size) / 2); |
3047 | |||
3048 | /* | ||
3049 | * cpu_partial determined the maximum number of objects kept in the | ||
3050 | * per cpu partial lists of a processor. | ||
3051 | * | ||
3052 | * Per cpu partial lists mainly contain slabs that just have one | ||
3053 | * object freed. If they are used for allocation then they can be | ||
3054 | * filled up again with minimal effort. The slab will never hit the | ||
3055 | * per node partial lists and therefore no locking will be required. | ||
3056 | * | ||
3057 | * This setting also determines | ||
3058 | * | ||
3059 | * A) The number of objects from per cpu partial slabs dumped to the | ||
3060 | * per node list when we reach the limit. | ||
3061 | * B) The number of objects in cpu partial slabs to extract from the | ||
3062 | * per node list when we run out of per cpu objects. We only fetch 50% | ||
3063 | * to keep some capacity around for frees. | ||
3064 | */ | ||
3065 | if (s->size >= PAGE_SIZE) | ||
3066 | s->cpu_partial = 2; | ||
3067 | else if (s->size >= 1024) | ||
3068 | s->cpu_partial = 6; | ||
3069 | else if (s->size >= 256) | ||
3070 | s->cpu_partial = 13; | ||
3071 | else | ||
3072 | s->cpu_partial = 30; | ||
3073 | |||
2915 | s->refcount = 1; | 3074 | s->refcount = 1; |
2916 | #ifdef CONFIG_NUMA | 3075 | #ifdef CONFIG_NUMA |
2917 | s->remote_node_defrag_ratio = 1000; | 3076 | s->remote_node_defrag_ratio = 1000; |
@@ -2970,13 +3129,13 @@ static void list_slab_objects(struct kmem_cache *s, struct page *page, | |||
2970 | 3129 | ||
2971 | /* | 3130 | /* |
2972 | * Attempt to free all partial slabs on a node. | 3131 | * Attempt to free all partial slabs on a node. |
3132 | * This is called from kmem_cache_close(). We must be the last thread | ||
3133 | * using the cache and therefore we do not need to lock anymore. | ||
2973 | */ | 3134 | */ |
2974 | static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n) | 3135 | static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n) |
2975 | { | 3136 | { |
2976 | unsigned long flags; | ||
2977 | struct page *page, *h; | 3137 | struct page *page, *h; |
2978 | 3138 | ||
2979 | spin_lock_irqsave(&n->list_lock, flags); | ||
2980 | list_for_each_entry_safe(page, h, &n->partial, lru) { | 3139 | list_for_each_entry_safe(page, h, &n->partial, lru) { |
2981 | if (!page->inuse) { | 3140 | if (!page->inuse) { |
2982 | remove_partial(n, page); | 3141 | remove_partial(n, page); |
@@ -2986,7 +3145,6 @@ static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n) | |||
2986 | "Objects remaining on kmem_cache_close()"); | 3145 | "Objects remaining on kmem_cache_close()"); |
2987 | } | 3146 | } |
2988 | } | 3147 | } |
2989 | spin_unlock_irqrestore(&n->list_lock, flags); | ||
2990 | } | 3148 | } |
2991 | 3149 | ||
2992 | /* | 3150 | /* |
@@ -3020,6 +3178,7 @@ void kmem_cache_destroy(struct kmem_cache *s) | |||
3020 | s->refcount--; | 3178 | s->refcount--; |
3021 | if (!s->refcount) { | 3179 | if (!s->refcount) { |
3022 | list_del(&s->list); | 3180 | list_del(&s->list); |
3181 | up_write(&slub_lock); | ||
3023 | if (kmem_cache_close(s)) { | 3182 | if (kmem_cache_close(s)) { |
3024 | printk(KERN_ERR "SLUB %s: %s called for cache that " | 3183 | printk(KERN_ERR "SLUB %s: %s called for cache that " |
3025 | "still has objects.\n", s->name, __func__); | 3184 | "still has objects.\n", s->name, __func__); |
@@ -3028,8 +3187,8 @@ void kmem_cache_destroy(struct kmem_cache *s) | |||
3028 | if (s->flags & SLAB_DESTROY_BY_RCU) | 3187 | if (s->flags & SLAB_DESTROY_BY_RCU) |
3029 | rcu_barrier(); | 3188 | rcu_barrier(); |
3030 | sysfs_slab_remove(s); | 3189 | sysfs_slab_remove(s); |
3031 | } | 3190 | } else |
3032 | up_write(&slub_lock); | 3191 | up_write(&slub_lock); |
3033 | } | 3192 | } |
3034 | EXPORT_SYMBOL(kmem_cache_destroy); | 3193 | EXPORT_SYMBOL(kmem_cache_destroy); |
3035 | 3194 | ||
@@ -3347,23 +3506,23 @@ int kmem_cache_shrink(struct kmem_cache *s) | |||
3347 | * list_lock. page->inuse here is the upper limit. | 3506 | * list_lock. page->inuse here is the upper limit. |
3348 | */ | 3507 | */ |
3349 | list_for_each_entry_safe(page, t, &n->partial, lru) { | 3508 | list_for_each_entry_safe(page, t, &n->partial, lru) { |
3350 | if (!page->inuse) { | 3509 | list_move(&page->lru, slabs_by_inuse + page->inuse); |
3351 | remove_partial(n, page); | 3510 | if (!page->inuse) |
3352 | discard_slab(s, page); | 3511 | n->nr_partial--; |
3353 | } else { | ||
3354 | list_move(&page->lru, | ||
3355 | slabs_by_inuse + page->inuse); | ||
3356 | } | ||
3357 | } | 3512 | } |
3358 | 3513 | ||
3359 | /* | 3514 | /* |
3360 | * Rebuild the partial list with the slabs filled up most | 3515 | * Rebuild the partial list with the slabs filled up most |
3361 | * first and the least used slabs at the end. | 3516 | * first and the least used slabs at the end. |
3362 | */ | 3517 | */ |
3363 | for (i = objects - 1; i >= 0; i--) | 3518 | for (i = objects - 1; i > 0; i--) |
3364 | list_splice(slabs_by_inuse + i, n->partial.prev); | 3519 | list_splice(slabs_by_inuse + i, n->partial.prev); |
3365 | 3520 | ||
3366 | spin_unlock_irqrestore(&n->list_lock, flags); | 3521 | spin_unlock_irqrestore(&n->list_lock, flags); |
3522 | |||
3523 | /* Release empty slabs */ | ||
3524 | list_for_each_entry_safe(page, t, slabs_by_inuse, lru) | ||
3525 | discard_slab(s, page); | ||
3367 | } | 3526 | } |
3368 | 3527 | ||
3369 | kfree(slabs_by_inuse); | 3528 | kfree(slabs_by_inuse); |
@@ -4319,6 +4478,7 @@ static ssize_t show_slab_objects(struct kmem_cache *s, | |||
4319 | 4478 | ||
4320 | for_each_possible_cpu(cpu) { | 4479 | for_each_possible_cpu(cpu) { |
4321 | struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu); | 4480 | struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu); |
4481 | struct page *page; | ||
4322 | 4482 | ||
4323 | if (!c || c->node < 0) | 4483 | if (!c || c->node < 0) |
4324 | continue; | 4484 | continue; |
@@ -4334,6 +4494,13 @@ static ssize_t show_slab_objects(struct kmem_cache *s, | |||
4334 | total += x; | 4494 | total += x; |
4335 | nodes[c->node] += x; | 4495 | nodes[c->node] += x; |
4336 | } | 4496 | } |
4497 | page = c->partial; | ||
4498 | |||
4499 | if (page) { | ||
4500 | x = page->pobjects; | ||
4501 | total += x; | ||
4502 | nodes[c->node] += x; | ||
4503 | } | ||
4337 | per_cpu[c->node]++; | 4504 | per_cpu[c->node]++; |
4338 | } | 4505 | } |
4339 | } | 4506 | } |
@@ -4412,11 +4579,12 @@ struct slab_attribute { | |||
4412 | }; | 4579 | }; |
4413 | 4580 | ||
4414 | #define SLAB_ATTR_RO(_name) \ | 4581 | #define SLAB_ATTR_RO(_name) \ |
4415 | static struct slab_attribute _name##_attr = __ATTR_RO(_name) | 4582 | static struct slab_attribute _name##_attr = \ |
4583 | __ATTR(_name, 0400, _name##_show, NULL) | ||
4416 | 4584 | ||
4417 | #define SLAB_ATTR(_name) \ | 4585 | #define SLAB_ATTR(_name) \ |
4418 | static struct slab_attribute _name##_attr = \ | 4586 | static struct slab_attribute _name##_attr = \ |
4419 | __ATTR(_name, 0644, _name##_show, _name##_store) | 4587 | __ATTR(_name, 0600, _name##_show, _name##_store) |
4420 | 4588 | ||
4421 | static ssize_t slab_size_show(struct kmem_cache *s, char *buf) | 4589 | static ssize_t slab_size_show(struct kmem_cache *s, char *buf) |
4422 | { | 4590 | { |
@@ -4485,6 +4653,27 @@ static ssize_t min_partial_store(struct kmem_cache *s, const char *buf, | |||
4485 | } | 4653 | } |
4486 | SLAB_ATTR(min_partial); | 4654 | SLAB_ATTR(min_partial); |
4487 | 4655 | ||
4656 | static ssize_t cpu_partial_show(struct kmem_cache *s, char *buf) | ||
4657 | { | ||
4658 | return sprintf(buf, "%u\n", s->cpu_partial); | ||
4659 | } | ||
4660 | |||
4661 | static ssize_t cpu_partial_store(struct kmem_cache *s, const char *buf, | ||
4662 | size_t length) | ||
4663 | { | ||
4664 | unsigned long objects; | ||
4665 | int err; | ||
4666 | |||
4667 | err = strict_strtoul(buf, 10, &objects); | ||
4668 | if (err) | ||
4669 | return err; | ||
4670 | |||
4671 | s->cpu_partial = objects; | ||
4672 | flush_all(s); | ||
4673 | return length; | ||
4674 | } | ||
4675 | SLAB_ATTR(cpu_partial); | ||
4676 | |||
4488 | static ssize_t ctor_show(struct kmem_cache *s, char *buf) | 4677 | static ssize_t ctor_show(struct kmem_cache *s, char *buf) |
4489 | { | 4678 | { |
4490 | if (!s->ctor) | 4679 | if (!s->ctor) |
@@ -4523,6 +4712,37 @@ static ssize_t objects_partial_show(struct kmem_cache *s, char *buf) | |||
4523 | } | 4712 | } |
4524 | SLAB_ATTR_RO(objects_partial); | 4713 | SLAB_ATTR_RO(objects_partial); |
4525 | 4714 | ||
4715 | static ssize_t slabs_cpu_partial_show(struct kmem_cache *s, char *buf) | ||
4716 | { | ||
4717 | int objects = 0; | ||
4718 | int pages = 0; | ||
4719 | int cpu; | ||
4720 | int len; | ||
4721 | |||
4722 | for_each_online_cpu(cpu) { | ||
4723 | struct page *page = per_cpu_ptr(s->cpu_slab, cpu)->partial; | ||
4724 | |||
4725 | if (page) { | ||
4726 | pages += page->pages; | ||
4727 | objects += page->pobjects; | ||
4728 | } | ||
4729 | } | ||
4730 | |||
4731 | len = sprintf(buf, "%d(%d)", objects, pages); | ||
4732 | |||
4733 | #ifdef CONFIG_SMP | ||
4734 | for_each_online_cpu(cpu) { | ||
4735 | struct page *page = per_cpu_ptr(s->cpu_slab, cpu) ->partial; | ||
4736 | |||
4737 | if (page && len < PAGE_SIZE - 20) | ||
4738 | len += sprintf(buf + len, " C%d=%d(%d)", cpu, | ||
4739 | page->pobjects, page->pages); | ||
4740 | } | ||
4741 | #endif | ||
4742 | return len + sprintf(buf + len, "\n"); | ||
4743 | } | ||
4744 | SLAB_ATTR_RO(slabs_cpu_partial); | ||
4745 | |||
4526 | static ssize_t reclaim_account_show(struct kmem_cache *s, char *buf) | 4746 | static ssize_t reclaim_account_show(struct kmem_cache *s, char *buf) |
4527 | { | 4747 | { |
4528 | return sprintf(buf, "%d\n", !!(s->flags & SLAB_RECLAIM_ACCOUNT)); | 4748 | return sprintf(buf, "%d\n", !!(s->flags & SLAB_RECLAIM_ACCOUNT)); |
@@ -4845,6 +5065,8 @@ STAT_ATTR(DEACTIVATE_BYPASS, deactivate_bypass); | |||
4845 | STAT_ATTR(ORDER_FALLBACK, order_fallback); | 5065 | STAT_ATTR(ORDER_FALLBACK, order_fallback); |
4846 | STAT_ATTR(CMPXCHG_DOUBLE_CPU_FAIL, cmpxchg_double_cpu_fail); | 5066 | STAT_ATTR(CMPXCHG_DOUBLE_CPU_FAIL, cmpxchg_double_cpu_fail); |
4847 | STAT_ATTR(CMPXCHG_DOUBLE_FAIL, cmpxchg_double_fail); | 5067 | STAT_ATTR(CMPXCHG_DOUBLE_FAIL, cmpxchg_double_fail); |
5068 | STAT_ATTR(CPU_PARTIAL_ALLOC, cpu_partial_alloc); | ||
5069 | STAT_ATTR(CPU_PARTIAL_FREE, cpu_partial_free); | ||
4848 | #endif | 5070 | #endif |
4849 | 5071 | ||
4850 | static struct attribute *slab_attrs[] = { | 5072 | static struct attribute *slab_attrs[] = { |
@@ -4853,6 +5075,7 @@ static struct attribute *slab_attrs[] = { | |||
4853 | &objs_per_slab_attr.attr, | 5075 | &objs_per_slab_attr.attr, |
4854 | &order_attr.attr, | 5076 | &order_attr.attr, |
4855 | &min_partial_attr.attr, | 5077 | &min_partial_attr.attr, |
5078 | &cpu_partial_attr.attr, | ||
4856 | &objects_attr.attr, | 5079 | &objects_attr.attr, |
4857 | &objects_partial_attr.attr, | 5080 | &objects_partial_attr.attr, |
4858 | &partial_attr.attr, | 5081 | &partial_attr.attr, |
@@ -4865,6 +5088,7 @@ static struct attribute *slab_attrs[] = { | |||
4865 | &destroy_by_rcu_attr.attr, | 5088 | &destroy_by_rcu_attr.attr, |
4866 | &shrink_attr.attr, | 5089 | &shrink_attr.attr, |
4867 | &reserved_attr.attr, | 5090 | &reserved_attr.attr, |
5091 | &slabs_cpu_partial_attr.attr, | ||
4868 | #ifdef CONFIG_SLUB_DEBUG | 5092 | #ifdef CONFIG_SLUB_DEBUG |
4869 | &total_objects_attr.attr, | 5093 | &total_objects_attr.attr, |
4870 | &slabs_attr.attr, | 5094 | &slabs_attr.attr, |
@@ -4906,6 +5130,8 @@ static struct attribute *slab_attrs[] = { | |||
4906 | &order_fallback_attr.attr, | 5130 | &order_fallback_attr.attr, |
4907 | &cmpxchg_double_fail_attr.attr, | 5131 | &cmpxchg_double_fail_attr.attr, |
4908 | &cmpxchg_double_cpu_fail_attr.attr, | 5132 | &cmpxchg_double_cpu_fail_attr.attr, |
5133 | &cpu_partial_alloc_attr.attr, | ||
5134 | &cpu_partial_free_attr.attr, | ||
4909 | #endif | 5135 | #endif |
4910 | #ifdef CONFIG_FAILSLAB | 5136 | #ifdef CONFIG_FAILSLAB |
4911 | &failslab_attr.attr, | 5137 | &failslab_attr.attr, |
@@ -5257,7 +5483,7 @@ static const struct file_operations proc_slabinfo_operations = { | |||
5257 | 5483 | ||
5258 | static int __init slab_proc_init(void) | 5484 | static int __init slab_proc_init(void) |
5259 | { | 5485 | { |
5260 | proc_create("slabinfo", S_IRUGO, NULL, &proc_slabinfo_operations); | 5486 | proc_create("slabinfo", S_IRUSR, NULL, &proc_slabinfo_operations); |
5261 | return 0; | 5487 | return 0; |
5262 | } | 5488 | } |
5263 | module_init(slab_proc_init); | 5489 | module_init(slab_proc_init); |
diff --git a/tools/slub/slabinfo.c b/tools/slub/slabinfo.c index 868cc93f7ac2..164cbcf61106 100644 --- a/tools/slub/slabinfo.c +++ b/tools/slub/slabinfo.c | |||
@@ -42,6 +42,7 @@ struct slabinfo { | |||
42 | unsigned long deactivate_remote_frees, order_fallback; | 42 | unsigned long deactivate_remote_frees, order_fallback; |
43 | unsigned long cmpxchg_double_cpu_fail, cmpxchg_double_fail; | 43 | unsigned long cmpxchg_double_cpu_fail, cmpxchg_double_fail; |
44 | unsigned long alloc_node_mismatch, deactivate_bypass; | 44 | unsigned long alloc_node_mismatch, deactivate_bypass; |
45 | unsigned long cpu_partial_alloc, cpu_partial_free; | ||
45 | int numa[MAX_NODES]; | 46 | int numa[MAX_NODES]; |
46 | int numa_partial[MAX_NODES]; | 47 | int numa_partial[MAX_NODES]; |
47 | } slabinfo[MAX_SLABS]; | 48 | } slabinfo[MAX_SLABS]; |
@@ -455,6 +456,11 @@ static void slab_stats(struct slabinfo *s) | |||
455 | s->alloc_from_partial * 100 / total_alloc, | 456 | s->alloc_from_partial * 100 / total_alloc, |
456 | s->free_remove_partial * 100 / total_free); | 457 | s->free_remove_partial * 100 / total_free); |
457 | 458 | ||
459 | printf("Cpu partial list %8lu %8lu %3lu %3lu\n", | ||
460 | s->cpu_partial_alloc, s->cpu_partial_free, | ||
461 | s->cpu_partial_alloc * 100 / total_alloc, | ||
462 | s->cpu_partial_free * 100 / total_free); | ||
463 | |||
458 | printf("RemoteObj/SlabFrozen %8lu %8lu %3lu %3lu\n", | 464 | printf("RemoteObj/SlabFrozen %8lu %8lu %3lu %3lu\n", |
459 | s->deactivate_remote_frees, s->free_frozen, | 465 | s->deactivate_remote_frees, s->free_frozen, |
460 | s->deactivate_remote_frees * 100 / total_alloc, | 466 | s->deactivate_remote_frees * 100 / total_alloc, |
@@ -1145,7 +1151,7 @@ static void read_slab_dir(void) | |||
1145 | switch (de->d_type) { | 1151 | switch (de->d_type) { |
1146 | case DT_LNK: | 1152 | case DT_LNK: |
1147 | alias->name = strdup(de->d_name); | 1153 | alias->name = strdup(de->d_name); |
1148 | count = readlink(de->d_name, buffer, sizeof(buffer)); | 1154 | count = readlink(de->d_name, buffer, sizeof(buffer)-1); |
1149 | 1155 | ||
1150 | if (count < 0) | 1156 | if (count < 0) |
1151 | fatal("Cannot read symlink %s\n", de->d_name); | 1157 | fatal("Cannot read symlink %s\n", de->d_name); |
@@ -1209,6 +1215,8 @@ static void read_slab_dir(void) | |||
1209 | slab->order_fallback = get_obj("order_fallback"); | 1215 | slab->order_fallback = get_obj("order_fallback"); |
1210 | slab->cmpxchg_double_cpu_fail = get_obj("cmpxchg_double_cpu_fail"); | 1216 | slab->cmpxchg_double_cpu_fail = get_obj("cmpxchg_double_cpu_fail"); |
1211 | slab->cmpxchg_double_fail = get_obj("cmpxchg_double_fail"); | 1217 | slab->cmpxchg_double_fail = get_obj("cmpxchg_double_fail"); |
1218 | slab->cpu_partial_alloc = get_obj("cpu_partial_alloc"); | ||
1219 | slab->cpu_partial_free = get_obj("cpu_partial_free"); | ||
1212 | slab->alloc_node_mismatch = get_obj("alloc_node_mismatch"); | 1220 | slab->alloc_node_mismatch = get_obj("alloc_node_mismatch"); |
1213 | slab->deactivate_bypass = get_obj("deactivate_bypass"); | 1221 | slab->deactivate_bypass = get_obj("deactivate_bypass"); |
1214 | chdir(".."); | 1222 | chdir(".."); |