aboutsummaryrefslogtreecommitdiffstats
path: root/mm/slub.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/slub.c')
-rw-r--r--mm/slub.c558
1 files changed, 392 insertions, 166 deletions
diff --git a/mm/slub.c b/mm/slub.c
index 7c54fe83a90c..95215aa6a75e 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -467,34 +467,8 @@ static int disable_higher_order_debug;
467 */ 467 */
468static void print_section(char *text, u8 *addr, unsigned int length) 468static void print_section(char *text, u8 *addr, unsigned int length)
469{ 469{
470 int i, offset; 470 print_hex_dump(KERN_ERR, text, DUMP_PREFIX_ADDRESS, 16, 1, addr,
471 int newline = 1; 471 length, 1);
472 char ascii[17];
473
474 ascii[16] = 0;
475
476 for (i = 0; i < length; i++) {
477 if (newline) {
478 printk(KERN_ERR "%8s 0x%p: ", text, addr + i);
479 newline = 0;
480 }
481 printk(KERN_CONT " %02x", addr[i]);
482 offset = i % 16;
483 ascii[offset] = isgraph(addr[i]) ? addr[i] : '.';
484 if (offset == 15) {
485 printk(KERN_CONT " %s\n", ascii);
486 newline = 1;
487 }
488 }
489 if (!newline) {
490 i %= 16;
491 while (i < 16) {
492 printk(KERN_CONT " ");
493 ascii[i] = ' ';
494 i++;
495 }
496 printk(KERN_CONT " %s\n", ascii);
497 }
498} 472}
499 473
500static struct track *get_track(struct kmem_cache *s, void *object, 474static struct track *get_track(struct kmem_cache *s, void *object,
@@ -625,12 +599,12 @@ static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p)
625 p, p - addr, get_freepointer(s, p)); 599 p, p - addr, get_freepointer(s, p));
626 600
627 if (p > addr + 16) 601 if (p > addr + 16)
628 print_section("Bytes b4", p - 16, 16); 602 print_section("Bytes b4 ", p - 16, 16);
629
630 print_section("Object", p, min_t(unsigned long, s->objsize, PAGE_SIZE));
631 603
604 print_section("Object ", p, min_t(unsigned long, s->objsize,
605 PAGE_SIZE));
632 if (s->flags & SLAB_RED_ZONE) 606 if (s->flags & SLAB_RED_ZONE)
633 print_section("Redzone", p + s->objsize, 607 print_section("Redzone ", p + s->objsize,
634 s->inuse - s->objsize); 608 s->inuse - s->objsize);
635 609
636 if (s->offset) 610 if (s->offset)
@@ -643,7 +617,7 @@ static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p)
643 617
644 if (off != s->size) 618 if (off != s->size)
645 /* Beginning of the filler is the free pointer */ 619 /* Beginning of the filler is the free pointer */
646 print_section("Padding", p + off, s->size - off); 620 print_section("Padding ", p + off, s->size - off);
647 621
648 dump_stack(); 622 dump_stack();
649} 623}
@@ -838,7 +812,7 @@ static int slab_pad_check(struct kmem_cache *s, struct page *page)
838 end--; 812 end--;
839 813
840 slab_err(s, page, "Padding overwritten. 0x%p-0x%p", fault, end - 1); 814 slab_err(s, page, "Padding overwritten. 0x%p-0x%p", fault, end - 1);
841 print_section("Padding", end - remainder, remainder); 815 print_section("Padding ", end - remainder, remainder);
842 816
843 restore_bytes(s, "slab padding", POISON_INUSE, end - remainder, end); 817 restore_bytes(s, "slab padding", POISON_INUSE, end - remainder, end);
844 return 0; 818 return 0;
@@ -987,7 +961,7 @@ static void trace(struct kmem_cache *s, struct page *page, void *object,
987 page->freelist); 961 page->freelist);
988 962
989 if (!alloc) 963 if (!alloc)
990 print_section("Object", (void *)object, s->objsize); 964 print_section("Object ", (void *)object, s->objsize);
991 965
992 dump_stack(); 966 dump_stack();
993 } 967 }
@@ -1447,7 +1421,7 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
1447 set_freepointer(s, last, NULL); 1421 set_freepointer(s, last, NULL);
1448 1422
1449 page->freelist = start; 1423 page->freelist = start;
1450 page->inuse = 0; 1424 page->inuse = page->objects;
1451 page->frozen = 1; 1425 page->frozen = 1;
1452out: 1426out:
1453 return page; 1427 return page;
@@ -1534,7 +1508,7 @@ static inline void add_partial(struct kmem_cache_node *n,
1534 struct page *page, int tail) 1508 struct page *page, int tail)
1535{ 1509{
1536 n->nr_partial++; 1510 n->nr_partial++;
1537 if (tail) 1511 if (tail == DEACTIVATE_TO_TAIL)
1538 list_add_tail(&page->lru, &n->partial); 1512 list_add_tail(&page->lru, &n->partial);
1539 else 1513 else
1540 list_add(&page->lru, &n->partial); 1514 list_add(&page->lru, &n->partial);
@@ -1554,10 +1528,13 @@ static inline void remove_partial(struct kmem_cache_node *n,
1554 * Lock slab, remove from the partial list and put the object into the 1528 * Lock slab, remove from the partial list and put the object into the
1555 * per cpu freelist. 1529 * per cpu freelist.
1556 * 1530 *
1531 * Returns a list of objects or NULL if it fails.
1532 *
1557 * Must hold list_lock. 1533 * Must hold list_lock.
1558 */ 1534 */
1559static inline int acquire_slab(struct kmem_cache *s, 1535static inline void *acquire_slab(struct kmem_cache *s,
1560 struct kmem_cache_node *n, struct page *page) 1536 struct kmem_cache_node *n, struct page *page,
1537 int mode)
1561{ 1538{
1562 void *freelist; 1539 void *freelist;
1563 unsigned long counters; 1540 unsigned long counters;
@@ -1572,7 +1549,8 @@ static inline int acquire_slab(struct kmem_cache *s,
1572 freelist = page->freelist; 1549 freelist = page->freelist;
1573 counters = page->counters; 1550 counters = page->counters;
1574 new.counters = counters; 1551 new.counters = counters;
1575 new.inuse = page->objects; 1552 if (mode)
1553 new.inuse = page->objects;
1576 1554
1577 VM_BUG_ON(new.frozen); 1555 VM_BUG_ON(new.frozen);
1578 new.frozen = 1; 1556 new.frozen = 1;
@@ -1583,32 +1561,19 @@ static inline int acquire_slab(struct kmem_cache *s,
1583 "lock and freeze")); 1561 "lock and freeze"));
1584 1562
1585 remove_partial(n, page); 1563 remove_partial(n, page);
1586 1564 return freelist;
1587 if (freelist) {
1588 /* Populate the per cpu freelist */
1589 this_cpu_write(s->cpu_slab->freelist, freelist);
1590 this_cpu_write(s->cpu_slab->page, page);
1591 this_cpu_write(s->cpu_slab->node, page_to_nid(page));
1592 return 1;
1593 } else {
1594 /*
1595 * Slab page came from the wrong list. No object to allocate
1596 * from. Put it onto the correct list and continue partial
1597 * scan.
1598 */
1599 printk(KERN_ERR "SLUB: %s : Page without available objects on"
1600 " partial list\n", s->name);
1601 return 0;
1602 }
1603} 1565}
1604 1566
1567static int put_cpu_partial(struct kmem_cache *s, struct page *page, int drain);
1568
1605/* 1569/*
1606 * Try to allocate a partial slab from a specific node. 1570 * Try to allocate a partial slab from a specific node.
1607 */ 1571 */
1608static struct page *get_partial_node(struct kmem_cache *s, 1572static void *get_partial_node(struct kmem_cache *s,
1609 struct kmem_cache_node *n) 1573 struct kmem_cache_node *n, struct kmem_cache_cpu *c)
1610{ 1574{
1611 struct page *page; 1575 struct page *page, *page2;
1576 void *object = NULL;
1612 1577
1613 /* 1578 /*
1614 * Racy check. If we mistakenly see no partial slabs then we 1579 * Racy check. If we mistakenly see no partial slabs then we
@@ -1620,26 +1585,43 @@ static struct page *get_partial_node(struct kmem_cache *s,
1620 return NULL; 1585 return NULL;
1621 1586
1622 spin_lock(&n->list_lock); 1587 spin_lock(&n->list_lock);
1623 list_for_each_entry(page, &n->partial, lru) 1588 list_for_each_entry_safe(page, page2, &n->partial, lru) {
1624 if (acquire_slab(s, n, page)) 1589 void *t = acquire_slab(s, n, page, object == NULL);
1625 goto out; 1590 int available;
1626 page = NULL; 1591
1627out: 1592 if (!t)
1593 break;
1594
1595 if (!object) {
1596 c->page = page;
1597 c->node = page_to_nid(page);
1598 stat(s, ALLOC_FROM_PARTIAL);
1599 object = t;
1600 available = page->objects - page->inuse;
1601 } else {
1602 page->freelist = t;
1603 available = put_cpu_partial(s, page, 0);
1604 }
1605 if (kmem_cache_debug(s) || available > s->cpu_partial / 2)
1606 break;
1607
1608 }
1628 spin_unlock(&n->list_lock); 1609 spin_unlock(&n->list_lock);
1629 return page; 1610 return object;
1630} 1611}
1631 1612
1632/* 1613/*
1633 * Get a page from somewhere. Search in increasing NUMA distances. 1614 * Get a page from somewhere. Search in increasing NUMA distances.
1634 */ 1615 */
1635static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags) 1616static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags,
1617 struct kmem_cache_cpu *c)
1636{ 1618{
1637#ifdef CONFIG_NUMA 1619#ifdef CONFIG_NUMA
1638 struct zonelist *zonelist; 1620 struct zonelist *zonelist;
1639 struct zoneref *z; 1621 struct zoneref *z;
1640 struct zone *zone; 1622 struct zone *zone;
1641 enum zone_type high_zoneidx = gfp_zone(flags); 1623 enum zone_type high_zoneidx = gfp_zone(flags);
1642 struct page *page; 1624 void *object;
1643 1625
1644 /* 1626 /*
1645 * The defrag ratio allows a configuration of the tradeoffs between 1627 * The defrag ratio allows a configuration of the tradeoffs between
@@ -1672,10 +1654,10 @@ static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags)
1672 1654
1673 if (n && cpuset_zone_allowed_hardwall(zone, flags) && 1655 if (n && cpuset_zone_allowed_hardwall(zone, flags) &&
1674 n->nr_partial > s->min_partial) { 1656 n->nr_partial > s->min_partial) {
1675 page = get_partial_node(s, n); 1657 object = get_partial_node(s, n, c);
1676 if (page) { 1658 if (object) {
1677 put_mems_allowed(); 1659 put_mems_allowed();
1678 return page; 1660 return object;
1679 } 1661 }
1680 } 1662 }
1681 } 1663 }
@@ -1687,16 +1669,17 @@ static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags)
1687/* 1669/*
1688 * Get a partial page, lock it and return it. 1670 * Get a partial page, lock it and return it.
1689 */ 1671 */
1690static struct page *get_partial(struct kmem_cache *s, gfp_t flags, int node) 1672static void *get_partial(struct kmem_cache *s, gfp_t flags, int node,
1673 struct kmem_cache_cpu *c)
1691{ 1674{
1692 struct page *page; 1675 void *object;
1693 int searchnode = (node == NUMA_NO_NODE) ? numa_node_id() : node; 1676 int searchnode = (node == NUMA_NO_NODE) ? numa_node_id() : node;
1694 1677
1695 page = get_partial_node(s, get_node(s, searchnode)); 1678 object = get_partial_node(s, get_node(s, searchnode), c);
1696 if (page || node != NUMA_NO_NODE) 1679 if (object || node != NUMA_NO_NODE)
1697 return page; 1680 return object;
1698 1681
1699 return get_any_partial(s, flags); 1682 return get_any_partial(s, flags, c);
1700} 1683}
1701 1684
1702#ifdef CONFIG_PREEMPT 1685#ifdef CONFIG_PREEMPT
@@ -1765,9 +1748,6 @@ void init_kmem_cache_cpus(struct kmem_cache *s)
1765 for_each_possible_cpu(cpu) 1748 for_each_possible_cpu(cpu)
1766 per_cpu_ptr(s->cpu_slab, cpu)->tid = init_tid(cpu); 1749 per_cpu_ptr(s->cpu_slab, cpu)->tid = init_tid(cpu);
1767} 1750}
1768/*
1769 * Remove the cpu slab
1770 */
1771 1751
1772/* 1752/*
1773 * Remove the cpu slab 1753 * Remove the cpu slab
@@ -1781,13 +1761,13 @@ static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
1781 enum slab_modes l = M_NONE, m = M_NONE; 1761 enum slab_modes l = M_NONE, m = M_NONE;
1782 void *freelist; 1762 void *freelist;
1783 void *nextfree; 1763 void *nextfree;
1784 int tail = 0; 1764 int tail = DEACTIVATE_TO_HEAD;
1785 struct page new; 1765 struct page new;
1786 struct page old; 1766 struct page old;
1787 1767
1788 if (page->freelist) { 1768 if (page->freelist) {
1789 stat(s, DEACTIVATE_REMOTE_FREES); 1769 stat(s, DEACTIVATE_REMOTE_FREES);
1790 tail = 1; 1770 tail = DEACTIVATE_TO_TAIL;
1791 } 1771 }
1792 1772
1793 c->tid = next_tid(c->tid); 1773 c->tid = next_tid(c->tid);
@@ -1893,7 +1873,7 @@ redo:
1893 if (m == M_PARTIAL) { 1873 if (m == M_PARTIAL) {
1894 1874
1895 add_partial(n, page, tail); 1875 add_partial(n, page, tail);
1896 stat(s, tail ? DEACTIVATE_TO_TAIL : DEACTIVATE_TO_HEAD); 1876 stat(s, tail);
1897 1877
1898 } else if (m == M_FULL) { 1878 } else if (m == M_FULL) {
1899 1879
@@ -1920,6 +1900,123 @@ redo:
1920 } 1900 }
1921} 1901}
1922 1902
1903/* Unfreeze all the cpu partial slabs */
1904static void unfreeze_partials(struct kmem_cache *s)
1905{
1906 struct kmem_cache_node *n = NULL;
1907 struct kmem_cache_cpu *c = this_cpu_ptr(s->cpu_slab);
1908 struct page *page;
1909
1910 while ((page = c->partial)) {
1911 enum slab_modes { M_PARTIAL, M_FREE };
1912 enum slab_modes l, m;
1913 struct page new;
1914 struct page old;
1915
1916 c->partial = page->next;
1917 l = M_FREE;
1918
1919 do {
1920
1921 old.freelist = page->freelist;
1922 old.counters = page->counters;
1923 VM_BUG_ON(!old.frozen);
1924
1925 new.counters = old.counters;
1926 new.freelist = old.freelist;
1927
1928 new.frozen = 0;
1929
1930 if (!new.inuse && (!n || n->nr_partial > s->min_partial))
1931 m = M_FREE;
1932 else {
1933 struct kmem_cache_node *n2 = get_node(s,
1934 page_to_nid(page));
1935
1936 m = M_PARTIAL;
1937 if (n != n2) {
1938 if (n)
1939 spin_unlock(&n->list_lock);
1940
1941 n = n2;
1942 spin_lock(&n->list_lock);
1943 }
1944 }
1945
1946 if (l != m) {
1947 if (l == M_PARTIAL)
1948 remove_partial(n, page);
1949 else
1950 add_partial(n, page, 1);
1951
1952 l = m;
1953 }
1954
1955 } while (!cmpxchg_double_slab(s, page,
1956 old.freelist, old.counters,
1957 new.freelist, new.counters,
1958 "unfreezing slab"));
1959
1960 if (m == M_FREE) {
1961 stat(s, DEACTIVATE_EMPTY);
1962 discard_slab(s, page);
1963 stat(s, FREE_SLAB);
1964 }
1965 }
1966
1967 if (n)
1968 spin_unlock(&n->list_lock);
1969}
1970
1971/*
1972 * Put a page that was just frozen (in __slab_free) into a partial page
1973 * slot if available. This is done without interrupts disabled and without
1974 * preemption disabled. The cmpxchg is racy and may put the partial page
1975 * onto a random cpus partial slot.
1976 *
1977 * If we did not find a slot then simply move all the partials to the
1978 * per node partial list.
1979 */
1980int put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
1981{
1982 struct page *oldpage;
1983 int pages;
1984 int pobjects;
1985
1986 do {
1987 pages = 0;
1988 pobjects = 0;
1989 oldpage = this_cpu_read(s->cpu_slab->partial);
1990
1991 if (oldpage) {
1992 pobjects = oldpage->pobjects;
1993 pages = oldpage->pages;
1994 if (drain && pobjects > s->cpu_partial) {
1995 unsigned long flags;
1996 /*
1997 * partial array is full. Move the existing
1998 * set to the per node partial list.
1999 */
2000 local_irq_save(flags);
2001 unfreeze_partials(s);
2002 local_irq_restore(flags);
2003 pobjects = 0;
2004 pages = 0;
2005 }
2006 }
2007
2008 pages++;
2009 pobjects += page->objects - page->inuse;
2010
2011 page->pages = pages;
2012 page->pobjects = pobjects;
2013 page->next = oldpage;
2014
2015 } while (this_cpu_cmpxchg(s->cpu_slab->partial, oldpage, page) != oldpage);
2016 stat(s, CPU_PARTIAL_FREE);
2017 return pobjects;
2018}
2019
1923static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) 2020static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
1924{ 2021{
1925 stat(s, CPUSLAB_FLUSH); 2022 stat(s, CPUSLAB_FLUSH);
@@ -1935,8 +2032,12 @@ static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu)
1935{ 2032{
1936 struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu); 2033 struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
1937 2034
1938 if (likely(c && c->page)) 2035 if (likely(c)) {
1939 flush_slab(s, c); 2036 if (c->page)
2037 flush_slab(s, c);
2038
2039 unfreeze_partials(s);
2040 }
1940} 2041}
1941 2042
1942static void flush_cpu_slab(void *d) 2043static void flush_cpu_slab(void *d)
@@ -2027,12 +2128,39 @@ slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid)
2027 } 2128 }
2028} 2129}
2029 2130
2131static inline void *new_slab_objects(struct kmem_cache *s, gfp_t flags,
2132 int node, struct kmem_cache_cpu **pc)
2133{
2134 void *object;
2135 struct kmem_cache_cpu *c;
2136 struct page *page = new_slab(s, flags, node);
2137
2138 if (page) {
2139 c = __this_cpu_ptr(s->cpu_slab);
2140 if (c->page)
2141 flush_slab(s, c);
2142
2143 /*
2144 * No other reference to the page yet so we can
2145 * muck around with it freely without cmpxchg
2146 */
2147 object = page->freelist;
2148 page->freelist = NULL;
2149
2150 stat(s, ALLOC_SLAB);
2151 c->node = page_to_nid(page);
2152 c->page = page;
2153 *pc = c;
2154 } else
2155 object = NULL;
2156
2157 return object;
2158}
2159
2030/* 2160/*
2031 * Slow path. The lockless freelist is empty or we need to perform 2161 * Slow path. The lockless freelist is empty or we need to perform
2032 * debugging duties. 2162 * debugging duties.
2033 * 2163 *
2034 * Interrupts are disabled.
2035 *
2036 * Processing is still very fast if new objects have been freed to the 2164 * Processing is still very fast if new objects have been freed to the
2037 * regular freelist. In that case we simply take over the regular freelist 2165 * regular freelist. In that case we simply take over the regular freelist
2038 * as the lockless freelist and zap the regular freelist. 2166 * as the lockless freelist and zap the regular freelist.
@@ -2049,7 +2177,6 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
2049 unsigned long addr, struct kmem_cache_cpu *c) 2177 unsigned long addr, struct kmem_cache_cpu *c)
2050{ 2178{
2051 void **object; 2179 void **object;
2052 struct page *page;
2053 unsigned long flags; 2180 unsigned long flags;
2054 struct page new; 2181 struct page new;
2055 unsigned long counters; 2182 unsigned long counters;
@@ -2064,13 +2191,9 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
2064 c = this_cpu_ptr(s->cpu_slab); 2191 c = this_cpu_ptr(s->cpu_slab);
2065#endif 2192#endif
2066 2193
2067 /* We handle __GFP_ZERO in the caller */ 2194 if (!c->page)
2068 gfpflags &= ~__GFP_ZERO;
2069
2070 page = c->page;
2071 if (!page)
2072 goto new_slab; 2195 goto new_slab;
2073 2196redo:
2074 if (unlikely(!node_match(c, node))) { 2197 if (unlikely(!node_match(c, node))) {
2075 stat(s, ALLOC_NODE_MISMATCH); 2198 stat(s, ALLOC_NODE_MISMATCH);
2076 deactivate_slab(s, c); 2199 deactivate_slab(s, c);
@@ -2080,8 +2203,8 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
2080 stat(s, ALLOC_SLOWPATH); 2203 stat(s, ALLOC_SLOWPATH);
2081 2204
2082 do { 2205 do {
2083 object = page->freelist; 2206 object = c->page->freelist;
2084 counters = page->counters; 2207 counters = c->page->counters;
2085 new.counters = counters; 2208 new.counters = counters;
2086 VM_BUG_ON(!new.frozen); 2209 VM_BUG_ON(!new.frozen);
2087 2210
@@ -2093,17 +2216,17 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
2093 * 2216 *
2094 * If there are objects left then we retrieve them 2217 * If there are objects left then we retrieve them
2095 * and use them to refill the per cpu queue. 2218 * and use them to refill the per cpu queue.
2096 */ 2219 */
2097 2220
2098 new.inuse = page->objects; 2221 new.inuse = c->page->objects;
2099 new.frozen = object != NULL; 2222 new.frozen = object != NULL;
2100 2223
2101 } while (!__cmpxchg_double_slab(s, page, 2224 } while (!__cmpxchg_double_slab(s, c->page,
2102 object, counters, 2225 object, counters,
2103 NULL, new.counters, 2226 NULL, new.counters,
2104 "__slab_alloc")); 2227 "__slab_alloc"));
2105 2228
2106 if (unlikely(!object)) { 2229 if (!object) {
2107 c->page = NULL; 2230 c->page = NULL;
2108 stat(s, DEACTIVATE_BYPASS); 2231 stat(s, DEACTIVATE_BYPASS);
2109 goto new_slab; 2232 goto new_slab;
@@ -2112,58 +2235,47 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
2112 stat(s, ALLOC_REFILL); 2235 stat(s, ALLOC_REFILL);
2113 2236
2114load_freelist: 2237load_freelist:
2115 VM_BUG_ON(!page->frozen);
2116 c->freelist = get_freepointer(s, object); 2238 c->freelist = get_freepointer(s, object);
2117 c->tid = next_tid(c->tid); 2239 c->tid = next_tid(c->tid);
2118 local_irq_restore(flags); 2240 local_irq_restore(flags);
2119 return object; 2241 return object;
2120 2242
2121new_slab: 2243new_slab:
2122 page = get_partial(s, gfpflags, node);
2123 if (page) {
2124 stat(s, ALLOC_FROM_PARTIAL);
2125 object = c->freelist;
2126 2244
2127 if (kmem_cache_debug(s)) 2245 if (c->partial) {
2128 goto debug; 2246 c->page = c->partial;
2129 goto load_freelist; 2247 c->partial = c->page->next;
2248 c->node = page_to_nid(c->page);
2249 stat(s, CPU_PARTIAL_ALLOC);
2250 c->freelist = NULL;
2251 goto redo;
2130 } 2252 }
2131 2253
2132 page = new_slab(s, gfpflags, node); 2254 /* Then do expensive stuff like retrieving pages from the partial lists */
2255 object = get_partial(s, gfpflags, node, c);
2133 2256
2134 if (page) { 2257 if (unlikely(!object)) {
2135 c = __this_cpu_ptr(s->cpu_slab);
2136 if (c->page)
2137 flush_slab(s, c);
2138 2258
2139 /* 2259 object = new_slab_objects(s, gfpflags, node, &c);
2140 * No other reference to the page yet so we can
2141 * muck around with it freely without cmpxchg
2142 */
2143 object = page->freelist;
2144 page->freelist = NULL;
2145 page->inuse = page->objects;
2146 2260
2147 stat(s, ALLOC_SLAB); 2261 if (unlikely(!object)) {
2148 c->node = page_to_nid(page); 2262 if (!(gfpflags & __GFP_NOWARN) && printk_ratelimit())
2149 c->page = page; 2263 slab_out_of_memory(s, gfpflags, node);
2150 2264
2151 if (kmem_cache_debug(s)) 2265 local_irq_restore(flags);
2152 goto debug; 2266 return NULL;
2153 goto load_freelist; 2267 }
2154 } 2268 }
2155 if (!(gfpflags & __GFP_NOWARN) && printk_ratelimit())
2156 slab_out_of_memory(s, gfpflags, node);
2157 local_irq_restore(flags);
2158 return NULL;
2159 2269
2160debug: 2270 if (likely(!kmem_cache_debug(s)))
2161 if (!object || !alloc_debug_processing(s, page, object, addr)) 2271 goto load_freelist;
2162 goto new_slab; 2272
2273 /* Only entered in the debug case */
2274 if (!alloc_debug_processing(s, c->page, object, addr))
2275 goto new_slab; /* Slab failed checks. Next slab needed */
2163 2276
2164 c->freelist = get_freepointer(s, object); 2277 c->freelist = get_freepointer(s, object);
2165 deactivate_slab(s, c); 2278 deactivate_slab(s, c);
2166 c->page = NULL;
2167 c->node = NUMA_NO_NODE; 2279 c->node = NUMA_NO_NODE;
2168 local_irq_restore(flags); 2280 local_irq_restore(flags);
2169 return object; 2281 return object;
@@ -2333,16 +2445,29 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
2333 was_frozen = new.frozen; 2445 was_frozen = new.frozen;
2334 new.inuse--; 2446 new.inuse--;
2335 if ((!new.inuse || !prior) && !was_frozen && !n) { 2447 if ((!new.inuse || !prior) && !was_frozen && !n) {
2336 n = get_node(s, page_to_nid(page)); 2448
2337 /* 2449 if (!kmem_cache_debug(s) && !prior)
2338 * Speculatively acquire the list_lock. 2450
2339 * If the cmpxchg does not succeed then we may 2451 /*
2340 * drop the list_lock without any processing. 2452 * Slab was on no list before and will be partially empty
2341 * 2453 * We can defer the list move and instead freeze it.
2342 * Otherwise the list_lock will synchronize with 2454 */
2343 * other processors updating the list of slabs. 2455 new.frozen = 1;
2344 */ 2456
2345 spin_lock_irqsave(&n->list_lock, flags); 2457 else { /* Needs to be taken off a list */
2458
2459 n = get_node(s, page_to_nid(page));
2460 /*
2461 * Speculatively acquire the list_lock.
2462 * If the cmpxchg does not succeed then we may
2463 * drop the list_lock without any processing.
2464 *
2465 * Otherwise the list_lock will synchronize with
2466 * other processors updating the list of slabs.
2467 */
2468 spin_lock_irqsave(&n->list_lock, flags);
2469
2470 }
2346 } 2471 }
2347 inuse = new.inuse; 2472 inuse = new.inuse;
2348 2473
@@ -2352,7 +2477,15 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
2352 "__slab_free")); 2477 "__slab_free"));
2353 2478
2354 if (likely(!n)) { 2479 if (likely(!n)) {
2355 /* 2480
2481 /*
2482 * If we just froze the page then put it onto the
2483 * per cpu partial list.
2484 */
2485 if (new.frozen && !was_frozen)
2486 put_cpu_partial(s, page, 1);
2487
2488 /*
2356 * The list lock was not taken therefore no list 2489 * The list lock was not taken therefore no list
2357 * activity can be necessary. 2490 * activity can be necessary.
2358 */ 2491 */
@@ -2377,7 +2510,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
2377 */ 2510 */
2378 if (unlikely(!prior)) { 2511 if (unlikely(!prior)) {
2379 remove_full(s, page); 2512 remove_full(s, page);
2380 add_partial(n, page, 1); 2513 add_partial(n, page, DEACTIVATE_TO_TAIL);
2381 stat(s, FREE_ADD_PARTIAL); 2514 stat(s, FREE_ADD_PARTIAL);
2382 } 2515 }
2383 } 2516 }
@@ -2421,7 +2554,6 @@ static __always_inline void slab_free(struct kmem_cache *s,
2421 slab_free_hook(s, x); 2554 slab_free_hook(s, x);
2422 2555
2423redo: 2556redo:
2424
2425 /* 2557 /*
2426 * Determine the currently cpus per cpu slab. 2558 * Determine the currently cpus per cpu slab.
2427 * The cpu may change afterward. However that does not matter since 2559 * The cpu may change afterward. However that does not matter since
@@ -2685,7 +2817,7 @@ static void early_kmem_cache_node_alloc(int node)
2685 n = page->freelist; 2817 n = page->freelist;
2686 BUG_ON(!n); 2818 BUG_ON(!n);
2687 page->freelist = get_freepointer(kmem_cache_node, n); 2819 page->freelist = get_freepointer(kmem_cache_node, n);
2688 page->inuse++; 2820 page->inuse = 1;
2689 page->frozen = 0; 2821 page->frozen = 0;
2690 kmem_cache_node->node[node] = n; 2822 kmem_cache_node->node[node] = n;
2691#ifdef CONFIG_SLUB_DEBUG 2823#ifdef CONFIG_SLUB_DEBUG
@@ -2695,7 +2827,7 @@ static void early_kmem_cache_node_alloc(int node)
2695 init_kmem_cache_node(n, kmem_cache_node); 2827 init_kmem_cache_node(n, kmem_cache_node);
2696 inc_slabs_node(kmem_cache_node, node, page->objects); 2828 inc_slabs_node(kmem_cache_node, node, page->objects);
2697 2829
2698 add_partial(n, page, 0); 2830 add_partial(n, page, DEACTIVATE_TO_HEAD);
2699} 2831}
2700 2832
2701static void free_kmem_cache_nodes(struct kmem_cache *s) 2833static void free_kmem_cache_nodes(struct kmem_cache *s)
@@ -2911,7 +3043,34 @@ static int kmem_cache_open(struct kmem_cache *s,
2911 * The larger the object size is, the more pages we want on the partial 3043 * The larger the object size is, the more pages we want on the partial
2912 * list to avoid pounding the page allocator excessively. 3044 * list to avoid pounding the page allocator excessively.
2913 */ 3045 */
2914 set_min_partial(s, ilog2(s->size)); 3046 set_min_partial(s, ilog2(s->size) / 2);
3047
3048 /*
3049 * cpu_partial determined the maximum number of objects kept in the
3050 * per cpu partial lists of a processor.
3051 *
3052 * Per cpu partial lists mainly contain slabs that just have one
3053 * object freed. If they are used for allocation then they can be
3054 * filled up again with minimal effort. The slab will never hit the
3055 * per node partial lists and therefore no locking will be required.
3056 *
3057 * This setting also determines
3058 *
3059 * A) The number of objects from per cpu partial slabs dumped to the
3060 * per node list when we reach the limit.
3061 * B) The number of objects in cpu partial slabs to extract from the
3062 * per node list when we run out of per cpu objects. We only fetch 50%
3063 * to keep some capacity around for frees.
3064 */
3065 if (s->size >= PAGE_SIZE)
3066 s->cpu_partial = 2;
3067 else if (s->size >= 1024)
3068 s->cpu_partial = 6;
3069 else if (s->size >= 256)
3070 s->cpu_partial = 13;
3071 else
3072 s->cpu_partial = 30;
3073
2915 s->refcount = 1; 3074 s->refcount = 1;
2916#ifdef CONFIG_NUMA 3075#ifdef CONFIG_NUMA
2917 s->remote_node_defrag_ratio = 1000; 3076 s->remote_node_defrag_ratio = 1000;
@@ -2970,13 +3129,13 @@ static void list_slab_objects(struct kmem_cache *s, struct page *page,
2970 3129
2971/* 3130/*
2972 * Attempt to free all partial slabs on a node. 3131 * Attempt to free all partial slabs on a node.
3132 * This is called from kmem_cache_close(). We must be the last thread
3133 * using the cache and therefore we do not need to lock anymore.
2973 */ 3134 */
2974static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n) 3135static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n)
2975{ 3136{
2976 unsigned long flags;
2977 struct page *page, *h; 3137 struct page *page, *h;
2978 3138
2979 spin_lock_irqsave(&n->list_lock, flags);
2980 list_for_each_entry_safe(page, h, &n->partial, lru) { 3139 list_for_each_entry_safe(page, h, &n->partial, lru) {
2981 if (!page->inuse) { 3140 if (!page->inuse) {
2982 remove_partial(n, page); 3141 remove_partial(n, page);
@@ -2986,7 +3145,6 @@ static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n)
2986 "Objects remaining on kmem_cache_close()"); 3145 "Objects remaining on kmem_cache_close()");
2987 } 3146 }
2988 } 3147 }
2989 spin_unlock_irqrestore(&n->list_lock, flags);
2990} 3148}
2991 3149
2992/* 3150/*
@@ -3020,6 +3178,7 @@ void kmem_cache_destroy(struct kmem_cache *s)
3020 s->refcount--; 3178 s->refcount--;
3021 if (!s->refcount) { 3179 if (!s->refcount) {
3022 list_del(&s->list); 3180 list_del(&s->list);
3181 up_write(&slub_lock);
3023 if (kmem_cache_close(s)) { 3182 if (kmem_cache_close(s)) {
3024 printk(KERN_ERR "SLUB %s: %s called for cache that " 3183 printk(KERN_ERR "SLUB %s: %s called for cache that "
3025 "still has objects.\n", s->name, __func__); 3184 "still has objects.\n", s->name, __func__);
@@ -3028,8 +3187,8 @@ void kmem_cache_destroy(struct kmem_cache *s)
3028 if (s->flags & SLAB_DESTROY_BY_RCU) 3187 if (s->flags & SLAB_DESTROY_BY_RCU)
3029 rcu_barrier(); 3188 rcu_barrier();
3030 sysfs_slab_remove(s); 3189 sysfs_slab_remove(s);
3031 } 3190 } else
3032 up_write(&slub_lock); 3191 up_write(&slub_lock);
3033} 3192}
3034EXPORT_SYMBOL(kmem_cache_destroy); 3193EXPORT_SYMBOL(kmem_cache_destroy);
3035 3194
@@ -3347,23 +3506,23 @@ int kmem_cache_shrink(struct kmem_cache *s)
3347 * list_lock. page->inuse here is the upper limit. 3506 * list_lock. page->inuse here is the upper limit.
3348 */ 3507 */
3349 list_for_each_entry_safe(page, t, &n->partial, lru) { 3508 list_for_each_entry_safe(page, t, &n->partial, lru) {
3350 if (!page->inuse) { 3509 list_move(&page->lru, slabs_by_inuse + page->inuse);
3351 remove_partial(n, page); 3510 if (!page->inuse)
3352 discard_slab(s, page); 3511 n->nr_partial--;
3353 } else {
3354 list_move(&page->lru,
3355 slabs_by_inuse + page->inuse);
3356 }
3357 } 3512 }
3358 3513
3359 /* 3514 /*
3360 * Rebuild the partial list with the slabs filled up most 3515 * Rebuild the partial list with the slabs filled up most
3361 * first and the least used slabs at the end. 3516 * first and the least used slabs at the end.
3362 */ 3517 */
3363 for (i = objects - 1; i >= 0; i--) 3518 for (i = objects - 1; i > 0; i--)
3364 list_splice(slabs_by_inuse + i, n->partial.prev); 3519 list_splice(slabs_by_inuse + i, n->partial.prev);
3365 3520
3366 spin_unlock_irqrestore(&n->list_lock, flags); 3521 spin_unlock_irqrestore(&n->list_lock, flags);
3522
3523 /* Release empty slabs */
3524 list_for_each_entry_safe(page, t, slabs_by_inuse, lru)
3525 discard_slab(s, page);
3367 } 3526 }
3368 3527
3369 kfree(slabs_by_inuse); 3528 kfree(slabs_by_inuse);
@@ -4319,6 +4478,7 @@ static ssize_t show_slab_objects(struct kmem_cache *s,
4319 4478
4320 for_each_possible_cpu(cpu) { 4479 for_each_possible_cpu(cpu) {
4321 struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu); 4480 struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
4481 struct page *page;
4322 4482
4323 if (!c || c->node < 0) 4483 if (!c || c->node < 0)
4324 continue; 4484 continue;
@@ -4334,6 +4494,13 @@ static ssize_t show_slab_objects(struct kmem_cache *s,
4334 total += x; 4494 total += x;
4335 nodes[c->node] += x; 4495 nodes[c->node] += x;
4336 } 4496 }
4497 page = c->partial;
4498
4499 if (page) {
4500 x = page->pobjects;
4501 total += x;
4502 nodes[c->node] += x;
4503 }
4337 per_cpu[c->node]++; 4504 per_cpu[c->node]++;
4338 } 4505 }
4339 } 4506 }
@@ -4412,11 +4579,12 @@ struct slab_attribute {
4412}; 4579};
4413 4580
4414#define SLAB_ATTR_RO(_name) \ 4581#define SLAB_ATTR_RO(_name) \
4415 static struct slab_attribute _name##_attr = __ATTR_RO(_name) 4582 static struct slab_attribute _name##_attr = \
4583 __ATTR(_name, 0400, _name##_show, NULL)
4416 4584
4417#define SLAB_ATTR(_name) \ 4585#define SLAB_ATTR(_name) \
4418 static struct slab_attribute _name##_attr = \ 4586 static struct slab_attribute _name##_attr = \
4419 __ATTR(_name, 0644, _name##_show, _name##_store) 4587 __ATTR(_name, 0600, _name##_show, _name##_store)
4420 4588
4421static ssize_t slab_size_show(struct kmem_cache *s, char *buf) 4589static ssize_t slab_size_show(struct kmem_cache *s, char *buf)
4422{ 4590{
@@ -4485,6 +4653,27 @@ static ssize_t min_partial_store(struct kmem_cache *s, const char *buf,
4485} 4653}
4486SLAB_ATTR(min_partial); 4654SLAB_ATTR(min_partial);
4487 4655
4656static ssize_t cpu_partial_show(struct kmem_cache *s, char *buf)
4657{
4658 return sprintf(buf, "%u\n", s->cpu_partial);
4659}
4660
4661static ssize_t cpu_partial_store(struct kmem_cache *s, const char *buf,
4662 size_t length)
4663{
4664 unsigned long objects;
4665 int err;
4666
4667 err = strict_strtoul(buf, 10, &objects);
4668 if (err)
4669 return err;
4670
4671 s->cpu_partial = objects;
4672 flush_all(s);
4673 return length;
4674}
4675SLAB_ATTR(cpu_partial);
4676
4488static ssize_t ctor_show(struct kmem_cache *s, char *buf) 4677static ssize_t ctor_show(struct kmem_cache *s, char *buf)
4489{ 4678{
4490 if (!s->ctor) 4679 if (!s->ctor)
@@ -4523,6 +4712,37 @@ static ssize_t objects_partial_show(struct kmem_cache *s, char *buf)
4523} 4712}
4524SLAB_ATTR_RO(objects_partial); 4713SLAB_ATTR_RO(objects_partial);
4525 4714
4715static ssize_t slabs_cpu_partial_show(struct kmem_cache *s, char *buf)
4716{
4717 int objects = 0;
4718 int pages = 0;
4719 int cpu;
4720 int len;
4721
4722 for_each_online_cpu(cpu) {
4723 struct page *page = per_cpu_ptr(s->cpu_slab, cpu)->partial;
4724
4725 if (page) {
4726 pages += page->pages;
4727 objects += page->pobjects;
4728 }
4729 }
4730
4731 len = sprintf(buf, "%d(%d)", objects, pages);
4732
4733#ifdef CONFIG_SMP
4734 for_each_online_cpu(cpu) {
4735 struct page *page = per_cpu_ptr(s->cpu_slab, cpu) ->partial;
4736
4737 if (page && len < PAGE_SIZE - 20)
4738 len += sprintf(buf + len, " C%d=%d(%d)", cpu,
4739 page->pobjects, page->pages);
4740 }
4741#endif
4742 return len + sprintf(buf + len, "\n");
4743}
4744SLAB_ATTR_RO(slabs_cpu_partial);
4745
4526static ssize_t reclaim_account_show(struct kmem_cache *s, char *buf) 4746static ssize_t reclaim_account_show(struct kmem_cache *s, char *buf)
4527{ 4747{
4528 return sprintf(buf, "%d\n", !!(s->flags & SLAB_RECLAIM_ACCOUNT)); 4748 return sprintf(buf, "%d\n", !!(s->flags & SLAB_RECLAIM_ACCOUNT));
@@ -4845,6 +5065,8 @@ STAT_ATTR(DEACTIVATE_BYPASS, deactivate_bypass);
4845STAT_ATTR(ORDER_FALLBACK, order_fallback); 5065STAT_ATTR(ORDER_FALLBACK, order_fallback);
4846STAT_ATTR(CMPXCHG_DOUBLE_CPU_FAIL, cmpxchg_double_cpu_fail); 5066STAT_ATTR(CMPXCHG_DOUBLE_CPU_FAIL, cmpxchg_double_cpu_fail);
4847STAT_ATTR(CMPXCHG_DOUBLE_FAIL, cmpxchg_double_fail); 5067STAT_ATTR(CMPXCHG_DOUBLE_FAIL, cmpxchg_double_fail);
5068STAT_ATTR(CPU_PARTIAL_ALLOC, cpu_partial_alloc);
5069STAT_ATTR(CPU_PARTIAL_FREE, cpu_partial_free);
4848#endif 5070#endif
4849 5071
4850static struct attribute *slab_attrs[] = { 5072static struct attribute *slab_attrs[] = {
@@ -4853,6 +5075,7 @@ static struct attribute *slab_attrs[] = {
4853 &objs_per_slab_attr.attr, 5075 &objs_per_slab_attr.attr,
4854 &order_attr.attr, 5076 &order_attr.attr,
4855 &min_partial_attr.attr, 5077 &min_partial_attr.attr,
5078 &cpu_partial_attr.attr,
4856 &objects_attr.attr, 5079 &objects_attr.attr,
4857 &objects_partial_attr.attr, 5080 &objects_partial_attr.attr,
4858 &partial_attr.attr, 5081 &partial_attr.attr,
@@ -4865,6 +5088,7 @@ static struct attribute *slab_attrs[] = {
4865 &destroy_by_rcu_attr.attr, 5088 &destroy_by_rcu_attr.attr,
4866 &shrink_attr.attr, 5089 &shrink_attr.attr,
4867 &reserved_attr.attr, 5090 &reserved_attr.attr,
5091 &slabs_cpu_partial_attr.attr,
4868#ifdef CONFIG_SLUB_DEBUG 5092#ifdef CONFIG_SLUB_DEBUG
4869 &total_objects_attr.attr, 5093 &total_objects_attr.attr,
4870 &slabs_attr.attr, 5094 &slabs_attr.attr,
@@ -4906,6 +5130,8 @@ static struct attribute *slab_attrs[] = {
4906 &order_fallback_attr.attr, 5130 &order_fallback_attr.attr,
4907 &cmpxchg_double_fail_attr.attr, 5131 &cmpxchg_double_fail_attr.attr,
4908 &cmpxchg_double_cpu_fail_attr.attr, 5132 &cmpxchg_double_cpu_fail_attr.attr,
5133 &cpu_partial_alloc_attr.attr,
5134 &cpu_partial_free_attr.attr,
4909#endif 5135#endif
4910#ifdef CONFIG_FAILSLAB 5136#ifdef CONFIG_FAILSLAB
4911 &failslab_attr.attr, 5137 &failslab_attr.attr,
@@ -5257,7 +5483,7 @@ static const struct file_operations proc_slabinfo_operations = {
5257 5483
5258static int __init slab_proc_init(void) 5484static int __init slab_proc_init(void)
5259{ 5485{
5260 proc_create("slabinfo", S_IRUGO, NULL, &proc_slabinfo_operations); 5486 proc_create("slabinfo", S_IRUSR, NULL, &proc_slabinfo_operations);
5261 return 0; 5487 return 0;
5262} 5488}
5263module_init(slab_proc_init); 5489module_init(slab_proc_init);