aboutsummaryrefslogtreecommitdiffstats
path: root/mm
diff options
context:
space:
mode:
author: Christoph Lameter <cl@linux.com> 2011-06-01 13:25:52 -0400
committer: Pekka Enberg <penberg@kernel.org> 2011-07-02 06:26:54 -0400
commit: 2cfb7455d223ab24b23df44be430faf92e12390f (patch)
tree: 6147fb32816d831d9f3f0e583a702aa4896f8354 /mm
parent: 61728d1efc927eccfa64c50ede4998a8765805c3 (diff)
slub: Rework allocator fastpaths
Rework the allocation paths so that updates of the page freelist, frozen state and number of objects use cmpxchg_double_slab().

Signed-off-by: Christoph Lameter <cl@linux.com>
Signed-off-by: Pekka Enberg <penberg@kernel.org>
Diffstat (limited to 'mm')
-rw-r--r-- mm/slub.c 409
1 files changed, 280 insertions, 129 deletions
diff --git a/mm/slub.c b/mm/slub.c
index 5cf98ff09360..5f0346c97c5f 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -992,11 +992,6 @@ static noinline int alloc_debug_processing(struct kmem_cache *s, struct page *pa
992 if (!check_slab(s, page)) 992 if (!check_slab(s, page))
993 goto bad; 993 goto bad;
994 994
995 if (!on_freelist(s, page, object)) {
996 object_err(s, page, object, "Object already allocated");
997 goto bad;
998 }
999
1000 if (!check_valid_pointer(s, page, object)) { 995 if (!check_valid_pointer(s, page, object)) {
1001 object_err(s, page, object, "Freelist Pointer check fails"); 996 object_err(s, page, object, "Freelist Pointer check fails");
1002 goto bad; 997 goto bad;
@@ -1060,14 +1055,6 @@ static noinline int free_debug_processing(struct kmem_cache *s,
1060 goto fail; 1055 goto fail;
1061 } 1056 }
1062 1057
1063 /* Special debug activities for freeing objects */
1064 if (!page->frozen && !page->freelist) {
1065 struct kmem_cache_node *n = get_node(s, page_to_nid(page));
1066
1067 spin_lock(&n->list_lock);
1068 remove_full(s, page);
1069 spin_unlock(&n->list_lock);
1070 }
1071 if (s->flags & SLAB_STORE_USER) 1058 if (s->flags & SLAB_STORE_USER)
1072 set_track(s, object, TRACK_FREE, addr); 1059 set_track(s, object, TRACK_FREE, addr);
1073 trace(s, page, object, 0); 1060 trace(s, page, object, 0);
@@ -1178,6 +1165,7 @@ static inline int check_object(struct kmem_cache *s, struct page *page,
1178 void *object, u8 val) { return 1; } 1165 void *object, u8 val) { return 1; }
1179static inline void add_full(struct kmem_cache *s, struct kmem_cache_node *n, 1166static inline void add_full(struct kmem_cache *s, struct kmem_cache_node *n,
1180 struct page *page) {} 1167 struct page *page) {}
1168static inline void remove_full(struct kmem_cache *s, struct page *page) {}
1181static inline unsigned long kmem_cache_flags(unsigned long objsize, 1169static inline unsigned long kmem_cache_flags(unsigned long objsize,
1182 unsigned long flags, const char *name, 1170 unsigned long flags, const char *name,
1183 void (*ctor)(void *)) 1171 void (*ctor)(void *))
@@ -1460,11 +1448,52 @@ static inline void remove_partial(struct kmem_cache_node *n,
1460static inline int lock_and_freeze_slab(struct kmem_cache *s, 1448static inline int lock_and_freeze_slab(struct kmem_cache *s,
1461 struct kmem_cache_node *n, struct page *page) 1449 struct kmem_cache_node *n, struct page *page)
1462{ 1450{
1463 if (slab_trylock(page)) { 1451 void *freelist;
1464 remove_partial(n, page); 1452 unsigned long counters;
1453 struct page new;
1454
1455
1456 if (!slab_trylock(page))
1457 return 0;
1458
1459 /*
1460 * Zap the freelist and set the frozen bit.
1461 * The old freelist is the list of objects for the
1462 * per cpu allocation list.
1463 */
1464 do {
1465 freelist = page->freelist;
1466 counters = page->counters;
1467 new.counters = counters;
1468 new.inuse = page->objects;
1469
1470 VM_BUG_ON(new.frozen);
1471 new.frozen = 1;
1472
1473 } while (!cmpxchg_double_slab(s, page,
1474 freelist, counters,
1475 NULL, new.counters,
1476 "lock and freeze"));
1477
1478 remove_partial(n, page);
1479
1480 if (freelist) {
1481 /* Populate the per cpu freelist */
1482 this_cpu_write(s->cpu_slab->freelist, freelist);
1483 this_cpu_write(s->cpu_slab->page, page);
1484 this_cpu_write(s->cpu_slab->node, page_to_nid(page));
1465 return 1; 1485 return 1;
1486 } else {
1487 /*
1488 * Slab page came from the wrong list. No object to allocate
1489 * from. Put it onto the correct list and continue partial
1490 * scan.
1491 */
1492 printk(KERN_ERR "SLUB: %s : Page without available objects on"
1493 " partial list\n", s->name);
1494 slab_unlock(page);
1495 return 0;
1466 } 1496 }
1467 return 0;
1468} 1497}
1469 1498
1470/* 1499/*
@@ -1564,59 +1593,6 @@ static struct page *get_partial(struct kmem_cache *s, gfp_t flags, int node)
1564 return get_any_partial(s, flags); 1593 return get_any_partial(s, flags);
1565} 1594}
1566 1595
1567/*
1568 * Move a page back to the lists.
1569 *
1570 * Must be called with the slab lock held.
1571 *
1572 * On exit the slab lock will have been dropped.
1573 */
1574static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail)
1575 __releases(bitlock)
1576{
1577 struct kmem_cache_node *n = get_node(s, page_to_nid(page));
1578
1579 if (page->inuse) {
1580
1581 if (page->freelist) {
1582 spin_lock(&n->list_lock);
1583 add_partial(n, page, tail);
1584 spin_unlock(&n->list_lock);
1585 stat(s, tail ? DEACTIVATE_TO_TAIL : DEACTIVATE_TO_HEAD);
1586 } else {
1587 stat(s, DEACTIVATE_FULL);
1588 if (kmem_cache_debug(s) && (s->flags & SLAB_STORE_USER)) {
1589 spin_lock(&n->list_lock);
1590 add_full(s, n, page);
1591 spin_unlock(&n->list_lock);
1592 }
1593 }
1594 slab_unlock(page);
1595 } else {
1596 stat(s, DEACTIVATE_EMPTY);
1597 if (n->nr_partial < s->min_partial) {
1598 /*
1599 * Adding an empty slab to the partial slabs in order
1600 * to avoid page allocator overhead. This slab needs
1601 * to come after the other slabs with objects in
1602 * so that the others get filled first. That way the
1603 * size of the partial list stays small.
1604 *
1605 * kmem_cache_shrink can reclaim any empty slabs from
1606 * the partial list.
1607 */
1608 spin_lock(&n->list_lock);
1609 add_partial(n, page, 1);
1610 spin_unlock(&n->list_lock);
1611 slab_unlock(page);
1612 } else {
1613 slab_unlock(page);
1614 stat(s, FREE_SLAB);
1615 discard_slab(s, page);
1616 }
1617 }
1618}
1619
1620#ifdef CONFIG_PREEMPT 1596#ifdef CONFIG_PREEMPT
1621/* 1597/*
1622 * Calculate the next globally unique transaction for disambiguiation 1598 * Calculate the next globally unique transaction for disambiguiation
@@ -1686,37 +1662,158 @@ void init_kmem_cache_cpus(struct kmem_cache *s)
1686/* 1662/*
1687 * Remove the cpu slab 1663 * Remove the cpu slab
1688 */ 1664 */
1665
1666/*
1667 * Remove the cpu slab
1668 */
1689static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) 1669static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
1690 __releases(bitlock)
1691{ 1670{
1671 enum slab_modes { M_NONE, M_PARTIAL, M_FULL, M_FREE };
1692 struct page *page = c->page; 1672 struct page *page = c->page;
1693 int tail = 1; 1673 struct kmem_cache_node *n = get_node(s, page_to_nid(page));
1694 1674 int lock = 0;
1695 if (page->freelist) 1675 enum slab_modes l = M_NONE, m = M_NONE;
1676 void *freelist;
1677 void *nextfree;
1678 int tail = 0;
1679 struct page new;
1680 struct page old;
1681
1682 if (page->freelist) {
1696 stat(s, DEACTIVATE_REMOTE_FREES); 1683 stat(s, DEACTIVATE_REMOTE_FREES);
1684 tail = 1;
1685 }
1686
1687 c->tid = next_tid(c->tid);
1688 c->page = NULL;
1689 freelist = c->freelist;
1690 c->freelist = NULL;
1691
1697 /* 1692 /*
1698 * Merge cpu freelist into slab freelist. Typically we get here 1693 * Stage one: Free all available per cpu objects back
1699 * because both freelists are empty. So this is unlikely 1694 * to the page freelist while it is still frozen. Leave the
1700 * to occur. 1695 * last one.
1696 *
1697 * There is no need to take the list->lock because the page
1698 * is still frozen.
1701 */ 1699 */
1702 while (unlikely(c->freelist)) { 1700 while (freelist && (nextfree = get_freepointer(s, freelist))) {
1703 void **object; 1701 void *prior;
1702 unsigned long counters;
1703
1704 do {
1705 prior = page->freelist;
1706 counters = page->counters;
1707 set_freepointer(s, freelist, prior);
1708 new.counters = counters;
1709 new.inuse--;
1710 VM_BUG_ON(!new.frozen);
1711
1712 } while (!cmpxchg_double_slab(s, page,
1713 prior, counters,
1714 freelist, new.counters,
1715 "drain percpu freelist"));
1716
1717 freelist = nextfree;
1718 }
1704 1719
1705 tail = 0; /* Hot objects. Put the slab first */ 1720 /*
1721 * Stage two: Ensure that the page is unfrozen while the
1722 * list presence reflects the actual number of objects
1723 * during unfreeze.
1724 *
1725 * We setup the list membership and then perform a cmpxchg
1726 * with the count. If there is a mismatch then the page
1727 * is not unfrozen but the page is on the wrong list.
1728 *
1729 * Then we restart the process which may have to remove
1730 * the page from the list that we just put it on again
1731 * because the number of objects in the slab may have
1732 * changed.
1733 */
1734redo:
1706 1735
1707 /* Retrieve object from cpu_freelist */ 1736 old.freelist = page->freelist;
1708 object = c->freelist; 1737 old.counters = page->counters;
1709 c->freelist = get_freepointer(s, c->freelist); 1738 VM_BUG_ON(!old.frozen);
1710 1739
1711 /* And put onto the regular freelist */ 1740 /* Determine target state of the slab */
1712 set_freepointer(s, object, page->freelist); 1741 new.counters = old.counters;
1713 page->freelist = object; 1742 if (freelist) {
1714 page->inuse--; 1743 new.inuse--;
1744 set_freepointer(s, freelist, old.freelist);
1745 new.freelist = freelist;
1746 } else
1747 new.freelist = old.freelist;
1748
1749 new.frozen = 0;
1750
1751 if (!new.inuse && n->nr_partial < s->min_partial)
1752 m = M_FREE;
1753 else if (new.freelist) {
1754 m = M_PARTIAL;
1755 if (!lock) {
1756 lock = 1;
1757 /*
1758 * Taking the spinlock removes the possiblity
1759 * that acquire_slab() will see a slab page that
1760 * is frozen
1761 */
1762 spin_lock(&n->list_lock);
1763 }
1764 } else {
1765 m = M_FULL;
1766 if (kmem_cache_debug(s) && !lock) {
1767 lock = 1;
1768 /*
1769 * This also ensures that the scanning of full
1770 * slabs from diagnostic functions will not see
1771 * any frozen slabs.
1772 */
1773 spin_lock(&n->list_lock);
1774 }
1775 }
1776
1777 if (l != m) {
1778
1779 if (l == M_PARTIAL)
1780
1781 remove_partial(n, page);
1782
1783 else if (l == M_FULL)
1784
1785 remove_full(s, page);
1786
1787 if (m == M_PARTIAL) {
1788
1789 add_partial(n, page, tail);
1790 stat(s, tail ? DEACTIVATE_TO_TAIL : DEACTIVATE_TO_HEAD);
1791
1792 } else if (m == M_FULL) {
1793
1794 stat(s, DEACTIVATE_FULL);
1795 add_full(s, n, page);
1796
1797 }
1798 }
1799
1800 l = m;
1801 if (!cmpxchg_double_slab(s, page,
1802 old.freelist, old.counters,
1803 new.freelist, new.counters,
1804 "unfreezing slab"))
1805 goto redo;
1806
1807 slab_unlock(page);
1808
1809 if (lock)
1810 spin_unlock(&n->list_lock);
1811
1812 if (m == M_FREE) {
1813 stat(s, DEACTIVATE_EMPTY);
1814 discard_slab(s, page);
1815 stat(s, FREE_SLAB);
1715 } 1816 }
1716 c->page = NULL;
1717 c->tid = next_tid(c->tid);
1718 page->frozen = 0;
1719 unfreeze_slab(s, page, tail);
1720} 1817}
1721 1818
1722static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) 1819static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
@@ -1851,6 +1948,8 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
1851 void **object; 1948 void **object;
1852 struct page *page; 1949 struct page *page;
1853 unsigned long flags; 1950 unsigned long flags;
1951 struct page new;
1952 unsigned long counters;
1854 1953
1855 local_irq_save(flags); 1954 local_irq_save(flags);
1856#ifdef CONFIG_PREEMPT 1955#ifdef CONFIG_PREEMPT
@@ -1873,25 +1972,33 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
1873 if (unlikely(!node_match(c, node))) 1972 if (unlikely(!node_match(c, node)))
1874 goto another_slab; 1973 goto another_slab;
1875 1974
1876 stat(s, ALLOC_REFILL); 1975 stat(s, ALLOC_SLOWPATH);
1976
1977 do {
1978 object = page->freelist;
1979 counters = page->counters;
1980 new.counters = counters;
1981 new.inuse = page->objects;
1982 VM_BUG_ON(!new.frozen);
1983
1984 } while (!cmpxchg_double_slab(s, page,
1985 object, counters,
1986 NULL, new.counters,
1987 "__slab_alloc"));
1877 1988
1878load_freelist: 1989load_freelist:
1879 VM_BUG_ON(!page->frozen); 1990 VM_BUG_ON(!page->frozen);
1880 1991
1881 object = page->freelist;
1882 if (unlikely(!object)) 1992 if (unlikely(!object))
1883 goto another_slab; 1993 goto another_slab;
1884 if (kmem_cache_debug(s))
1885 goto debug;
1886 1994
1887 c->freelist = get_freepointer(s, object); 1995 stat(s, ALLOC_REFILL);
1888 page->inuse = page->objects;
1889 page->freelist = NULL;
1890 1996
1891 slab_unlock(page); 1997 slab_unlock(page);
1998
1999 c->freelist = get_freepointer(s, object);
1892 c->tid = next_tid(c->tid); 2000 c->tid = next_tid(c->tid);
1893 local_irq_restore(flags); 2001 local_irq_restore(flags);
1894 stat(s, ALLOC_SLOWPATH);
1895 return object; 2002 return object;
1896 2003
1897another_slab: 2004another_slab:
@@ -1901,9 +2008,10 @@ new_slab:
1901 page = get_partial(s, gfpflags, node); 2008 page = get_partial(s, gfpflags, node);
1902 if (page) { 2009 if (page) {
1903 stat(s, ALLOC_FROM_PARTIAL); 2010 stat(s, ALLOC_FROM_PARTIAL);
1904 page->frozen = 1; 2011 object = c->freelist;
1905 c->node = page_to_nid(page); 2012
1906 c->page = page; 2013 if (kmem_cache_debug(s))
2014 goto debug;
1907 goto load_freelist; 2015 goto load_freelist;
1908 } 2016 }
1909 2017
@@ -1911,12 +2019,19 @@ new_slab:
1911 2019
1912 if (page) { 2020 if (page) {
1913 c = __this_cpu_ptr(s->cpu_slab); 2021 c = __this_cpu_ptr(s->cpu_slab);
1914 stat(s, ALLOC_SLAB);
1915 if (c->page) 2022 if (c->page)
1916 flush_slab(s, c); 2023 flush_slab(s, c);
1917 2024
2025 /*
2026 * No other reference to the page yet so we can
2027 * muck around with it freely without cmpxchg
2028 */
2029 object = page->freelist;
2030 page->freelist = NULL;
2031 page->inuse = page->objects;
2032
2033 stat(s, ALLOC_SLAB);
1918 slab_lock(page); 2034 slab_lock(page);
1919 page->frozen = 1;
1920 c->node = page_to_nid(page); 2035 c->node = page_to_nid(page);
1921 c->page = page; 2036 c->page = page;
1922 goto load_freelist; 2037 goto load_freelist;
@@ -1925,12 +2040,12 @@ new_slab:
1925 slab_out_of_memory(s, gfpflags, node); 2040 slab_out_of_memory(s, gfpflags, node);
1926 local_irq_restore(flags); 2041 local_irq_restore(flags);
1927 return NULL; 2042 return NULL;
2043
1928debug: 2044debug:
1929 if (!alloc_debug_processing(s, page, object, addr)) 2045 if (!object || !alloc_debug_processing(s, page, object, addr))
1930 goto another_slab; 2046 goto new_slab;
1931 2047
1932 page->inuse++; 2048 c->freelist = get_freepointer(s, object);
1933 page->freelist = get_freepointer(s, object);
1934 deactivate_slab(s, c); 2049 deactivate_slab(s, c);
1935 c->page = NULL; 2050 c->page = NULL;
1936 c->node = NUMA_NO_NODE; 2051 c->node = NUMA_NO_NODE;
@@ -2082,6 +2197,11 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
2082{ 2197{
2083 void *prior; 2198 void *prior;
2084 void **object = (void *)x; 2199 void **object = (void *)x;
2200 int was_frozen;
2201 int inuse;
2202 struct page new;
2203 unsigned long counters;
2204 struct kmem_cache_node *n = NULL;
2085 unsigned long uninitialized_var(flags); 2205 unsigned long uninitialized_var(flags);
2086 2206
2087 local_irq_save(flags); 2207 local_irq_save(flags);
@@ -2091,32 +2211,65 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
2091 if (kmem_cache_debug(s) && !free_debug_processing(s, page, x, addr)) 2211 if (kmem_cache_debug(s) && !free_debug_processing(s, page, x, addr))
2092 goto out_unlock; 2212 goto out_unlock;
2093 2213
2094 prior = page->freelist; 2214 do {
2095 set_freepointer(s, object, prior); 2215 prior = page->freelist;
2096 page->freelist = object; 2216 counters = page->counters;
2097 page->inuse--; 2217 set_freepointer(s, object, prior);
2218 new.counters = counters;
2219 was_frozen = new.frozen;
2220 new.inuse--;
2221 if ((!new.inuse || !prior) && !was_frozen && !n) {
2222 n = get_node(s, page_to_nid(page));
2223 /*
2224 * Speculatively acquire the list_lock.
2225 * If the cmpxchg does not succeed then we may
2226 * drop the list_lock without any processing.
2227 *
2228 * Otherwise the list_lock will synchronize with
2229 * other processors updating the list of slabs.
2230 */
2231 spin_lock(&n->list_lock);
2232 }
2233 inuse = new.inuse;
2098 2234
2099 if (unlikely(page->frozen)) { 2235 } while (!cmpxchg_double_slab(s, page,
2100 stat(s, FREE_FROZEN); 2236 prior, counters,
2101 goto out_unlock; 2237 object, new.counters,
2102 } 2238 "__slab_free"));
2103 2239
2104 if (unlikely(!page->inuse)) 2240 if (likely(!n)) {
2105 goto slab_empty; 2241 /*
2242 * The list lock was not taken therefore no list
2243 * activity can be necessary.
2244 */
2245 if (was_frozen)
2246 stat(s, FREE_FROZEN);
2247 goto out_unlock;
2248 }
2106 2249
2107 /* 2250 /*
2108 * Objects left in the slab. If it was not on the partial list before 2251 * was_frozen may have been set after we acquired the list_lock in
2109 * then add it. 2252 * an earlier loop. So we need to check it here again.
2110 */ 2253 */
2111 if (unlikely(!prior)) { 2254 if (was_frozen)
2112 struct kmem_cache_node *n = get_node(s, page_to_nid(page)); 2255 stat(s, FREE_FROZEN);
2256 else {
2257 if (unlikely(!inuse && n->nr_partial > s->min_partial))
2258 goto slab_empty;
2113 2259
2114 spin_lock(&n->list_lock); 2260 /*
2115 add_partial(get_node(s, page_to_nid(page)), page, 1); 2261 * Objects left in the slab. If it was not on the partial list before
2116 spin_unlock(&n->list_lock); 2262 * then add it.
2117 stat(s, FREE_ADD_PARTIAL); 2263 */
2264 if (unlikely(!prior)) {
2265 remove_full(s, page);
2266 add_partial(n, page, 0);
2267 stat(s, FREE_ADD_PARTIAL);
2268 }
2118 } 2269 }
2119 2270
2271 spin_unlock(&n->list_lock);
2272
2120out_unlock: 2273out_unlock:
2121 slab_unlock(page); 2274 slab_unlock(page);
2122 local_irq_restore(flags); 2275 local_irq_restore(flags);
@@ -2127,13 +2280,11 @@ slab_empty:
2127 /* 2280 /*
2128 * Slab still on the partial list. 2281 * Slab still on the partial list.
2129 */ 2282 */
2130 struct kmem_cache_node *n = get_node(s, page_to_nid(page));
2131
2132 spin_lock(&n->list_lock);
2133 remove_partial(n, page); 2283 remove_partial(n, page);
2134 spin_unlock(&n->list_lock);
2135 stat(s, FREE_REMOVE_PARTIAL); 2284 stat(s, FREE_REMOVE_PARTIAL);
2136 } 2285 }
2286
2287 spin_unlock(&n->list_lock);
2137 slab_unlock(page); 2288 slab_unlock(page);
2138 local_irq_restore(flags); 2289 local_irq_restore(flags);
2139 stat(s, FREE_SLAB); 2290 stat(s, FREE_SLAB);