Diffstat (limited to 'mm/slub.c')
-rw-r--r--  mm/slub.c  376
1 file changed, 309 insertions, 67 deletions
diff --git a/mm/slub.c b/mm/slub.c
index e15aa7f193c9..94d2a33a866e 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -64,7 +64,7 @@
  * we must stay away from it for a while since we may cause a bouncing
  * cacheline if we try to acquire the lock. So go onto the next slab.
  * If all pages are busy then we may allocate a new slab instead of reusing
- * a partial slab. A new slab has noone operating on it and thus there is
+ * a partial slab. A new slab has no one operating on it and thus there is
  * no danger of cacheline contention.
  *
  * Interrupts are disabled during allocation and deallocation in order to
@@ -217,7 +217,7 @@ static inline void sysfs_slab_remove(struct kmem_cache *s)
 
 #endif
 
-static inline void stat(struct kmem_cache *s, enum stat_item si)
+static inline void stat(const struct kmem_cache *s, enum stat_item si)
 {
 #ifdef CONFIG_SLUB_STATS
         __this_cpu_inc(s->cpu_slab->stat[si]);
@@ -281,11 +281,40 @@ static inline int slab_index(void *p, struct kmem_cache *s, void *addr)
         return (p - addr) / s->size;
 }
 
+static inline size_t slab_ksize(const struct kmem_cache *s)
+{
+#ifdef CONFIG_SLUB_DEBUG
+        /*
+         * Debugging requires use of the padding between object
+         * and whatever may come after it.
+         */
+        if (s->flags & (SLAB_RED_ZONE | SLAB_POISON))
+                return s->objsize;
+
+#endif
+        /*
+         * If we have the need to store the freelist pointer
+         * back there or track user information then we can
+         * only use the space before that information.
+         */
+        if (s->flags & (SLAB_DESTROY_BY_RCU | SLAB_STORE_USER))
+                return s->inuse;
+        /*
+         * Else we can use all the padding etc for the allocation
+         */
+        return s->size;
+}
+
+static inline int order_objects(int order, unsigned long size, int reserved)
+{
+        return ((PAGE_SIZE << order) - reserved) / size;
+}
+
 static inline struct kmem_cache_order_objects oo_make(int order,
-                                                unsigned long size)
+                                                unsigned long size, int reserved)
 {
         struct kmem_cache_order_objects x = {
-                (order << OO_SHIFT) + (PAGE_SIZE << order) / size
+                (order << OO_SHIFT) + order_objects(order, size, reserved)
         };
 
         return x;
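
To make the effect of the new order_objects() helper concrete: it divides the usable bytes of a slab, (PAGE_SIZE << order) minus the reserved tail, by the object size. A minimal userspace sketch of that arithmetic follows; the 4 KiB page size, the 256-byte object and the 16-byte reservation are illustrative values, not taken from the patch.

#include <stdio.h>

#define PAGE_SIZE 4096UL        /* illustrative; the real value is per-arch */

/* Same computation as the helper above: usable bytes divided by object size. */
static int order_objects(int order, unsigned long size, int reserved)
{
        return ((PAGE_SIZE << order) - reserved) / size;
}

int main(void)
{
        /* a 256-byte object cache, with and without a 16-byte reserved tail */
        printf("order 0, reserved 0:  %d objects\n", order_objects(0, 256, 0));
        printf("order 0, reserved 16: %d objects\n", order_objects(0, 256, 16));
        printf("order 1, reserved 16: %d objects\n", order_objects(1, 256, 16));
        return 0;
}

With these numbers the 16 reserved bytes cost exactly one object per slab at the orders shown; the rest of the allocator never needs to know why the tail is reserved.
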
@@ -617,7 +646,7 @@ static int slab_pad_check(struct kmem_cache *s, struct page *page)
                 return 1;
 
         start = page_address(page);
-        length = (PAGE_SIZE << compound_order(page));
+        length = (PAGE_SIZE << compound_order(page)) - s->reserved;
         end = start + length;
         remainder = length % s->size;
         if (!remainder)
@@ -698,7 +727,7 @@ static int check_slab(struct kmem_cache *s, struct page *page)
                 return 0;
         }
 
-        maxobj = (PAGE_SIZE << compound_order(page)) / s->size;
+        maxobj = order_objects(compound_order(page), s->size, s->reserved);
         if (page->objects > maxobj) {
                 slab_err(s, page, "objects %u > max %u",
                         s->name, page->objects, maxobj);
@@ -748,7 +777,7 @@ static int on_freelist(struct kmem_cache *s, struct page *page, void *search)
                 nr++;
         }
 
-        max_objects = (PAGE_SIZE << compound_order(page)) / s->size;
+        max_objects = order_objects(compound_order(page), s->size, s->reserved);
         if (max_objects > MAX_OBJS_PER_PAGE)
                 max_objects = MAX_OBJS_PER_PAGE;
 
@@ -800,21 +829,31 @@ static inline int slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags)
 static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags, void *object)
 {
         flags &= gfp_allowed_mask;
-        kmemcheck_slab_alloc(s, flags, object, s->objsize);
+        kmemcheck_slab_alloc(s, flags, object, slab_ksize(s));
         kmemleak_alloc_recursive(object, s->objsize, 1, s->flags, flags);
 }
 
 static inline void slab_free_hook(struct kmem_cache *s, void *x)
 {
         kmemleak_free_recursive(x, s->flags);
-}
 
-static inline void slab_free_hook_irq(struct kmem_cache *s, void *object)
-{
-        kmemcheck_slab_free(s, object, s->objsize);
-        debug_check_no_locks_freed(object, s->objsize);
+        /*
+         * Trouble is that we may no longer disable interrupts in the fast
+         * path. So in order to make the debug calls that expect irqs to be
+         * disabled we need to disable interrupts temporarily.
+         */
+#if defined(CONFIG_KMEMCHECK) || defined(CONFIG_LOCKDEP)
+        {
+                unsigned long flags;
+
+                local_irq_save(flags);
+                kmemcheck_slab_free(s, x, s->objsize);
+                debug_check_no_locks_freed(x, s->objsize);
+                local_irq_restore(flags);
+        }
+#endif
         if (!(s->flags & SLAB_DEBUG_OBJECTS))
-                debug_check_no_obj_freed(object, s->objsize);
+                debug_check_no_obj_freed(x, s->objsize);
 }
 
 /*
@@ -1101,9 +1140,6 @@ static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags,
 
 static inline void slab_free_hook(struct kmem_cache *s, void *x) {}
 
-static inline void slab_free_hook_irq(struct kmem_cache *s,
-                                                void *object) {}
-
 #endif /* CONFIG_SLUB_DEBUG */
 
 /*
@@ -1249,21 +1285,38 @@ static void __free_slab(struct kmem_cache *s, struct page *page)
         __free_pages(page, order);
 }
 
+#define need_reserve_slab_rcu                                           \
+        (sizeof(((struct page *)NULL)->lru) < sizeof(struct rcu_head))
+
 static void rcu_free_slab(struct rcu_head *h)
 {
         struct page *page;
 
-        page = container_of((struct list_head *)h, struct page, lru);
+        if (need_reserve_slab_rcu)
+                page = virt_to_head_page(h);
+        else
+                page = container_of((struct list_head *)h, struct page, lru);
+
         __free_slab(page->slab, page);
 }
 
 static void free_slab(struct kmem_cache *s, struct page *page)
 {
         if (unlikely(s->flags & SLAB_DESTROY_BY_RCU)) {
-                /*
-                 * RCU free overloads the RCU head over the LRU
-                 */
-                struct rcu_head *head = (void *)&page->lru;
+                struct rcu_head *head;
+
+                if (need_reserve_slab_rcu) {
+                        int order = compound_order(page);
+                        int offset = (PAGE_SIZE << order) - s->reserved;
+
+                        VM_BUG_ON(s->reserved != sizeof(*head));
+                        head = page_address(page) + offset;
+                } else {
+                        /*
+                         * RCU free overloads the RCU head over the LRU
+                         */
+                        head = (void *)&page->lru;
+                }
 
                 call_rcu(head, rcu_free_slab);
         } else
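
The need_reserve_slab_rcu test above is a compile-time comparison: struct rcu_head is normally overlaid on page->lru (two pointers), and only when rcu_head is larger, e.g. under debugging options that grow it, does free_slab() place it in the reserved tail of the slab instead. A small userspace sketch of the same sizeof trick, using deliberately simplified stand-in types rather than the real kernel definitions:

#include <stdio.h>

/* Stand-in types; the real struct page and struct rcu_head are larger. */
struct list_head { void *next, *prev; };
struct rcu_head  { void *next; void (*func)(struct rcu_head *); };
struct page      { struct list_head lru; /* ... */ };

/* Same shape as the macro added above: a compile-time size comparison. */
#define need_reserve_slab_rcu \
        (sizeof(((struct page *)NULL)->lru) < sizeof(struct rcu_head))

int main(void)
{
        if (need_reserve_slab_rcu)
                printf("rcu_head does not fit over lru: reserve %zu tail bytes\n",
                       sizeof(struct rcu_head));
        else
                printf("rcu_head fits over lru: no reservation needed\n");
        return 0;
}

With these plain two-pointer layouts the test evaluates to false, which matches the common kernel configuration where no tail reservation is needed.
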
@@ -1487,6 +1540,78 @@ static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail)
         }
 }
 
+#ifdef CONFIG_CMPXCHG_LOCAL
+#ifdef CONFIG_PREEMPT
+/*
+ * Calculate the next globally unique transaction for disambiguation
+ * during cmpxchg. The transactions start with the cpu number and are then
+ * incremented by CONFIG_NR_CPUS.
+ */
+#define TID_STEP  roundup_pow_of_two(CONFIG_NR_CPUS)
+#else
+/*
+ * No preemption supported therefore also no need to check for
+ * different cpus.
+ */
+#define TID_STEP 1
+#endif
+
+static inline unsigned long next_tid(unsigned long tid)
+{
+        return tid + TID_STEP;
+}
+
+static inline unsigned int tid_to_cpu(unsigned long tid)
+{
+        return tid % TID_STEP;
+}
+
+static inline unsigned long tid_to_event(unsigned long tid)
+{
+        return tid / TID_STEP;
+}
+
+static inline unsigned int init_tid(int cpu)
+{
+        return cpu;
+}
+
+static inline void note_cmpxchg_failure(const char *n,
+                const struct kmem_cache *s, unsigned long tid)
+{
+#ifdef SLUB_DEBUG_CMPXCHG
+        unsigned long actual_tid = __this_cpu_read(s->cpu_slab->tid);
+
+        printk(KERN_INFO "%s %s: cmpxchg redo ", n, s->name);
+
+#ifdef CONFIG_PREEMPT
+        if (tid_to_cpu(tid) != tid_to_cpu(actual_tid))
+                printk("due to cpu change %d -> %d\n",
+                        tid_to_cpu(tid), tid_to_cpu(actual_tid));
+        else
+#endif
+        if (tid_to_event(tid) != tid_to_event(actual_tid))
+                printk("due to cpu running other code. Event %ld->%ld\n",
+                        tid_to_event(tid), tid_to_event(actual_tid));
+        else
+                printk("for unknown reason: actual=%lx was=%lx target=%lx\n",
+                        actual_tid, tid, next_tid(tid));
+#endif
+        stat(s, CMPXCHG_DOUBLE_CPU_FAIL);
+}
+
+#endif
+
+void init_kmem_cache_cpus(struct kmem_cache *s)
+{
+#ifdef CONFIG_CMPXCHG_LOCAL
+        int cpu;
+
+        for_each_possible_cpu(cpu)
+                per_cpu_ptr(s->cpu_slab, cpu)->tid = init_tid(cpu);
+#endif
+
+}
 /*
  * Remove the cpu slab
  */
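
Under CONFIG_PREEMPT a transaction id packs the cpu number into its low bits (TID_STEP is CONFIG_NR_CPUS rounded up to a power of two) and an operation counter above them, so a failed cmpxchg can be attributed to either a cpu migration or another operation on the same cpu. A standalone model of the helpers, assuming a hypothetical 4-cpu system:

#include <stdio.h>

#define TID_STEP 4      /* roundup_pow_of_two(nr_cpus) for an assumed 4-cpu box */

static unsigned long next_tid(unsigned long tid)     { return tid + TID_STEP; }
static unsigned int tid_to_cpu(unsigned long tid)    { return tid % TID_STEP; }
static unsigned long tid_to_event(unsigned long tid) { return tid / TID_STEP; }
static unsigned int init_tid(int cpu)                { return cpu; }

int main(void)
{
        unsigned long tid = init_tid(2);        /* cpu 2, event 0 */

        tid = next_tid(tid);                    /* cpu 2, event 1 */
        tid = next_tid(tid);                    /* cpu 2, event 2 */
        printf("tid=%lu cpu=%u event=%lu\n",
               tid, tid_to_cpu(tid), tid_to_event(tid));
        return 0;
}
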
@@ -1518,6 +1643,9 @@ static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
                 page->inuse--;
         }
         c->page = NULL;
+#ifdef CONFIG_CMPXCHG_LOCAL
+        c->tid = next_tid(c->tid);
+#endif
         unfreeze_slab(s, page, tail);
 }
 
@@ -1652,6 +1780,19 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
 {
         void **object;
         struct page *new;
+#ifdef CONFIG_CMPXCHG_LOCAL
+        unsigned long flags;
+
+        local_irq_save(flags);
+#ifdef CONFIG_PREEMPT
+        /*
+         * We may have been preempted and rescheduled on a different
+         * cpu before disabling interrupts. Need to reload cpu area
+         * pointer.
+         */
+        c = this_cpu_ptr(s->cpu_slab);
+#endif
+#endif
 
         /* We handle __GFP_ZERO in the caller */
         gfpflags &= ~__GFP_ZERO;
@@ -1678,6 +1819,10 @@ load_freelist:
         c->node = page_to_nid(c->page);
 unlock_out:
         slab_unlock(c->page);
+#ifdef CONFIG_CMPXCHG_LOCAL
+        c->tid = next_tid(c->tid);
+        local_irq_restore(flags);
+#endif
         stat(s, ALLOC_SLOWPATH);
         return object;
 
@@ -1713,6 +1858,9 @@ new_slab:
         }
         if (!(gfpflags & __GFP_NOWARN) && printk_ratelimit())
                 slab_out_of_memory(s, gfpflags, node);
+#ifdef CONFIG_CMPXCHG_LOCAL
+        local_irq_restore(flags);
+#endif
         return NULL;
 debug:
         if (!alloc_debug_processing(s, c->page, object, addr))
@@ -1739,23 +1887,76 @@ static __always_inline void *slab_alloc(struct kmem_cache *s,
 {
         void **object;
         struct kmem_cache_cpu *c;
+#ifdef CONFIG_CMPXCHG_LOCAL
+        unsigned long tid;
+#else
         unsigned long flags;
+#endif
 
         if (slab_pre_alloc_hook(s, gfpflags))
                 return NULL;
 
+#ifndef CONFIG_CMPXCHG_LOCAL
         local_irq_save(flags);
+#else
+redo:
+#endif
+
+        /*
+         * Must read kmem_cache cpu data via this cpu ptr. Preemption is
+         * enabled. We may switch back and forth between cpus while
+         * reading from one cpu area. That does not matter as long
+         * as we end up on the original cpu again when doing the cmpxchg.
+         */
         c = __this_cpu_ptr(s->cpu_slab);
+
+#ifdef CONFIG_CMPXCHG_LOCAL
+        /*
+         * The transaction ids are globally unique per cpu and per operation on
+         * a per cpu queue. Thus we can guarantee that the cmpxchg_double
+         * occurs on the right processor and that there was no operation on the
+         * linked list in between.
+         */
+        tid = c->tid;
+        barrier();
+#endif
+
         object = c->freelist;
         if (unlikely(!object || !node_match(c, node)))
 
                 object = __slab_alloc(s, gfpflags, node, addr, c);
 
         else {
+#ifdef CONFIG_CMPXCHG_LOCAL
+                /*
+                 * The cmpxchg will only match if there was no additional
+                 * operation and if we are on the right processor.
+                 *
+                 * The cmpxchg does the following atomically (without lock semantics!)
+                 * 1. Relocate first pointer to the current per cpu area.
+                 * 2. Verify that tid and freelist have not been changed
+                 * 3. If they were not changed replace tid and freelist
+                 *
+                 * Since this is without lock semantics the protection is only against
+                 * code executing on this cpu, *not* from access by other cpus.
+                 */
+                if (unlikely(!this_cpu_cmpxchg_double(
+                                s->cpu_slab->freelist, s->cpu_slab->tid,
+                                object, tid,
+                                get_freepointer(s, object), next_tid(tid)))) {
+
+                        note_cmpxchg_failure("slab_alloc", s, tid);
+                        goto redo;
+                }
+#else
                 c->freelist = get_freepointer(s, object);
+#endif
                 stat(s, ALLOC_FASTPATH);
         }
+
+#ifndef CONFIG_CMPXCHG_LOCAL
         local_irq_restore(flags);
+#endif
 
         if (unlikely(gfpflags & __GFP_ZERO) && object)
                 memset(object, 0, s->objsize);
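
The allocation fastpath above thus becomes: read tid and freelist without disabling interrupts, compute the next free pointer, and atomically replace the (freelist, tid) pair only if neither changed in the meantime, otherwise note the failure and retry. The following userspace sketch models that control flow with a plain, non-atomic stand-in for this_cpu_cmpxchg_double(); all names and the single-threaded setting are illustrative, and the kernel version depends on the real per-cpu double-word cmpxchg for correctness. The free fastpath further below mirrors the same pattern, pushing the object back onto the freelist instead of popping it.

#include <stdbool.h>
#include <stdio.h>

struct cpu_slab {
        void **freelist;        /* first free object, or NULL */
        unsigned long tid;      /* transaction id, bumped on every operation */
};

/*
 * Stand-in for this_cpu_cmpxchg_double(): succeed only if both words still
 * hold the expected values, then install the new pair. Not atomic here.
 */
static bool cmpxchg_double_model(struct cpu_slab *c,
                                 void **old_list, unsigned long old_tid,
                                 void **new_list, unsigned long new_tid)
{
        if (c->freelist != old_list || c->tid != old_tid)
                return false;
        c->freelist = new_list;
        c->tid = new_tid;
        return true;
}

static void *alloc_fastpath(struct cpu_slab *c)
{
        void **object;
        unsigned long tid;

        do {
                tid = c->tid;
                object = c->freelist;
                if (!object)
                        return NULL;    /* would fall back to the slow path */
                /* *object holds the pointer to the next free object */
        } while (!cmpxchg_double_model(c, object, tid,
                                       (void **)*object, tid + 1));
        return object;
}

int main(void)
{
        /* three free slots chained together: slots[0] -> slots[1] -> slots[2] -> NULL */
        void *slots[3] = { &slots[1], &slots[2], NULL };
        struct cpu_slab c = { .freelist = &slots[0], .tid = 0 };

        void *a = alloc_fastpath(&c);
        void *b = alloc_fastpath(&c);

        printf("got %p then %p, tid is now %lu\n", a, b, c.tid);
        return 0;
}
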
@@ -1833,9 +2034,13 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
 {
         void *prior;
         void **object = (void *)x;
+#ifdef CONFIG_CMPXCHG_LOCAL
+        unsigned long flags;
 
-        stat(s, FREE_SLOWPATH);
+        local_irq_save(flags);
+#endif
         slab_lock(page);
+        stat(s, FREE_SLOWPATH);
 
         if (kmem_cache_debug(s))
                 goto debug;
@@ -1865,6 +2070,9 @@ checks_ok:
 
 out_unlock:
         slab_unlock(page);
+#ifdef CONFIG_CMPXCHG_LOCAL
+        local_irq_restore(flags);
+#endif
         return;
 
 slab_empty:
@@ -1876,6 +2084,9 @@ slab_empty:
                 stat(s, FREE_REMOVE_PARTIAL);
         }
         slab_unlock(page);
+#ifdef CONFIG_CMPXCHG_LOCAL
+        local_irq_restore(flags);
+#endif
         stat(s, FREE_SLAB);
         discard_slab(s, page);
         return;
@@ -1902,23 +2113,56 @@ static __always_inline void slab_free(struct kmem_cache *s,
 {
         void **object = (void *)x;
         struct kmem_cache_cpu *c;
+#ifdef CONFIG_CMPXCHG_LOCAL
+        unsigned long tid;
+#else
         unsigned long flags;
+#endif
 
         slab_free_hook(s, x);
 
+#ifndef CONFIG_CMPXCHG_LOCAL
         local_irq_save(flags);
+
+#else
+redo:
+#endif
+
+        /*
+         * Determine the current cpu's per cpu slab.
+         * The cpu may change afterward. However that does not matter since
+         * data is retrieved via this pointer. If we are on the same cpu
+         * during the cmpxchg then the free will succeed.
+         */
         c = __this_cpu_ptr(s->cpu_slab);
 
-        slab_free_hook_irq(s, x);
+#ifdef CONFIG_CMPXCHG_LOCAL
+        tid = c->tid;
+        barrier();
+#endif
 
         if (likely(page == c->page && c->node != NUMA_NO_NODE)) {
                 set_freepointer(s, object, c->freelist);
+
+#ifdef CONFIG_CMPXCHG_LOCAL
+                if (unlikely(!this_cpu_cmpxchg_double(
+                                s->cpu_slab->freelist, s->cpu_slab->tid,
+                                c->freelist, tid,
+                                object, next_tid(tid)))) {
+
+                        note_cmpxchg_failure("slab_free", s, tid);
+                        goto redo;
+                }
+#else
                 c->freelist = object;
+#endif
                 stat(s, FREE_FASTPATH);
         } else
                 __slab_free(s, page, x, addr);
 
+#ifndef CONFIG_CMPXCHG_LOCAL
         local_irq_restore(flags);
+#endif
 }
 
 void kmem_cache_free(struct kmem_cache *s, void *x)
@@ -1988,13 +2232,13 @@ static int slub_nomerge;
  * the smallest order which will fit the object.
  */
 static inline int slab_order(int size, int min_objects,
-                                int max_order, int fract_leftover)
+                                int max_order, int fract_leftover, int reserved)
 {
         int order;
         int rem;
         int min_order = slub_min_order;
 
-        if ((PAGE_SIZE << min_order) / size > MAX_OBJS_PER_PAGE)
+        if (order_objects(min_order, size, reserved) > MAX_OBJS_PER_PAGE)
                 return get_order(size * MAX_OBJS_PER_PAGE) - 1;
 
         for (order = max(min_order,
@@ -2003,10 +2247,10 @@ static inline int slab_order(int size, int min_objects,
 
                 unsigned long slab_size = PAGE_SIZE << order;
 
-                if (slab_size < min_objects * size)
+                if (slab_size < min_objects * size + reserved)
                         continue;
 
-                rem = slab_size % size;
+                rem = (slab_size - reserved) % size;
 
                 if (rem <= slab_size / fract_leftover)
                         break;
@@ -2016,7 +2260,7 @@ static inline int slab_order(int size, int min_objects,
         return order;
 }
 
-static inline int calculate_order(int size)
+static inline int calculate_order(int size, int reserved)
 {
         int order;
         int min_objects;
@@ -2034,14 +2278,14 @@ static inline int calculate_order(int size)
         min_objects = slub_min_objects;
         if (!min_objects)
                 min_objects = 4 * (fls(nr_cpu_ids) + 1);
-        max_objects = (PAGE_SIZE << slub_max_order)/size;
+        max_objects = order_objects(slub_max_order, size, reserved);
         min_objects = min(min_objects, max_objects);
 
         while (min_objects > 1) {
                 fraction = 16;
                 while (fraction >= 4) {
                         order = slab_order(size, min_objects,
-                                        slub_max_order, fraction);
+                                        slub_max_order, fraction, reserved);
                         if (order <= slub_max_order)
                                 return order;
                         fraction /= 2;
@@ -2053,14 +2297,14 @@ static inline int calculate_order(int size)
          * We were unable to place multiple objects in a slab. Now
          * lets see if we can place a single object there.
          */
-        order = slab_order(size, 1, slub_max_order, 1);
+        order = slab_order(size, 1, slub_max_order, 1, reserved);
         if (order <= slub_max_order)
                 return order;
 
         /*
          * Doh this slab cannot be placed using slub_max_order.
          */
-        order = slab_order(size, 1, MAX_ORDER, 1);
+        order = slab_order(size, 1, MAX_ORDER, 1, reserved);
         if (order < MAX_ORDER)
                 return order;
         return -ENOSYS;
@@ -2110,9 +2354,23 @@ static inline int alloc_kmem_cache_cpus(struct kmem_cache *s)
         BUILD_BUG_ON(PERCPU_DYNAMIC_EARLY_SIZE <
                         SLUB_PAGE_SHIFT * sizeof(struct kmem_cache_cpu));
 
+#ifdef CONFIG_CMPXCHG_LOCAL
+        /*
+         * Must align to double word boundary for the double cmpxchg instructions
+         * to work.
+         */
+        s->cpu_slab = __alloc_percpu(sizeof(struct kmem_cache_cpu), 2 * sizeof(void *));
+#else
+        /* Regular alignment is sufficient */
         s->cpu_slab = alloc_percpu(struct kmem_cache_cpu);
+#endif
+
+        if (!s->cpu_slab)
+                return 0;
 
-        return s->cpu_slab != NULL;
+        init_kmem_cache_cpus(s);
+
+        return 1;
 }
 
 static struct kmem_cache *kmem_cache_node;
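
The switch to __alloc_percpu() with an explicit alignment argument is needed because cmpxchg_double operates on a naturally aligned pair of machine words, so freelist and tid must start on a 2 * sizeof(void *) boundary. A minimal C11 illustration of stating such an alignment requirement; the struct here is a stand-in, not the real struct kmem_cache_cpu:

#include <stdalign.h>
#include <stdio.h>

/*
 * Illustrative only: the real fields live in struct kmem_cache_cpu.
 * cmpxchg_double works on a naturally aligned pair of machine words,
 * so the pair must start on a 2 * sizeof(void *) boundary.
 */
struct fastpath_pair {
        alignas(2 * sizeof(void *)) void **freelist;
        unsigned long tid;
};

int main(void)
{
        printf("pair alignment: %zu bytes, pair size: %zu bytes\n",
               alignof(struct fastpath_pair), sizeof(struct fastpath_pair));
        return 0;
}
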
@@ -2311,7 +2569,7 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order)
         if (forced_order >= 0)
                 order = forced_order;
         else
-                order = calculate_order(size);
+                order = calculate_order(size, s->reserved);
 
         if (order < 0)
                 return 0;
@@ -2329,8 +2587,8 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order)
         /*
          * Determine the number of objects per slab
          */
-        s->oo = oo_make(order, size);
-        s->min = oo_make(get_order(size), size);
+        s->oo = oo_make(order, size, s->reserved);
+        s->min = oo_make(get_order(size), size, s->reserved);
         if (oo_objects(s->oo) > oo_objects(s->max))
                 s->max = s->oo;
 
@@ -2349,6 +2607,10 @@ static int kmem_cache_open(struct kmem_cache *s,
         s->objsize = size;
         s->align = align;
         s->flags = kmem_cache_flags(size, flags, name, ctor);
+        s->reserved = 0;
+
+        if (need_reserve_slab_rcu && (s->flags & SLAB_DESTROY_BY_RCU))
+                s->reserved = sizeof(struct rcu_head);
 
         if (!calculate_sizes(s, -1))
                 goto error;
@@ -2399,12 +2661,6 @@ unsigned int kmem_cache_size(struct kmem_cache *s)
 }
 EXPORT_SYMBOL(kmem_cache_size);
 
-const char *kmem_cache_name(struct kmem_cache *s)
-{
-        return s->name;
-}
-EXPORT_SYMBOL(kmem_cache_name);
-
 static void list_slab_objects(struct kmem_cache *s, struct page *page,
                                                         const char *text)
 {
@@ -2696,7 +2952,6 @@ EXPORT_SYMBOL(__kmalloc_node);
 size_t ksize(const void *object)
 {
         struct page *page;
-        struct kmem_cache *s;
 
         if (unlikely(object == ZERO_SIZE_PTR))
                 return 0;
@@ -2707,28 +2962,8 @@ size_t ksize(const void *object)
                 WARN_ON(!PageCompound(page));
                 return PAGE_SIZE << compound_order(page);
         }
-        s = page->slab;
-
-#ifdef CONFIG_SLUB_DEBUG
-        /*
-         * Debugging requires use of the padding between object
-         * and whatever may come after it.
-         */
-        if (s->flags & (SLAB_RED_ZONE | SLAB_POISON))
-                return s->objsize;
 
-#endif
-        /*
-         * If we have the need to store the freelist pointer
-         * back there or track user information then we can
-         * only use the space before that information.
-         */
-        if (s->flags & (SLAB_DESTROY_BY_RCU | SLAB_STORE_USER))
-                return s->inuse;
-        /*
-         * Else we can use all the padding etc for the allocation
-         */
-        return s->size;
+        return slab_ksize(page->slab);
 }
 EXPORT_SYMBOL(ksize);
 
@@ -3312,7 +3547,7 @@ void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller)
 
         ret = slab_alloc(s, gfpflags, NUMA_NO_NODE, caller);
 
-        /* Honor the call site pointer we recieved. */
+        /* Honor the call site pointer we received. */
         trace_kmalloc(caller, ret, size, s->size, gfpflags);
 
         return ret;
@@ -3342,7 +3577,7 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
 
         ret = slab_alloc(s, gfpflags, node, caller);
 
-        /* Honor the call site pointer we recieved. */
+        /* Honor the call site pointer we received. */
         trace_kmalloc_node(caller, ret, size, s->size, gfpflags, node);
 
         return ret;
@@ -4017,6 +4252,12 @@ static ssize_t destroy_by_rcu_show(struct kmem_cache *s, char *buf)
 }
 SLAB_ATTR_RO(destroy_by_rcu);
 
+static ssize_t reserved_show(struct kmem_cache *s, char *buf)
+{
+        return sprintf(buf, "%d\n", s->reserved);
+}
+SLAB_ATTR_RO(reserved);
+
 #ifdef CONFIG_SLUB_DEBUG
 static ssize_t slabs_show(struct kmem_cache *s, char *buf)
 {
@@ -4303,6 +4544,7 @@ static struct attribute *slab_attrs[] = {
         &reclaim_account_attr.attr,
         &destroy_by_rcu_attr.attr,
         &shrink_attr.attr,
+        &reserved_attr.attr,
 #ifdef CONFIG_SLUB_DEBUG
         &total_objects_attr.attr,
         &slabs_attr.attr,