 include/linux/slub_def.h |   5 +
 mm/slub.c                | 205 +++++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 207 insertions(+), 3 deletions(-)
diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index 875df55ab36d..009b0020079d 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -35,7 +35,10 @@ enum stat_item {
 			NR_SLUB_STAT_ITEMS };
 
 struct kmem_cache_cpu {
-	void **freelist;	/* Pointer to first free per cpu object */
+	void **freelist;	/* Pointer to next available object */
+#ifdef CONFIG_CMPXCHG_LOCAL
+	unsigned long tid;	/* Globally unique transaction id */
+#endif
 	struct page *page;	/* The slab from which we are allocating */
 	int node;		/* The node of the page (or -1 for debug) */
 #ifdef CONFIG_SLUB_STATS
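
The new tid field is placed directly behind freelist on purpose: the fastpaths added below update the two words as a single unit with a double-word cmpxchg. A small userspace sketch of that layout constraint follows (the struct name and the asserts are illustrative only, assuming an LP64 target as in the kernel):

#include <stddef.h>
#include <stdio.h>

/* Illustrative stand-in for the first two members of struct kmem_cache_cpu. */
struct cpu_freelist_pair {
	void **freelist;	/* first word: next available object */
	unsigned long tid;	/* second word: transaction id */
};

/* A double-word cmpxchg replaces both members at once, so they must be
 * adjacent and together span exactly two machine words. */
_Static_assert(offsetof(struct cpu_freelist_pair, tid) == sizeof(void *),
	       "tid must immediately follow freelist");
_Static_assert(sizeof(struct cpu_freelist_pair) == 2 * sizeof(void *),
	       "the pair must span exactly two words");

int main(void)
{
	printf("word size: %zu, pair size: %zu\n",
	       sizeof(void *), sizeof(struct cpu_freelist_pair));
	return 0;
}
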
diff --git a/mm/slub.c b/mm/slub.c
index bae7a5c636f4..65030c7fd7e2 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1494,6 +1494,77 @@ static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail)
 	}
 }
 
+#ifdef CONFIG_CMPXCHG_LOCAL
+#ifdef CONFIG_PREEMPT
+/*
+ * Calculate the next globally unique transaction for disambiguation
+ * during cmpxchg. The transactions start with the cpu number and are then
+ * incremented by CONFIG_NR_CPUS.
+ */
+#define TID_STEP  roundup_pow_of_two(CONFIG_NR_CPUS)
+#else
+/*
+ * No preemption supported, therefore there is also no need to check
+ * for different cpus.
+ */
+#define TID_STEP 1
+#endif
+
+static inline unsigned long next_tid(unsigned long tid)
+{
+	return tid + TID_STEP;
+}
+
+static inline unsigned int tid_to_cpu(unsigned long tid)
+{
+	return tid % TID_STEP;
+}
+
+static inline unsigned long tid_to_event(unsigned long tid)
+{
+	return tid / TID_STEP;
+}
+
+static inline unsigned int init_tid(int cpu)
+{
+	return cpu;
+}
+
+static inline void note_cmpxchg_failure(const char *n,
+		const struct kmem_cache *s, unsigned long tid)
+{
+#ifdef SLUB_DEBUG_CMPXCHG
+	unsigned long actual_tid = __this_cpu_read(s->cpu_slab->tid);
+
+	printk(KERN_INFO "%s %s: cmpxchg redo ", n, s->name);
+
+#ifdef CONFIG_PREEMPT
+	if (tid_to_cpu(tid) != tid_to_cpu(actual_tid))
+		printk("due to cpu change %d -> %d\n",
+			tid_to_cpu(tid), tid_to_cpu(actual_tid));
+	else
+#endif
+	if (tid_to_event(tid) != tid_to_event(actual_tid))
+		printk("due to cpu running other code. Event %ld->%ld\n",
+			tid_to_event(tid), tid_to_event(actual_tid));
+	else
+		printk("for unknown reason: actual=%lx was=%lx target=%lx\n",
+			actual_tid, tid, next_tid(tid));
+#endif
+}
+
+#endif
+
+void init_kmem_cache_cpus(struct kmem_cache *s)
+{
+#if defined(CONFIG_CMPXCHG_LOCAL) && defined(CONFIG_PREEMPT)
+	int cpu;
+
+	for_each_possible_cpu(cpu)
+		per_cpu_ptr(s->cpu_slab, cpu)->tid = init_tid(cpu);
+#endif
+
+}
 /*
  * Remove the cpu slab
  */
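
To see what the helpers above encode: with CONFIG_PREEMPT the tid carries the owning cpu in its low bits (TID_STEP is CONFIG_NR_CPUS rounded up to a power of two) and an event counter in the remaining bits, so bumping the tid never changes the cpu it belongs to. A standalone sketch of the arithmetic, with NR_CPUS fixed at 4 purely for illustration:

#include <stdio.h>

/* Stand-in for roundup_pow_of_two(CONFIG_NR_CPUS); 4 is already a power of two. */
#define NR_CPUS		4
#define TID_STEP	NR_CPUS

static unsigned long next_tid(unsigned long tid)     { return tid + TID_STEP; }
static unsigned int tid_to_cpu(unsigned long tid)    { return tid % TID_STEP; }
static unsigned long tid_to_event(unsigned long tid) { return tid / TID_STEP; }
static unsigned long init_tid(int cpu)               { return cpu; }

int main(void)
{
	unsigned long tid = init_tid(3);	/* per-cpu tid for cpu 3 */

	for (int i = 0; i < 3; i++) {
		printf("tid=%lu cpu=%u event=%lu\n",
		       tid, tid_to_cpu(tid), tid_to_event(tid));
		tid = next_tid(tid);	/* the cpu bits never change */
	}
	return 0;	/* prints cpu=3 with events 0, 1, 2 */
}
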
@@ -1525,6 +1596,9 @@ static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
 		page->inuse--;
 	}
 	c->page = NULL;
+#ifdef CONFIG_CMPXCHG_LOCAL
+	c->tid = next_tid(c->tid);
+#endif
 	unfreeze_slab(s, page, tail);
 }
 
@@ -1659,6 +1733,19 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
 {
 	void **object;
 	struct page *new;
+#ifdef CONFIG_CMPXCHG_LOCAL
+	unsigned long flags;
+
+	local_irq_save(flags);
+#ifdef CONFIG_PREEMPT
+	/*
+	 * We may have been preempted and rescheduled on a different
+	 * cpu before disabling interrupts. Need to reload cpu area
+	 * pointer.
+	 */
+	c = this_cpu_ptr(s->cpu_slab);
+#endif
+#endif
 
 	/* We handle __GFP_ZERO in the caller */
 	gfpflags &= ~__GFP_ZERO;
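
The reload of c above matters because, with preemption enabled, the task may have migrated between reading the per cpu pointer in slab_alloc() and disabling interrupts here. A tiny Linux-only userspace illustration of the underlying fact (sched_getcpu() is a standard glibc call; whether a migration actually happens in the window depends on the scheduler):

#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	for (int i = 0; i < 5; i++) {
		int before = sched_getcpu();
		usleep(10000);		/* window in which the scheduler may move us */
		int after = sched_getcpu();

		printf("cpu before=%d after=%d%s\n", before, after,
		       before != after ? "  <- migrated; cached per-cpu data is stale" : "");
	}
	return 0;
}
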
@@ -1685,6 +1772,10 @@ load_freelist:
 	c->node = page_to_nid(c->page);
 unlock_out:
 	slab_unlock(c->page);
+#ifdef CONFIG_CMPXCHG_LOCAL
+	c->tid = next_tid(c->tid);
+	local_irq_restore(flags);
+#endif
 	stat(s, ALLOC_SLOWPATH);
 	return object;
 
@@ -1746,23 +1837,76 @@ static __always_inline void *slab_alloc(struct kmem_cache *s,
 {
 	void **object;
 	struct kmem_cache_cpu *c;
+#ifdef CONFIG_CMPXCHG_LOCAL
+	unsigned long tid;
+#else
 	unsigned long flags;
+#endif
 
 	if (slab_pre_alloc_hook(s, gfpflags))
 		return NULL;
 
+#ifndef CONFIG_CMPXCHG_LOCAL
 	local_irq_save(flags);
+#else
+redo:
+#endif
+
+	/*
+	 * Must read kmem_cache cpu data via this cpu ptr. Preemption is
+	 * enabled. We may switch back and forth between cpus while
+	 * reading from one cpu area. That does not matter as long
+	 * as we end up on the original cpu again when doing the cmpxchg.
+	 */
 	c = __this_cpu_ptr(s->cpu_slab);
+
+#ifdef CONFIG_CMPXCHG_LOCAL
+	/*
+	 * The transaction ids are globally unique per cpu and per operation on
+	 * a per cpu queue. Thus it can be guaranteed that the cmpxchg_double
+	 * occurs on the right processor and that there was no operation on the
+	 * linked list in between.
+	 */
+	tid = c->tid;
+	barrier();
+#endif
+
 	object = c->freelist;
 	if (unlikely(!object || !node_match(c, node)))
 
 		object = __slab_alloc(s, gfpflags, node, addr, c);
 
 	else {
+#ifdef CONFIG_CMPXCHG_LOCAL
+		/*
+		 * The cmpxchg will only match if there was no additional
+		 * operation and if we are on the right processor.
+		 *
+		 * The cmpxchg does the following atomically (without lock semantics!)
+		 * 1. Relocate first pointer to the current per cpu area.
+		 * 2. Verify that tid and freelist have not been changed
+		 * 3. If they were not changed replace tid and freelist
+		 *
+		 * Since this is without lock semantics the protection is only against
+		 * code executing on this cpu *not* from access by other cpus.
+		 */
+		if (unlikely(!this_cpu_cmpxchg_double(
+				s->cpu_slab->freelist, s->cpu_slab->tid,
+				object, tid,
+				get_freepointer(s, object), next_tid(tid)))) {
+
+			note_cmpxchg_failure("slab_alloc", s, tid);
+			goto redo;
+		}
+#else
 		c->freelist = get_freepointer(s, object);
+#endif
 		stat(s, ALLOC_FASTPATH);
 	}
+
+#ifndef CONFIG_CMPXCHG_LOCAL
 	local_irq_restore(flags);
+#endif
 
 	if (unlikely(gfpflags & __GFP_ZERO) && object)
 		memset(object, 0, s->objsize);
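
The fastpath above is a speculative pop: read freelist and tid without any lock, compute the new head, then publish head and tid together with one double-word compare-and-exchange that only succeeds if neither changed in the meantime. The userspace analogue below is only a sketch of that pattern, not the kernel code: it uses a single global list head instead of per cpu data, GCC's generic __atomic_compare_exchange in place of this_cpu_cmpxchg_double, and assumes an x86-64 toolchain (build with gcc -O2 -mcx16 and link -latomic).

#include <stdalign.h>
#include <stdbool.h>
#include <stdio.h>

struct freelist_head {
	void *head;		/* next free object */
	unsigned long tid;	/* transaction id, bumped on every update */
};

/* One list head; in SLUB this would be per cpu. */
static alignas(2 * sizeof(void *)) struct freelist_head fl;

/* Speculative pop: retry if the head or the tid went stale under us. */
static void *pop(void)
{
	struct freelist_head old, new;

	do {
		__atomic_load(&fl, &old, __ATOMIC_RELAXED);
		if (!old.head)
			return NULL;		/* empty: SLUB would take the slow path */
		new.head = *(void **)old.head;	/* object's first word links to the next one */
		new.tid = old.tid + 1;		/* the kernel uses next_tid() here */
	} while (!__atomic_compare_exchange(&fl, &old, &new, false,
					    __ATOMIC_RELAXED, __ATOMIC_RELAXED));
	return old.head;
}

int main(void)
{
	/* Three fake "objects" whose first word is the freelist link. */
	static void *obj[3];
	obj[0] = &obj[1];
	obj[1] = &obj[2];
	obj[2] = NULL;

	fl.head = &obj[0];
	fl.tid = 0;

	for (void *p; (p = pop()) != NULL; )
		printf("allocated %p, tid now %lu\n", p, fl.tid);
	return 0;
}
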
@@ -1840,9 +1984,13 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
 {
 	void *prior;
 	void **object = (void *)x;
+#ifdef CONFIG_CMPXCHG_LOCAL
+	unsigned long flags;
 
-	stat(s, FREE_SLOWPATH);
+	local_irq_save(flags);
+#endif
 	slab_lock(page);
+	stat(s, FREE_SLOWPATH);
 
 	if (kmem_cache_debug(s))
 		goto debug;
@@ -1872,6 +2020,9 @@ checks_ok:
 
 out_unlock:
 	slab_unlock(page);
+#ifdef CONFIG_CMPXCHG_LOCAL
+	local_irq_restore(flags);
+#endif
 	return;
 
 slab_empty:
@@ -1883,6 +2034,9 @@ slab_empty:
 		stat(s, FREE_REMOVE_PARTIAL);
 	}
 	slab_unlock(page);
+#ifdef CONFIG_CMPXCHG_LOCAL
+	local_irq_restore(flags);
+#endif
 	stat(s, FREE_SLAB);
 	discard_slab(s, page);
 	return;
@@ -1909,21 +2063,54 @@ static __always_inline void slab_free(struct kmem_cache *s,
 {
 	void **object = (void *)x;
 	struct kmem_cache_cpu *c;
+#ifdef CONFIG_CMPXCHG_LOCAL
+	unsigned long tid;
+#else
 	unsigned long flags;
+#endif
 
 	slab_free_hook(s, x);
 
+#ifndef CONFIG_CMPXCHG_LOCAL
 	local_irq_save(flags);
+#endif
+
+redo:
+	/*
+	 * Determine the current cpu's per cpu slab.
+	 * The cpu may change afterward. However that does not matter since
+	 * data is retrieved via this pointer. If we are on the same cpu
+	 * during the cmpxchg then the free will succeed.
+	 */
 	c = __this_cpu_ptr(s->cpu_slab);
 
+#ifdef CONFIG_CMPXCHG_LOCAL
+	tid = c->tid;
+	barrier();
+#endif
+
 	if (likely(page == c->page && c->node != NUMA_NO_NODE)) {
 		set_freepointer(s, object, c->freelist);
+
+#ifdef CONFIG_CMPXCHG_LOCAL
+		if (unlikely(!this_cpu_cmpxchg_double(
+				s->cpu_slab->freelist, s->cpu_slab->tid,
+				c->freelist, tid,
+				object, next_tid(tid)))) {
+
+			note_cmpxchg_failure("slab_free", s, tid);
+			goto redo;
+		}
+#else
 		c->freelist = object;
+#endif
 		stat(s, FREE_FASTPATH);
 	} else
 		__slab_free(s, page, x, addr);
 
+#ifndef CONFIG_CMPXCHG_LOCAL
 	local_irq_restore(flags);
+#endif
 }
 
 void kmem_cache_free(struct kmem_cache *s, void *x)
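
The free fastpath is the mirror image: link the object in front of the current freelist, then publish the new head together with the bumped tid; if another alloc/free slipped in, or the cpu changed, the tid no longer matches and the push is retried. Continuing the userspace sketch given after slab_alloc() (it reuses that sketch's struct freelist_head and fl, under the same assumptions):

/* Speculative push, complementing pop() above: retried on a stale head or tid. */
static void push(void *object)
{
	struct freelist_head old, new;

	do {
		__atomic_load(&fl, &old, __ATOMIC_RELAXED);
		*(void **)object = old.head;	/* set_freepointer(): object links to the old head */
		new.head = object;
		new.tid = old.tid + 1;
	} while (!__atomic_compare_exchange(&fl, &old, &new, false,
					    __ATOMIC_RELAXED, __ATOMIC_RELAXED));
}
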
@@ -2115,9 +2302,23 @@ static inline int alloc_kmem_cache_cpus(struct kmem_cache *s)
 	BUILD_BUG_ON(PERCPU_DYNAMIC_EARLY_SIZE <
 			SLUB_PAGE_SHIFT * sizeof(struct kmem_cache_cpu));
 
+#ifdef CONFIG_CMPXCHG_LOCAL
+	/*
+	 * Must align to double word boundary for the double cmpxchg instructions
+	 * to work.
+	 */
+	s->cpu_slab = __alloc_percpu(sizeof(struct kmem_cache_cpu), 2 * sizeof(void *));
+#else
+	/* Regular alignment is sufficient */
 	s->cpu_slab = alloc_percpu(struct kmem_cache_cpu);
+#endif
+
+	if (!s->cpu_slab)
+		return 0;
+
+	init_kmem_cache_cpus(s);
 
-	return s->cpu_slab != NULL;
+	return 1;
 }
 
 static struct kmem_cache *kmem_cache_node;
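
The explicit 2 * sizeof(void *) alignment is needed because the double-word cmpxchg instructions (cmpxchg16b/cmpxchg8b on x86) require their operand to be aligned to twice the word size, while the default alloc_percpu() only guarantees the natural alignment of the struct, one word here. A trivial userspace check of that constraint (illustrative only, C11):

#include <stdalign.h>
#include <stdio.h>

/* Stand-in for the freelist/tid pair at the start of struct kmem_cache_cpu. */
struct pair {
	void *freelist;
	unsigned long tid;
};

/* Natural alignment of the struct is one word; the double-word cmpxchg
 * needs the whole pair aligned to two words. */
static struct pair default_aligned;
static alignas(2 * sizeof(void *)) struct pair cas_ready;

int main(void)
{
	printf("alignof(struct pair)      = %zu\n", alignof(struct pair));
	printf("required for cmpxchg pair = %zu\n", 2 * sizeof(void *));
	printf("cas_ready offset mod 16   = %lu\n",
	       (unsigned long)&cas_ready % (2 * sizeof(void *)));
	(void)default_aligned;
	return 0;
}
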