 include/linux/slub_def.h |   5
 mm/slub.c                | 205
 2 files changed, 207 insertions(+), 3 deletions(-)
diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index 875df55ab36d..009b0020079d 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -35,7 +35,10 @@ enum stat_item {
 	NR_SLUB_STAT_ITEMS };
 
 struct kmem_cache_cpu {
-	void **freelist;	/* Pointer to first free per cpu object */
+	void **freelist;	/* Pointer to next available object */
+#ifdef CONFIG_CMPXCHG_LOCAL
+	unsigned long tid;	/* Globally unique transaction id */
+#endif
 	struct page *page;	/* The slab from which we are allocating */
 	int node;		/* The node of the page (or -1 for debug) */
 #ifdef CONFIG_SLUB_STATS
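
The new tid member is placed immediately after freelist on purpose: the fastpaths added below commit the (freelist, tid) pair with a single double-word cmpxchg, which operates on one contiguous, suitably aligned 2 * sizeof(void *) region. A minimal userspace layout sketch (illustrative names only, not part of the patch):

#include <stddef.h>

/* Sketch of the two members targeted by the double-word cmpxchg. */
struct kmem_cache_cpu_sketch {
	void **freelist;	/* word 0 of the cmpxchg_double operand */
	unsigned long tid;	/* word 1, must immediately follow freelist */
	/* ...the remaining members take no part in the cmpxchg... */
};

/* tid has to start exactly one pointer-sized word into the structure. */
_Static_assert(offsetof(struct kmem_cache_cpu_sketch, tid) == sizeof(void *),
	       "tid must directly follow freelist");

The matching alignment requirement is handled in alloc_kmem_cache_cpus() further down, where the per cpu structure is allocated on a 2 * sizeof(void *) boundary.
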
diff --git a/mm/slub.c b/mm/slub.c
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1494,6 +1494,77 @@ static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail)
 	}
 }
 
+#ifdef CONFIG_CMPXCHG_LOCAL
+#ifdef CONFIG_PREEMPT
+/*
+ * Calculate the next globally unique transaction for disambiguation
+ * during cmpxchg. The transactions start with the cpu number and are then
+ * incremented by CONFIG_NR_CPUS.
+ */
+#define TID_STEP roundup_pow_of_two(CONFIG_NR_CPUS)
+#else
+/*
+ * No preemption supported therefore also no need to check for
+ * different cpus.
+ */
+#define TID_STEP 1
+#endif
+
+static inline unsigned long next_tid(unsigned long tid)
+{
+	return tid + TID_STEP;
+}
+
+static inline unsigned int tid_to_cpu(unsigned long tid)
+{
+	return tid % TID_STEP;
+}
+
+static inline unsigned long tid_to_event(unsigned long tid)
+{
+	return tid / TID_STEP;
+}
+
+static inline unsigned int init_tid(int cpu)
+{
+	return cpu;
+}
+
+static inline void note_cmpxchg_failure(const char *n,
+		const struct kmem_cache *s, unsigned long tid)
+{
+#ifdef SLUB_DEBUG_CMPXCHG
+	unsigned long actual_tid = __this_cpu_read(s->cpu_slab->tid);
+
+	printk(KERN_INFO "%s %s: cmpxchg redo ", n, s->name);
+
+#ifdef CONFIG_PREEMPT
+	if (tid_to_cpu(tid) != tid_to_cpu(actual_tid))
+		printk("due to cpu change %d -> %d\n",
+			tid_to_cpu(tid), tid_to_cpu(actual_tid));
+	else
+#endif
+	if (tid_to_event(tid) != tid_to_event(actual_tid))
+		printk("due to cpu running other code. Event %ld->%ld\n",
+			tid_to_event(tid), tid_to_event(actual_tid));
+	else
+		printk("for unknown reason: actual=%lx was=%lx target=%lx\n",
+			actual_tid, tid, next_tid(tid));
+#endif
+}
+
+#endif
+
+void init_kmem_cache_cpus(struct kmem_cache *s)
+{
+#if defined(CONFIG_CMPXCHG_LOCAL) && defined(CONFIG_PREEMPT)
+	int cpu;
+
+	for_each_possible_cpu(cpu)
+		per_cpu_ptr(s->cpu_slab, cpu)->tid = init_tid(cpu);
+#endif
+
+}
 /*
  * Remove the cpu slab
  */
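
For reference, the arithmetic behind the helpers just added: with CONFIG_PREEMPT a tid encodes the owning cpu in its low bits (tid % TID_STEP) and an event counter in the rest (tid / TID_STEP), so stepping by TID_STEP keeps the cpu part intact while advancing the event count. A standalone sketch, assuming for the example only that TID_STEP is 4:

#include <stdio.h>

#define TID_STEP 4UL	/* stands in for roundup_pow_of_two(CONFIG_NR_CPUS) */

static unsigned long next_tid(unsigned long tid)     { return tid + TID_STEP; }
static unsigned int tid_to_cpu(unsigned long tid)    { return tid % TID_STEP; }
static unsigned long tid_to_event(unsigned long tid) { return tid / TID_STEP; }

int main(void)
{
	unsigned long tid = 2;	/* init_tid(2): cpu 2 starts at its own number */

	for (int i = 0; i < 3; i++) {
		printf("tid=%lu cpu=%u event=%lu\n",
		       tid, tid_to_cpu(tid), tid_to_event(tid));
		tid = next_tid(tid);	/* 2 -> 6 -> 10: cpu stays 2, event advances */
	}
	return 0;
}

Without preemption TID_STEP is 1, the cpu part is always 0, and the tid degenerates into a plain event counter.
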
@@ -1525,6 +1596,9 @@ static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
 		page->inuse--;
 	}
 	c->page = NULL;
+#ifdef CONFIG_CMPXCHG_LOCAL
+	c->tid = next_tid(c->tid);
+#endif
 	unfreeze_slab(s, page, tail);
 }
 
@@ -1659,6 +1733,19 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
 {
 	void **object;
 	struct page *new;
+#ifdef CONFIG_CMPXCHG_LOCAL
+	unsigned long flags;
+
+	local_irq_save(flags);
+#ifdef CONFIG_PREEMPT
+	/*
+	 * We may have been preempted and rescheduled on a different
+	 * cpu before disabling interrupts. Need to reload cpu area
+	 * pointer.
+	 */
+	c = this_cpu_ptr(s->cpu_slab);
+#endif
+#endif
 
 	/* We handle __GFP_ZERO in the caller */
 	gfpflags &= ~__GFP_ZERO;
@@ -1685,6 +1772,10 @@ load_freelist:
 	c->node = page_to_nid(c->page);
 unlock_out:
 	slab_unlock(c->page);
+#ifdef CONFIG_CMPXCHG_LOCAL
+	c->tid = next_tid(c->tid);
+	local_irq_restore(flags);
+#endif
 	stat(s, ALLOC_SLOWPATH);
 	return object;
 
@@ -1746,23 +1837,76 @@ static __always_inline void *slab_alloc(struct kmem_cache *s,
 {
 	void **object;
 	struct kmem_cache_cpu *c;
+#ifdef CONFIG_CMPXCHG_LOCAL
+	unsigned long tid;
+#else
 	unsigned long flags;
+#endif
 
 	if (slab_pre_alloc_hook(s, gfpflags))
 		return NULL;
 
+#ifndef CONFIG_CMPXCHG_LOCAL
 	local_irq_save(flags);
+#else
+redo:
+#endif
+
+	/*
+	 * Must read kmem_cache cpu data via this cpu ptr. Preemption is
+	 * enabled. We may switch back and forth between cpus while
+	 * reading from one cpu area. That does not matter as long
+	 * as we end up on the original cpu again when doing the cmpxchg.
+	 */
 	c = __this_cpu_ptr(s->cpu_slab);
+
+#ifdef CONFIG_CMPXCHG_LOCAL
+	/*
+	 * The transaction ids are globally unique per cpu and per operation on
+	 * a per cpu queue. Thus they can guarantee that the cmpxchg_double
+	 * occurs on the right processor and that there was no operation on the
+	 * linked list in between.
+	 */
+	tid = c->tid;
+	barrier();
+#endif
+
 	object = c->freelist;
 	if (unlikely(!object || !node_match(c, node)))
 
 		object = __slab_alloc(s, gfpflags, node, addr, c);
 
 	else {
+#ifdef CONFIG_CMPXCHG_LOCAL
+		/*
+		 * The cmpxchg will only match if there was no additional
+		 * operation and if we are on the right processor.
+		 *
+		 * The cmpxchg does the following atomically (without lock semantics!)
+		 * 1. Relocate first pointer to the current per cpu area.
+		 * 2. Verify that tid and freelist have not been changed
+		 * 3. If they were not changed replace tid and freelist
+		 *
+		 * Since this is without lock semantics the protection is only against
+		 * code executing on this cpu *not* from access by other cpus.
+		 */
+		if (unlikely(!this_cpu_cmpxchg_double(
+				s->cpu_slab->freelist, s->cpu_slab->tid,
+				object, tid,
+				get_freepointer(s, object), next_tid(tid)))) {
+
+			note_cmpxchg_failure("slab_alloc", s, tid);
+			goto redo;
+		}
+#else
 		c->freelist = get_freepointer(s, object);
+#endif
 		stat(s, ALLOC_FASTPATH);
 	}
+
+#ifndef CONFIG_CMPXCHG_LOCAL
 	local_irq_restore(flags);
+#endif
 
 	if (unlikely(gfpflags & __GFP_ZERO) && object)
 		memset(object, 0, s->objsize);
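
The allocation fastpath above is a speculate-then-publish loop: read tid and freelist with no lock and no irq-off section, compute the successor state, and let this_cpu_cmpxchg_double() publish it only if both words are still what was read, on the cpu that ends up executing the instruction. A rough userspace analogue of that retry pattern (single-threaded demo with made-up names; it uses a plain double-width compare-and-swap on a global, whereas the kernel pins the update to the local cpu, and it may need -mcx16 and/or -latomic to build):

#include <stdalign.h>
#include <stdbool.h>
#include <stdio.h>

struct node { struct node *next; };

struct pcpu_sketch {
	struct node *freelist;		/* analogue of c->freelist */
	unsigned long tid;		/* analogue of c->tid */
};

/* Double-width compare-and-swap over the (freelist, tid) pair. */
static bool cmpxchg_double_sketch(struct pcpu_sketch *p,
				  struct pcpu_sketch old, struct pcpu_sketch new)
{
	return __atomic_compare_exchange(p, &old, &new, false,
					 __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
}

static struct node *alloc_fastpath(struct pcpu_sketch *p)
{
	struct pcpu_sketch old, new;

	do {
		old = *p;			/* speculative, lockless read */
		if (!old.freelist)
			return NULL;		/* the real code falls back to __slab_alloc() */
		new.freelist = old.freelist->next;	/* get_freepointer() analogue */
		new.tid = old.tid + 1;			/* next_tid() analogue */
	} while (!cmpxchg_double_sketch(p, old, new));	/* failure == "goto redo" */

	return old.freelist;
}

int main(void)
{
	struct node n2 = { NULL }, n1 = { &n2 };
	alignas(2 * sizeof(void *)) struct pcpu_sketch p = { &n1, 0 };

	while (alloc_fastpath(&p))
		printf("allocated one object, tid now %lu\n", p.tid);
	return 0;
}

The tid half of the pair is what turns a bare pointer compare into the check described in the comment: no other operation on this cpu's freelist happened between the read and the commit.
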
@@ -1840,9 +1984,13 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
 {
 	void *prior;
 	void **object = (void *)x;
+#ifdef CONFIG_CMPXCHG_LOCAL
+	unsigned long flags;
 
-	stat(s, FREE_SLOWPATH);
+	local_irq_save(flags);
+#endif
 	slab_lock(page);
+	stat(s, FREE_SLOWPATH);
 
 	if (kmem_cache_debug(s))
 		goto debug;
@@ -1872,6 +2020,9 @@ checks_ok:
 
 out_unlock:
 	slab_unlock(page);
+#ifdef CONFIG_CMPXCHG_LOCAL
+	local_irq_restore(flags);
+#endif
 	return;
 
 slab_empty:
@@ -1883,6 +2034,9 @@ slab_empty:
 		stat(s, FREE_REMOVE_PARTIAL);
 	}
 	slab_unlock(page);
+#ifdef CONFIG_CMPXCHG_LOCAL
+	local_irq_restore(flags);
+#endif
 	stat(s, FREE_SLAB);
 	discard_slab(s, page);
 	return;
@@ -1909,21 +2063,54 @@ static __always_inline void slab_free(struct kmem_cache *s,
 {
 	void **object = (void *)x;
 	struct kmem_cache_cpu *c;
+#ifdef CONFIG_CMPXCHG_LOCAL
+	unsigned long tid;
+#else
 	unsigned long flags;
+#endif
 
 	slab_free_hook(s, x);
 
+#ifndef CONFIG_CMPXCHG_LOCAL
 	local_irq_save(flags);
+#endif
+
+redo:
+	/*
+	 * Determine the current cpu's per cpu slab.
+	 * The cpu may change afterward. However that does not matter since
+	 * data is retrieved via this pointer. If we are on the same cpu
+	 * during the cmpxchg then the free will succeed.
+	 */
 	c = __this_cpu_ptr(s->cpu_slab);
 
+#ifdef CONFIG_CMPXCHG_LOCAL
+	tid = c->tid;
+	barrier();
+#endif
+
 	if (likely(page == c->page && c->node != NUMA_NO_NODE)) {
 		set_freepointer(s, object, c->freelist);
+
+#ifdef CONFIG_CMPXCHG_LOCAL
+		if (unlikely(!this_cpu_cmpxchg_double(
+				s->cpu_slab->freelist, s->cpu_slab->tid,
+				c->freelist, tid,
+				object, next_tid(tid)))) {
+
+			note_cmpxchg_failure("slab_free", s, tid);
+			goto redo;
+		}
+#else
 		c->freelist = object;
+#endif
 		stat(s, FREE_FASTPATH);
 	} else
 		__slab_free(s, page, x, addr);
 
+#ifndef CONFIG_CMPXCHG_LOCAL
 	local_irq_restore(flags);
+#endif
 }
 
 void kmem_cache_free(struct kmem_cache *s, void *x)
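
Both fastpaths compare the tid as well as the freelist pointer because a pointer-only compare is open to ABA: between the speculative read and the cmpxchg the same object can be handed out and freed again, or the thread can migrate, leaving the freelist head looking untouched even though the queue changed. A deliberately single-threaded sketch of that hazard, with made-up names:

#include <stdbool.h>
#include <stdio.h>

struct pcpu_sketch {
	void *freelist;
	unsigned long tid;
};

/* Single-word CAS on the freelist pointer alone. */
static bool cas_freelist(struct pcpu_sketch *p, void *old, void *new)
{
	return __atomic_compare_exchange_n(&p->freelist, &old, new, false,
					   __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
}

int main(void)
{
	int obj_a, obj_b;
	struct pcpu_sketch p = { &obj_a, 0 };

	/* Fastpath snapshot: freelist head and tid as seen before the race. */
	void *seen_freelist = p.freelist;
	unsigned long seen_tid = p.tid;

	/* Meanwhile other operations run on this cpu; the head ends up
	 * pointing at obj_a again, but the tid has moved on twice. */
	p.freelist = &obj_b; p.tid += 1;
	p.freelist = &obj_a; p.tid += 1;

	/* A pointer-only compare cannot tell that anything happened. */
	printf("pointer-only CAS %s\n",
	       cas_freelist(&p, seen_freelist, NULL) ? "succeeds (ABA!)" : "fails");

	/* The tid comparison in the double cmpxchg catches it and forces a redo. */
	printf("tid check: %s\n",
	       p.tid == seen_tid ? "matches (would commit)" : "mismatch, retry");
	return 0;
}

With CONFIG_PREEMPT the cpu number folded into the tid also catches the migration case, which is why note_cmpxchg_failure() reports "cpu change" and "cpu running other code" as separate causes.
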
@@ -2115,9 +2302,23 @@ static inline int alloc_kmem_cache_cpus(struct kmem_cache *s)
 	BUILD_BUG_ON(PERCPU_DYNAMIC_EARLY_SIZE <
 			SLUB_PAGE_SHIFT * sizeof(struct kmem_cache_cpu));
 
+#ifdef CONFIG_CMPXCHG_LOCAL
+	/*
+	 * Must align to double word boundary for the double cmpxchg instructions
+	 * to work.
+	 */
+	s->cpu_slab = __alloc_percpu(sizeof(struct kmem_cache_cpu), 2 * sizeof(void *));
+#else
+	/* Regular alignment is sufficient */
 	s->cpu_slab = alloc_percpu(struct kmem_cache_cpu);
+#endif
+
+	if (!s->cpu_slab)
+		return 0;
+
+	init_kmem_cache_cpus(s);
 
-	return s->cpu_slab != NULL;
+	return 1;
 }
 
 static struct kmem_cache *kmem_cache_node;
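
The switch from alloc_percpu() to __alloc_percpu() exists only to request 2 * sizeof(void *) alignment, since the double-word cmpxchg instructions require the (freelist, tid) pair to start on a double-word boundary. A userspace analogue of that allocation constraint, with aligned_alloc() standing in for the percpu allocator (illustrative only):

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

struct pcpu_sketch {
	void **freelist;
	unsigned long tid;
};

int main(void)
{
	/* Request double-word alignment, as __alloc_percpu() does above. */
	struct pcpu_sketch *c = aligned_alloc(2 * sizeof(void *), sizeof(*c));

	if (!c)
		return 1;
	printf("allocation %s double-word aligned\n",
	       (uintptr_t)c % (2 * sizeof(void *)) == 0 ? "is" : "is not");
	free(c);
	return 0;
}

The error handling changes shape as well: an allocation failure now returns 0 before init_kmem_cache_cpus() runs, and the success path returns 1 instead of re-testing the pointer.
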