author:    Pekka Enberg <penberg@kernel.org>  2011-03-20 12:13:26 -0400
committer: Pekka Enberg <penberg@kernel.org>  2011-03-20 12:13:26 -0400
commit:    e8c500c2b64b6e237e67ecba7249e72363c47047 (patch)
tree:      e9c62e59a879ebef45b0fc2823d318b2fb2fed84 /mm/slub.c
parent:    c53badd0801728feedfcccae04239410b52b0d03 (diff)
parent:    a24c5a0ea902bcda348f086bd909cc2d6e305bf8 (diff)
Merge branch 'slub/lockless' into for-linus
Conflicts:
include/linux/slub_def.h
Diffstat (limited to 'mm/slub.c')
-rw-r--r--  mm/slub.c  234
1 files changed, 221 insertions, 13 deletions
@@ -836,14 +836,24 @@ static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags, void
 static inline void slab_free_hook(struct kmem_cache *s, void *x)
 {
         kmemleak_free_recursive(x, s->flags);
-}
 
-static inline void slab_free_hook_irq(struct kmem_cache *s, void *object)
-{
-        kmemcheck_slab_free(s, object, s->objsize);
-        debug_check_no_locks_freed(object, s->objsize);
-        if (!(s->flags & SLAB_DEBUG_OBJECTS))
-                debug_check_no_obj_freed(object, s->objsize);
+        /*
+         * Trouble is that we may no longer disable interupts in the fast path
+         * So in order to make the debug calls that expect irqs to be
+         * disabled we need to disable interrupts temporarily.
+         */
+#if defined(CONFIG_KMEMCHECK) || defined(CONFIG_LOCKDEP)
+        {
+                unsigned long flags;
+
+                local_irq_save(flags);
+                kmemcheck_slab_free(s, x, s->objsize);
+                debug_check_no_locks_freed(x, s->objsize);
+                if (!(s->flags & SLAB_DEBUG_OBJECTS))
+                        debug_check_no_obj_freed(x, s->objsize);
+                local_irq_restore(flags);
+        }
+#endif
 }
 
 /*
@@ -1130,9 +1140,6 @@ static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags,
 
 static inline void slab_free_hook(struct kmem_cache *s, void *x) {}
 
-static inline void slab_free_hook_irq(struct kmem_cache *s,
-                                        void *object) {}
-
 #endif /* CONFIG_SLUB_DEBUG */
 
 /*
@@ -1533,6 +1540,77 @@ static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail)
         }
 }
 
+#ifdef CONFIG_CMPXCHG_LOCAL
+#ifdef CONFIG_PREEMPT
+/*
+ * Calculate the next globally unique transaction for disambiguiation
+ * during cmpxchg. The transactions start with the cpu number and are then
+ * incremented by CONFIG_NR_CPUS.
+ */
+#define TID_STEP roundup_pow_of_two(CONFIG_NR_CPUS)
+#else
+/*
+ * No preemption supported therefore also no need to check for
+ * different cpus.
+ */
+#define TID_STEP 1
+#endif
+
+static inline unsigned long next_tid(unsigned long tid)
+{
+        return tid + TID_STEP;
+}
+
+static inline unsigned int tid_to_cpu(unsigned long tid)
+{
+        return tid % TID_STEP;
+}
+
+static inline unsigned long tid_to_event(unsigned long tid)
+{
+        return tid / TID_STEP;
+}
+
+static inline unsigned int init_tid(int cpu)
+{
+        return cpu;
+}
+
+static inline void note_cmpxchg_failure(const char *n,
+                const struct kmem_cache *s, unsigned long tid)
+{
+#ifdef SLUB_DEBUG_CMPXCHG
+        unsigned long actual_tid = __this_cpu_read(s->cpu_slab->tid);
+
+        printk(KERN_INFO "%s %s: cmpxchg redo ", n, s->name);
+
+#ifdef CONFIG_PREEMPT
+        if (tid_to_cpu(tid) != tid_to_cpu(actual_tid))
+                printk("due to cpu change %d -> %d\n",
+                        tid_to_cpu(tid), tid_to_cpu(actual_tid));
+        else
+#endif
+        if (tid_to_event(tid) != tid_to_event(actual_tid))
+                printk("due to cpu running other code. Event %ld->%ld\n",
+                        tid_to_event(tid), tid_to_event(actual_tid));
+        else
+                printk("for unknown reason: actual=%lx was=%lx target=%lx\n",
+                        actual_tid, tid, next_tid(tid));
+#endif
+}
+
+#endif
+
+void init_kmem_cache_cpus(struct kmem_cache *s)
+{
+#if defined(CONFIG_CMPXCHG_LOCAL) && defined(CONFIG_PREEMPT)
+        int cpu;
+
+        for_each_possible_cpu(cpu)
+                per_cpu_ptr(s->cpu_slab, cpu)->tid = init_tid(cpu);
+#endif
+
+}
 /*
  * Remove the cpu slab
  */
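The tid helpers added above encode two things in one counter: which cpu the per-cpu state belongs to (the low bits) and how many operations that cpu has performed on its queue (the high bits). A minimal userspace sketch of the same arithmetic, not kernel code; NR_CPUS is fixed at an example value of 64, which is already a power of two, so no rounding is needed:

```c
#include <stdio.h>

#define NR_CPUS  64          /* example value; the kernel uses CONFIG_NR_CPUS */
#define TID_STEP NR_CPUS     /* already a power of two here */

static unsigned long next_tid(unsigned long tid)     { return tid + TID_STEP; }
static unsigned int  tid_to_cpu(unsigned long tid)   { return tid % TID_STEP; }
static unsigned long tid_to_event(unsigned long tid) { return tid / TID_STEP; }
static unsigned long init_tid(int cpu)               { return cpu; }

int main(void)
{
        /* cpu 3 starts at tid 3 and always advances in steps of TID_STEP */
        unsigned long tid = init_tid(3);

        for (int i = 0; i < 4; i++) {
                printf("tid=%lu  cpu=%u  event=%lu\n",
                       tid, tid_to_cpu(tid), tid_to_event(tid));
                tid = next_tid(tid);
        }

        /*
         * Because every cpu starts at a distinct offset and advances by the
         * same stride, no two cpus ever generate the same tid.  A tid
         * mismatch in the later cmpxchg therefore means either a migration
         * to another cpu or an intervening allocation/free on this cpu.
         */
        return 0;
}
```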
@@ -1564,6 +1642,9 @@ static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
                 page->inuse--;
         }
         c->page = NULL;
+#ifdef CONFIG_CMPXCHG_LOCAL
+        c->tid = next_tid(c->tid);
+#endif
         unfreeze_slab(s, page, tail);
 }
 
@@ -1698,6 +1779,19 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
 {
         void **object;
         struct page *new;
+#ifdef CONFIG_CMPXCHG_LOCAL
+        unsigned long flags;
+
+        local_irq_save(flags);
+#ifdef CONFIG_PREEMPT
+        /*
+         * We may have been preempted and rescheduled on a different
+         * cpu before disabling interrupts. Need to reload cpu area
+         * pointer.
+         */
+        c = this_cpu_ptr(s->cpu_slab);
+#endif
+#endif
 
         /* We handle __GFP_ZERO in the caller */
         gfpflags &= ~__GFP_ZERO;
@@ -1724,6 +1818,10 @@ load_freelist:
         c->node = page_to_nid(c->page);
 unlock_out:
         slab_unlock(c->page);
+#ifdef CONFIG_CMPXCHG_LOCAL
+        c->tid = next_tid(c->tid);
+        local_irq_restore(flags);
+#endif
         stat(s, ALLOC_SLOWPATH);
         return object;
 
@@ -1785,23 +1883,76 @@ static __always_inline void *slab_alloc(struct kmem_cache *s,
 {
         void **object;
         struct kmem_cache_cpu *c;
+#ifdef CONFIG_CMPXCHG_LOCAL
+        unsigned long tid;
+#else
         unsigned long flags;
+#endif
 
         if (slab_pre_alloc_hook(s, gfpflags))
                 return NULL;
 
+#ifndef CONFIG_CMPXCHG_LOCAL
         local_irq_save(flags);
+#else
+redo:
+#endif
+
+        /*
+         * Must read kmem_cache cpu data via this cpu ptr. Preemption is
+         * enabled. We may switch back and forth between cpus while
+         * reading from one cpu area. That does not matter as long
+         * as we end up on the original cpu again when doing the cmpxchg.
+         */
         c = __this_cpu_ptr(s->cpu_slab);
+
+#ifdef CONFIG_CMPXCHG_LOCAL
+        /*
+         * The transaction ids are globally unique per cpu and per operation on
+         * a per cpu queue. Thus they can be guarantee that the cmpxchg_double
+         * occurs on the right processor and that there was no operation on the
+         * linked list in between.
+         */
+        tid = c->tid;
+        barrier();
+#endif
+
         object = c->freelist;
         if (unlikely(!object || !node_match(c, node)))
 
                 object = __slab_alloc(s, gfpflags, node, addr, c);
 
         else {
+#ifdef CONFIG_CMPXCHG_LOCAL
+                /*
+                 * The cmpxchg will only match if there was no additonal
+                 * operation and if we are on the right processor.
+                 *
+                 * The cmpxchg does the following atomically (without lock semantics!)
+                 * 1. Relocate first pointer to the current per cpu area.
+                 * 2. Verify that tid and freelist have not been changed
+                 * 3. If they were not changed replace tid and freelist
+                 *
+                 * Since this is without lock semantics the protection is only against
+                 * code executing on this cpu *not* from access by other cpus.
+                 */
+                if (unlikely(!this_cpu_cmpxchg_double(
+                                s->cpu_slab->freelist, s->cpu_slab->tid,
+                                object, tid,
+                                get_freepointer(s, object), next_tid(tid)))) {
+
+                        note_cmpxchg_failure("slab_alloc", s, tid);
+                        goto redo;
+                }
+#else
                 c->freelist = get_freepointer(s, object);
+#endif
                 stat(s, ALLOC_FASTPATH);
         }
+
+#ifndef CONFIG_CMPXCHG_LOCAL
         local_irq_restore(flags);
+#endif
 
         if (unlikely(gfpflags & __GFP_ZERO) && object)
                 memset(object, 0, s->objsize);
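To see the shape of the new allocation fastpath without the kernel scaffolding, here is a hedged, single-threaded userspace sketch of the same pattern: read freelist and tid optimistically, then commit with one double-width compare-and-swap that checks both at once. The names struct slot, cpu_slot, slot_cmpxchg_double and fastpath_alloc are invented for the illustration; the kernel operates on the real struct kmem_cache_cpu via this_cpu_cmpxchg_double(). Compile with something like gcc -O2 -mcx16 on x86-64 (GCC may also ask for -latomic).

```c
#include <stdio.h>

/* Hypothetical stand-in for the freelist/tid pair in struct kmem_cache_cpu. */
struct slot {
        void          *freelist;   /* first free object, or NULL */
        unsigned long  tid;        /* bumped on every successful update */
};

/* The double-width cmpxchg needs double-word alignment (cmpxchg16b on x86-64). */
static struct slot cpu_slot __attribute__((aligned(2 * sizeof(void *))));

/* Each free object stores a pointer to the next free object at its start. */
static void *get_freepointer(void *object)
{
        return *(void **)object;
}

/* Replace {freelist, tid} only if both still hold the expected values. */
static int slot_cmpxchg_double(struct slot *s, struct slot old, struct slot new)
{
        return __atomic_compare_exchange(s, &old, &new, 0,
                                         __ATOMIC_RELAXED, __ATOMIC_RELAXED);
}

static void *fastpath_alloc(void)
{
        struct slot old, new;
        void *object;

        do {
                /* Optimistic, unlocked read; mirrors "tid = c->tid; barrier();". */
                old.tid = cpu_slot.tid;
                __asm__ __volatile__("" ::: "memory");
                old.freelist = cpu_slot.freelist;

                object = old.freelist;
                if (!object)
                        return NULL;    /* the real code falls back to __slab_alloc() */

                new.freelist = get_freepointer(object);
                new.tid = old.tid + 1;  /* the kernel uses next_tid() */
        } while (!slot_cmpxchg_double(&cpu_slot, old, new));

        return object;
}

int main(void)
{
        static char storage[3][64];
        void *objs[3] = { storage[0], storage[1], storage[2] };

        /* Build a three-object freelist: objs[0] -> objs[1] -> objs[2] -> NULL. */
        *(void **)objs[0] = objs[1];
        *(void **)objs[1] = objs[2];
        *(void **)objs[2] = NULL;
        cpu_slot.freelist = objs[0];
        cpu_slot.tid = 0;

        for (void *p; (p = fastpath_alloc()) != NULL; )
                printf("allocated %p, tid is now %lu\n", p, cpu_slot.tid);

        return 0;
}
```

In the sketch a failed compare-and-swap simply retries; in the kernel a failure additionally means the task may have migrated to another cpu, which is exactly what the per-cpu tid offsets make detectable.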
@@ -1879,9 +2030,13 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
 {
         void *prior;
         void **object = (void *)x;
+#ifdef CONFIG_CMPXCHG_LOCAL
+        unsigned long flags;
 
-        stat(s, FREE_SLOWPATH);
+        local_irq_save(flags);
+#endif
         slab_lock(page);
+        stat(s, FREE_SLOWPATH);
 
         if (kmem_cache_debug(s))
                 goto debug;
@@ -1911,6 +2066,9 @@ checks_ok:
 
 out_unlock:
         slab_unlock(page);
+#ifdef CONFIG_CMPXCHG_LOCAL
+        local_irq_restore(flags);
+#endif
         return;
 
 slab_empty:
@@ -1922,6 +2080,9 @@ slab_empty:
                 stat(s, FREE_REMOVE_PARTIAL);
         }
         slab_unlock(page);
+#ifdef CONFIG_CMPXCHG_LOCAL
+        local_irq_restore(flags);
+#endif
         stat(s, FREE_SLAB);
         discard_slab(s, page);
         return;
@@ -1948,23 +2109,56 @@ static __always_inline void slab_free(struct kmem_cache *s,
 {
         void **object = (void *)x;
         struct kmem_cache_cpu *c;
+#ifdef CONFIG_CMPXCHG_LOCAL
+        unsigned long tid;
+#else
         unsigned long flags;
+#endif
 
         slab_free_hook(s, x);
 
+#ifndef CONFIG_CMPXCHG_LOCAL
         local_irq_save(flags);
+
+#else
+redo:
+#endif
+
+        /*
+         * Determine the currently cpus per cpu slab.
+         * The cpu may change afterward. However that does not matter since
+         * data is retrieved via this pointer. If we are on the same cpu
+         * during the cmpxchg then the free will succedd.
+         */
         c = __this_cpu_ptr(s->cpu_slab);
 
-        slab_free_hook_irq(s, x);
+#ifdef CONFIG_CMPXCHG_LOCAL
+        tid = c->tid;
+        barrier();
+#endif
 
         if (likely(page == c->page && c->node != NUMA_NO_NODE)) {
                 set_freepointer(s, object, c->freelist);
+
+#ifdef CONFIG_CMPXCHG_LOCAL
+                if (unlikely(!this_cpu_cmpxchg_double(
+                                s->cpu_slab->freelist, s->cpu_slab->tid,
+                                c->freelist, tid,
+                                object, next_tid(tid)))) {
+
+                        note_cmpxchg_failure("slab_free", s, tid);
+                        goto redo;
+                }
+#else
                 c->freelist = object;
+#endif
                 stat(s, FREE_FASTPATH);
         } else
                 __slab_free(s, page, x, addr);
 
+#ifndef CONFIG_CMPXCHG_LOCAL
         local_irq_restore(flags);
+#endif
 }
 
 void kmem_cache_free(struct kmem_cache *s, void *x)
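The free fastpath is the mirror image, and the allocation sketch above extends naturally to it. The fragment below reuses the hypothetical struct slot, cpu_slot and slot_cmpxchg_double from that sketch (again, not the kernel's types): the freed object is linked in front of the current freelist head, and the tid bump makes the cmpxchg fail if anything touched this cpu's queue in between.

```c
/* Free-side counterpart to fastpath_alloc() from the sketch above. */
static void fastpath_free(void *object)
{
        struct slot old, new;

        do {
                /* Optimistic read, as on the allocation side. */
                old.tid = cpu_slot.tid;
                __asm__ __volatile__("" ::: "memory");
                old.freelist = cpu_slot.freelist;

                /* Mirrors set_freepointer(): the object points at the old head. */
                *(void **)object = old.freelist;

                new.freelist = object;          /* object becomes the new head */
                new.tid = old.tid + 1;          /* the kernel uses next_tid() */
        } while (!slot_cmpxchg_double(&cpu_slot, old, new));
}
```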
@@ -2156,9 +2350,23 @@ static inline int alloc_kmem_cache_cpus(struct kmem_cache *s)
         BUILD_BUG_ON(PERCPU_DYNAMIC_EARLY_SIZE <
                         SLUB_PAGE_SHIFT * sizeof(struct kmem_cache_cpu));
 
+#ifdef CONFIG_CMPXCHG_LOCAL
+        /*
+         * Must align to double word boundary for the double cmpxchg instructions
+         * to work.
+         */
+        s->cpu_slab = __alloc_percpu(sizeof(struct kmem_cache_cpu), 2 * sizeof(void *));
+#else
+        /* Regular alignment is sufficient */
         s->cpu_slab = alloc_percpu(struct kmem_cache_cpu);
+#endif
+
+        if (!s->cpu_slab)
+                return 0;
+
+        init_kmem_cache_cpus(s);
 
-        return s->cpu_slab != NULL;
+        return 1;
 }
 
 static struct kmem_cache *kmem_cache_node;
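The alignment argument passed to __alloc_percpu() above is what makes the double cmpxchg legal at all: on x86-64, cmpxchg16b takes a 16-byte memory operand that must sit on a 16-byte boundary, so the freelist/tid pair has to start on a 2 * sizeof(void *) boundary. A small compile-time illustration of that constraint, using an invented struct name rather than the kernel's allocator:

```c
#include <assert.h>     /* static_assert (C11) */
#include <stdalign.h>   /* alignas */

/* Invented stand-in for the start of struct kmem_cache_cpu. */
struct cpu_slab_sketch {
        void          *freelist;   /* the pair that one cmpxchg_double... */
        unsigned long  tid;        /* ...must be able to replace together */
        /* other per-cpu fields would follow */
};

/* Force the double-word alignment that __alloc_percpu() is asked for above. */
static alignas(2 * sizeof(void *)) struct cpu_slab_sketch example_slot;

/* The two fields must pack into exactly one naturally aligned double word. */
static_assert(sizeof(void *) + sizeof(unsigned long) == 2 * sizeof(void *),
              "freelist and tid must form one double word with no padding");
```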