Diffstat (limited to 'mm/slub.c')
-rw-r--r-- | mm/slub.c | 376 |
1 files changed, 309 insertions, 67 deletions
@@ -64,7 +64,7 @@ | |||
64 | * we must stay away from it for a while since we may cause a bouncing | 64 | * we must stay away from it for a while since we may cause a bouncing |
65 | * cacheline if we try to acquire the lock. So go onto the next slab. | 65 | * cacheline if we try to acquire the lock. So go onto the next slab. |
66 | * If all pages are busy then we may allocate a new slab instead of reusing | 66 | * If all pages are busy then we may allocate a new slab instead of reusing |
67 | * a partial slab. A new slab has noone operating on it and thus there is | 67 | * a partial slab. A new slab has no one operating on it and thus there is |
68 | * no danger of cacheline contention. | 68 | * no danger of cacheline contention. |
69 | * | 69 | * |
70 | * Interrupts are disabled during allocation and deallocation in order to | 70 | * Interrupts are disabled during allocation and deallocation in order to |
@@ -217,7 +217,7 @@ static inline void sysfs_slab_remove(struct kmem_cache *s) | |||
217 | 217 | ||
218 | #endif | 218 | #endif |
219 | 219 | ||
220 | static inline void stat(struct kmem_cache *s, enum stat_item si) | 220 | static inline void stat(const struct kmem_cache *s, enum stat_item si) |
221 | { | 221 | { |
222 | #ifdef CONFIG_SLUB_STATS | 222 | #ifdef CONFIG_SLUB_STATS |
223 | __this_cpu_inc(s->cpu_slab->stat[si]); | 223 | __this_cpu_inc(s->cpu_slab->stat[si]); |
@@ -281,11 +281,40 @@ static inline int slab_index(void *p, struct kmem_cache *s, void *addr) | |||
281 | return (p - addr) / s->size; | 281 | return (p - addr) / s->size; |
282 | } | 282 | } |
283 | 283 | ||
284 | static inline size_t slab_ksize(const struct kmem_cache *s) | ||
285 | { | ||
286 | #ifdef CONFIG_SLUB_DEBUG | ||
287 | /* | ||
288 | * Debugging requires use of the padding between object | ||
289 | * and whatever may come after it. | ||
290 | */ | ||
291 | if (s->flags & (SLAB_RED_ZONE | SLAB_POISON)) | ||
292 | return s->objsize; | ||
293 | |||
294 | #endif | ||
295 | /* | ||
296 | * If we have the need to store the freelist pointer | ||
297 | * back there or track user information then we can | ||
298 | * only use the space before that information. | ||
299 | */ | ||
300 | if (s->flags & (SLAB_DESTROY_BY_RCU | SLAB_STORE_USER)) | ||
301 | return s->inuse; | ||
302 | /* | ||
303 | * Else we can use all the padding etc for the allocation | ||
304 | */ | ||
305 | return s->size; | ||
306 | } | ||
307 | |||
308 | static inline int order_objects(int order, unsigned long size, int reserved) | ||
309 | { | ||
310 | return ((PAGE_SIZE << order) - reserved) / size; | ||
311 | } | ||
312 | |||
284 | static inline struct kmem_cache_order_objects oo_make(int order, | 313 | static inline struct kmem_cache_order_objects oo_make(int order, |
285 | unsigned long size) | 314 | unsigned long size, int reserved) |
286 | { | 315 | { |
287 | struct kmem_cache_order_objects x = { | 316 | struct kmem_cache_order_objects x = { |
288 | (order << OO_SHIFT) + (PAGE_SIZE << order) / size | 317 | (order << OO_SHIFT) + order_objects(order, size, reserved) |
289 | }; | 318 | }; |
290 | 319 | ||
291 | return x; | 320 | return x; |
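
The hunk above adds slab_ksize(), which decides how many bytes of an object a caller may actually use depending on the debug/RCU/user-tracking flags, and order_objects(), which computes how many objects fit in a slab once the reserved tail is subtracted. A minimal stand-alone sketch of the order_objects() arithmetic, assuming 4 KiB pages and a hypothetical 16-byte reservation (roughly the size of an rcu_head on 64-bit):

/* Stand-alone sketch, not kernel code: assumes 4 KiB pages. */
#include <stdio.h>

#define PAGE_SIZE 4096UL

static int order_objects(int order, unsigned long size, int reserved)
{
	return ((PAGE_SIZE << order) - reserved) / size;
}

int main(void)
{
	/* 256-byte objects in an order-1 slab with 16 reserved bytes */
	printf("%d objects per slab\n", order_objects(1, 256, 16));	/* 31 */
	return 0;
}
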
@@ -617,7 +646,7 @@ static int slab_pad_check(struct kmem_cache *s, struct page *page) | |||
617 | return 1; | 646 | return 1; |
618 | 647 | ||
619 | start = page_address(page); | 648 | start = page_address(page); |
620 | length = (PAGE_SIZE << compound_order(page)); | 649 | length = (PAGE_SIZE << compound_order(page)) - s->reserved; |
621 | end = start + length; | 650 | end = start + length; |
622 | remainder = length % s->size; | 651 | remainder = length % s->size; |
623 | if (!remainder) | 652 | if (!remainder) |
@@ -698,7 +727,7 @@ static int check_slab(struct kmem_cache *s, struct page *page) | |||
698 | return 0; | 727 | return 0; |
699 | } | 728 | } |
700 | 729 | ||
701 | maxobj = (PAGE_SIZE << compound_order(page)) / s->size; | 730 | maxobj = order_objects(compound_order(page), s->size, s->reserved); |
702 | if (page->objects > maxobj) { | 731 | if (page->objects > maxobj) { |
703 | slab_err(s, page, "objects %u > max %u", | 732 | slab_err(s, page, "objects %u > max %u", |
704 | s->name, page->objects, maxobj); | 733 | s->name, page->objects, maxobj); |
@@ -748,7 +777,7 @@ static int on_freelist(struct kmem_cache *s, struct page *page, void *search) | |||
748 | nr++; | 777 | nr++; |
749 | } | 778 | } |
750 | 779 | ||
751 | max_objects = (PAGE_SIZE << compound_order(page)) / s->size; | 780 | max_objects = order_objects(compound_order(page), s->size, s->reserved); |
752 | if (max_objects > MAX_OBJS_PER_PAGE) | 781 | if (max_objects > MAX_OBJS_PER_PAGE) |
753 | max_objects = MAX_OBJS_PER_PAGE; | 782 | max_objects = MAX_OBJS_PER_PAGE; |
754 | 783 | ||
@@ -800,21 +829,31 @@ static inline int slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags) | |||
800 | static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags, void *object) | 829 | static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags, void *object) |
801 | { | 830 | { |
802 | flags &= gfp_allowed_mask; | 831 | flags &= gfp_allowed_mask; |
803 | kmemcheck_slab_alloc(s, flags, object, s->objsize); | 832 | kmemcheck_slab_alloc(s, flags, object, slab_ksize(s)); |
804 | kmemleak_alloc_recursive(object, s->objsize, 1, s->flags, flags); | 833 | kmemleak_alloc_recursive(object, s->objsize, 1, s->flags, flags); |
805 | } | 834 | } |
806 | 835 | ||
807 | static inline void slab_free_hook(struct kmem_cache *s, void *x) | 836 | static inline void slab_free_hook(struct kmem_cache *s, void *x) |
808 | { | 837 | { |
809 | kmemleak_free_recursive(x, s->flags); | 838 | kmemleak_free_recursive(x, s->flags); |
810 | } | ||
811 | 839 | ||
812 | static inline void slab_free_hook_irq(struct kmem_cache *s, void *object) | 840 | /* |
813 | { | 841 | * Trouble is that we may no longer disable interrupts in the fast path
814 | kmemcheck_slab_free(s, object, s->objsize); | 842 | * So in order to make the debug calls that expect irqs to be |
815 | debug_check_no_locks_freed(object, s->objsize); | 843 | * disabled we need to disable interrupts temporarily. |
844 | */ | ||
845 | #if defined(CONFIG_KMEMCHECK) || defined(CONFIG_LOCKDEP) | ||
846 | { | ||
847 | unsigned long flags; | ||
848 | |||
849 | local_irq_save(flags); | ||
850 | kmemcheck_slab_free(s, x, s->objsize); | ||
851 | debug_check_no_locks_freed(x, s->objsize); | ||
852 | local_irq_restore(flags); | ||
853 | } | ||
854 | #endif | ||
816 | if (!(s->flags & SLAB_DEBUG_OBJECTS)) | 855 | if (!(s->flags & SLAB_DEBUG_OBJECTS)) |
817 | debug_check_no_obj_freed(object, s->objsize); | 856 | debug_check_no_obj_freed(x, s->objsize); |
818 | } | 857 | } |
819 | 858 | ||
820 | /* | 859 | /* |
@@ -1101,9 +1140,6 @@ static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags, | |||
1101 | 1140 | ||
1102 | static inline void slab_free_hook(struct kmem_cache *s, void *x) {} | 1141 | static inline void slab_free_hook(struct kmem_cache *s, void *x) {} |
1103 | 1142 | ||
1104 | static inline void slab_free_hook_irq(struct kmem_cache *s, | ||
1105 | void *object) {} | ||
1106 | |||
1107 | #endif /* CONFIG_SLUB_DEBUG */ | 1143 | #endif /* CONFIG_SLUB_DEBUG */ |
1108 | 1144 | ||
1109 | /* | 1145 | /* |
@@ -1249,21 +1285,38 @@ static void __free_slab(struct kmem_cache *s, struct page *page) | |||
1249 | __free_pages(page, order); | 1285 | __free_pages(page, order); |
1250 | } | 1286 | } |
1251 | 1287 | ||
1288 | #define need_reserve_slab_rcu \ | ||
1289 | (sizeof(((struct page *)NULL)->lru) < sizeof(struct rcu_head)) | ||
1290 | |||
1252 | static void rcu_free_slab(struct rcu_head *h) | 1291 | static void rcu_free_slab(struct rcu_head *h) |
1253 | { | 1292 | { |
1254 | struct page *page; | 1293 | struct page *page; |
1255 | 1294 | ||
1256 | page = container_of((struct list_head *)h, struct page, lru); | 1295 | if (need_reserve_slab_rcu) |
1296 | page = virt_to_head_page(h); | ||
1297 | else | ||
1298 | page = container_of((struct list_head *)h, struct page, lru); | ||
1299 | |||
1257 | __free_slab(page->slab, page); | 1300 | __free_slab(page->slab, page); |
1258 | } | 1301 | } |
1259 | 1302 | ||
1260 | static void free_slab(struct kmem_cache *s, struct page *page) | 1303 | static void free_slab(struct kmem_cache *s, struct page *page) |
1261 | { | 1304 | { |
1262 | if (unlikely(s->flags & SLAB_DESTROY_BY_RCU)) { | 1305 | if (unlikely(s->flags & SLAB_DESTROY_BY_RCU)) { |
1263 | /* | 1306 | struct rcu_head *head; |
1264 | * RCU free overloads the RCU head over the LRU | 1307 | |
1265 | */ | 1308 | if (need_reserve_slab_rcu) { |
1266 | struct rcu_head *head = (void *)&page->lru; | 1309 | int order = compound_order(page); |
1310 | int offset = (PAGE_SIZE << order) - s->reserved; | ||
1311 | |||
1312 | VM_BUG_ON(s->reserved != sizeof(*head)); | ||
1313 | head = page_address(page) + offset; | ||
1314 | } else { | ||
1315 | /* | ||
1316 | * RCU free overloads the RCU head over the LRU | ||
1317 | */ | ||
1318 | head = (void *)&page->lru; | ||
1319 | } | ||
1267 | 1320 | ||
1268 | call_rcu(head, rcu_free_slab); | 1321 | call_rcu(head, rcu_free_slab); |
1269 | } else | 1322 | } else |
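
When struct rcu_head does not fit into page->lru (the need_reserve_slab_rcu case above), free_slab() places the head into the bytes reserved at the very end of the slab, and rcu_free_slab() recovers the page with virt_to_head_page(). A small sketch of the offset arithmetic, using made-up addresses and a hypothetical 16-byte reservation:

/* Sketch only: pretend addresses, 4 KiB pages, reserved = sizeof(struct rcu_head). */
#include <stdio.h>

#define PAGE_SIZE 4096UL

int main(void)
{
	unsigned long slab_base = 0x100000UL;	/* stands in for page_address(page) */
	int order = 1;				/* compound_order(page) */
	int reserved = 16;			/* s->reserved */
	unsigned long head = slab_base + (PAGE_SIZE << order) - reserved;

	printf("rcu_head occupies 0x%lx..0x%lx\n",
	       head, slab_base + (PAGE_SIZE << order) - 1);
	return 0;
}
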
@@ -1487,6 +1540,78 @@ static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail) | |||
1487 | } | 1540 | } |
1488 | } | 1541 | } |
1489 | 1542 | ||
1543 | #ifdef CONFIG_CMPXCHG_LOCAL | ||
1544 | #ifdef CONFIG_PREEMPT | ||
1545 | /* | ||
1546 | * Calculate the next globally unique transaction for disambiguation | ||
1547 | * during cmpxchg. The transactions start with the cpu number and are then | ||
1548 | * incremented by CONFIG_NR_CPUS. | ||
1549 | */ | ||
1550 | #define TID_STEP roundup_pow_of_two(CONFIG_NR_CPUS) | ||
1551 | #else | ||
1552 | /* | ||
1553 | * No preemption supported, therefore there is also no need to check for | ||
1554 | * different cpus. | ||
1555 | */ | ||
1556 | #define TID_STEP 1 | ||
1557 | #endif | ||
1558 | |||
1559 | static inline unsigned long next_tid(unsigned long tid) | ||
1560 | { | ||
1561 | return tid + TID_STEP; | ||
1562 | } | ||
1563 | |||
1564 | static inline unsigned int tid_to_cpu(unsigned long tid) | ||
1565 | { | ||
1566 | return tid % TID_STEP; | ||
1567 | } | ||
1568 | |||
1569 | static inline unsigned long tid_to_event(unsigned long tid) | ||
1570 | { | ||
1571 | return tid / TID_STEP; | ||
1572 | } | ||
1573 | |||
1574 | static inline unsigned int init_tid(int cpu) | ||
1575 | { | ||
1576 | return cpu; | ||
1577 | } | ||
1578 | |||
1579 | static inline void note_cmpxchg_failure(const char *n, | ||
1580 | const struct kmem_cache *s, unsigned long tid) | ||
1581 | { | ||
1582 | #ifdef SLUB_DEBUG_CMPXCHG | ||
1583 | unsigned long actual_tid = __this_cpu_read(s->cpu_slab->tid); | ||
1584 | |||
1585 | printk(KERN_INFO "%s %s: cmpxchg redo ", n, s->name); | ||
1586 | |||
1587 | #ifdef CONFIG_PREEMPT | ||
1588 | if (tid_to_cpu(tid) != tid_to_cpu(actual_tid)) | ||
1589 | printk("due to cpu change %d -> %d\n", | ||
1590 | tid_to_cpu(tid), tid_to_cpu(actual_tid)); | ||
1591 | else | ||
1592 | #endif | ||
1593 | if (tid_to_event(tid) != tid_to_event(actual_tid)) | ||
1594 | printk("due to cpu running other code. Event %ld->%ld\n", | ||
1595 | tid_to_event(tid), tid_to_event(actual_tid)); | ||
1596 | else | ||
1597 | printk("for unknown reason: actual=%lx was=%lx target=%lx\n", | ||
1598 | actual_tid, tid, next_tid(tid)); | ||
1599 | #endif | ||
1600 | stat(s, CMPXCHG_DOUBLE_CPU_FAIL); | ||
1601 | } | ||
1602 | |||
1603 | #endif | ||
1604 | |||
1605 | void init_kmem_cache_cpus(struct kmem_cache *s) | ||
1606 | { | ||
1607 | #ifdef CONFIG_CMPXCHG_LOCAL | ||
1608 | int cpu; | ||
1609 | |||
1610 | for_each_possible_cpu(cpu) | ||
1611 | per_cpu_ptr(s->cpu_slab, cpu)->tid = init_tid(cpu); | ||
1612 | #endif | ||
1613 | |||
1614 | } | ||
1490 | /* | 1615 | /* |
1491 | * Remove the cpu slab | 1616 | * Remove the cpu slab |
1492 | */ | 1617 | */ |
@@ -1518,6 +1643,9 @@ static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) | |||
1518 | page->inuse--; | 1643 | page->inuse--; |
1519 | } | 1644 | } |
1520 | c->page = NULL; | 1645 | c->page = NULL; |
1646 | #ifdef CONFIG_CMPXCHG_LOCAL | ||
1647 | c->tid = next_tid(c->tid); | ||
1648 | #endif | ||
1521 | unfreeze_slab(s, page, tail); | 1649 | unfreeze_slab(s, page, tail); |
1522 | } | 1650 | } |
1523 | 1651 | ||
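
The helpers added above encode a per-cpu transaction id: the low bits identify the cpu and the remaining bits count operations on that cpu's freelist, so a stale tid makes the cmpxchg fail whether the thread migrated or another operation ran in between. A user-space sketch, assuming a hypothetical preemptible configuration with CONFIG_NR_CPUS = 4 (so TID_STEP = 4):

/* Sketch of the tid encoding; TID_STEP = roundup_pow_of_two(CONFIG_NR_CPUS). */
#include <stdio.h>

#define TID_STEP 4UL

static unsigned long next_tid(unsigned long tid)     { return tid + TID_STEP; }
static unsigned int tid_to_cpu(unsigned long tid)    { return tid % TID_STEP; }
static unsigned long tid_to_event(unsigned long tid) { return tid / TID_STEP; }

int main(void)
{
	unsigned long tid = 2;		/* init_tid() for cpu 2 */

	tid = next_tid(tid);		/* first alloc/free on cpu 2 */
	tid = next_tid(tid);		/* second one */
	printf("cpu=%u event=%lu\n", tid_to_cpu(tid), tid_to_event(tid));
	return 0;			/* prints: cpu=2 event=2 */
}
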
@@ -1652,6 +1780,19 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, | |||
1652 | { | 1780 | { |
1653 | void **object; | 1781 | void **object; |
1654 | struct page *new; | 1782 | struct page *new; |
1783 | #ifdef CONFIG_CMPXCHG_LOCAL | ||
1784 | unsigned long flags; | ||
1785 | |||
1786 | local_irq_save(flags); | ||
1787 | #ifdef CONFIG_PREEMPT | ||
1788 | /* | ||
1789 | * We may have been preempted and rescheduled on a different | ||
1790 | * cpu before disabling interrupts. Need to reload cpu area | ||
1791 | * pointer. | ||
1792 | */ | ||
1793 | c = this_cpu_ptr(s->cpu_slab); | ||
1794 | #endif | ||
1795 | #endif | ||
1655 | 1796 | ||
1656 | /* We handle __GFP_ZERO in the caller */ | 1797 | /* We handle __GFP_ZERO in the caller */ |
1657 | gfpflags &= ~__GFP_ZERO; | 1798 | gfpflags &= ~__GFP_ZERO; |
@@ -1678,6 +1819,10 @@ load_freelist: | |||
1678 | c->node = page_to_nid(c->page); | 1819 | c->node = page_to_nid(c->page); |
1679 | unlock_out: | 1820 | unlock_out: |
1680 | slab_unlock(c->page); | 1821 | slab_unlock(c->page); |
1822 | #ifdef CONFIG_CMPXCHG_LOCAL | ||
1823 | c->tid = next_tid(c->tid); | ||
1824 | local_irq_restore(flags); | ||
1825 | #endif | ||
1681 | stat(s, ALLOC_SLOWPATH); | 1826 | stat(s, ALLOC_SLOWPATH); |
1682 | return object; | 1827 | return object; |
1683 | 1828 | ||
@@ -1713,6 +1858,9 @@ new_slab: | |||
1713 | } | 1858 | } |
1714 | if (!(gfpflags & __GFP_NOWARN) && printk_ratelimit()) | 1859 | if (!(gfpflags & __GFP_NOWARN) && printk_ratelimit()) |
1715 | slab_out_of_memory(s, gfpflags, node); | 1860 | slab_out_of_memory(s, gfpflags, node); |
1861 | #ifdef CONFIG_CMPXCHG_LOCAL | ||
1862 | local_irq_restore(flags); | ||
1863 | #endif | ||
1716 | return NULL; | 1864 | return NULL; |
1717 | debug: | 1865 | debug: |
1718 | if (!alloc_debug_processing(s, c->page, object, addr)) | 1866 | if (!alloc_debug_processing(s, c->page, object, addr)) |
@@ -1739,23 +1887,76 @@ static __always_inline void *slab_alloc(struct kmem_cache *s, | |||
1739 | { | 1887 | { |
1740 | void **object; | 1888 | void **object; |
1741 | struct kmem_cache_cpu *c; | 1889 | struct kmem_cache_cpu *c; |
1890 | #ifdef CONFIG_CMPXCHG_LOCAL | ||
1891 | unsigned long tid; | ||
1892 | #else | ||
1742 | unsigned long flags; | 1893 | unsigned long flags; |
1894 | #endif | ||
1743 | 1895 | ||
1744 | if (slab_pre_alloc_hook(s, gfpflags)) | 1896 | if (slab_pre_alloc_hook(s, gfpflags)) |
1745 | return NULL; | 1897 | return NULL; |
1746 | 1898 | ||
1899 | #ifndef CONFIG_CMPXCHG_LOCAL | ||
1747 | local_irq_save(flags); | 1900 | local_irq_save(flags); |
1901 | #else | ||
1902 | redo: | ||
1903 | #endif | ||
1904 | |||
1905 | /* | ||
1906 | * Must read kmem_cache cpu data via this cpu ptr. Preemption is | ||
1907 | * enabled. We may switch back and forth between cpus while | ||
1908 | * reading from one cpu area. That does not matter as long | ||
1909 | * as we end up on the original cpu again when doing the cmpxchg. | ||
1910 | */ | ||
1748 | c = __this_cpu_ptr(s->cpu_slab); | 1911 | c = __this_cpu_ptr(s->cpu_slab); |
1912 | |||
1913 | #ifdef CONFIG_CMPXCHG_LOCAL | ||
1914 | /* | ||
1915 | * The transaction ids are globally unique per cpu and per operation on | ||
1916 | * a per cpu queue. Thus they guarantee that the cmpxchg_double | ||
1917 | * occurs on the right processor and that there was no operation on the | ||
1918 | * linked list in between. | ||
1919 | */ | ||
1920 | tid = c->tid; | ||
1921 | barrier(); | ||
1922 | #endif | ||
1923 | |||
1749 | object = c->freelist; | 1924 | object = c->freelist; |
1750 | if (unlikely(!object || !node_match(c, node))) | 1925 | if (unlikely(!object || !node_match(c, node))) |
1751 | 1926 | ||
1752 | object = __slab_alloc(s, gfpflags, node, addr, c); | 1927 | object = __slab_alloc(s, gfpflags, node, addr, c); |
1753 | 1928 | ||
1754 | else { | 1929 | else { |
1930 | #ifdef CONFIG_CMPXCHG_LOCAL | ||
1931 | /* | ||
1932 | * The cmpxchg will only match if there was no additional | ||
1933 | * operation and if we are on the right processor. | ||
1934 | * | ||
1935 | * The cmpxchg does the following atomically (without lock semantics!) | ||
1936 | * 1. Relocate first pointer to the current per cpu area. | ||
1937 | * 2. Verify that tid and freelist have not been changed | ||
1938 | * 3. If they were not changed replace tid and freelist | ||
1939 | * | ||
1940 | * Since this is without lock semantics the protection is only against | ||
1941 | * code executing on this cpu *not* from access by other cpus. | ||
1942 | */ | ||
1943 | if (unlikely(!this_cpu_cmpxchg_double( | ||
1944 | s->cpu_slab->freelist, s->cpu_slab->tid, | ||
1945 | object, tid, | ||
1946 | get_freepointer(s, object), next_tid(tid)))) { | ||
1947 | |||
1948 | note_cmpxchg_failure("slab_alloc", s, tid); | ||
1949 | goto redo; | ||
1950 | } | ||
1951 | #else | ||
1755 | c->freelist = get_freepointer(s, object); | 1952 | c->freelist = get_freepointer(s, object); |
1953 | #endif | ||
1756 | stat(s, ALLOC_FASTPATH); | 1954 | stat(s, ALLOC_FASTPATH); |
1757 | } | 1955 | } |
1956 | |||
1957 | #ifndef CONFIG_CMPXCHG_LOCAL | ||
1758 | local_irq_restore(flags); | 1958 | local_irq_restore(flags); |
1959 | #endif | ||
1759 | 1960 | ||
1760 | if (unlikely(gfpflags & __GFP_ZERO) && object) | 1961 | if (unlikely(gfpflags & __GFP_ZERO) && object) |
1761 | memset(object, 0, s->objsize); | 1962 | memset(object, 0, s->objsize); |
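
With CONFIG_CMPXCHG_LOCAL the fastpath above no longer disables interrupts: it snapshots tid and freelist, then commits both with one this_cpu_cmpxchg_double() and jumps back to redo: if anything changed in between. A single-threaded sketch of that retry pattern, with a deliberately non-atomic stand-in for the cmpxchg (it only shows the control flow, and it bumps tid by 1 where the kernel would use next_tid()):

/* Sketch, not kernel code: cmpxchg_double_mock() is NOT atomic. */
#include <stdbool.h>
#include <stdio.h>

struct cpu_slab {
	void *freelist;		/* first free object, or NULL */
	unsigned long tid;	/* bumped on every operation */
};

static bool cmpxchg_double_mock(struct cpu_slab *c,
				void *old_list, unsigned long old_tid,
				void *new_list, unsigned long new_tid)
{
	if (c->freelist != old_list || c->tid != old_tid)
		return false;	/* another operation slipped in: caller redoes */
	c->freelist = new_list;
	c->tid = new_tid;
	return true;
}

int main(void)
{
	/* Toy freelist: each free object stores the address of the next one. */
	void *objs[3];
	void *object;
	unsigned long tid;

	objs[0] = &objs[1];
	objs[1] = &objs[2];
	objs[2] = NULL;

	struct cpu_slab c = { .freelist = &objs[0], .tid = 0 };

	do {				/* the "redo:" loop in slab_alloc() */
		tid = c.tid;
		object = c.freelist;
	} while (object &&
		 !cmpxchg_double_mock(&c, object, tid,
				      *(void **)object, tid + 1));

	printf("allocated %p, freelist now %p, tid %lu\n",
	       object, c.freelist, c.tid);
	return 0;
}

The slab_free() fastpath further down mirrors this in reverse: it links the freed object to the current freelist and then swings freelist/tid to the object with the same double-word cmpxchg, retrying on failure.
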
@@ -1833,9 +2034,13 @@ static void __slab_free(struct kmem_cache *s, struct page *page, | |||
1833 | { | 2034 | { |
1834 | void *prior; | 2035 | void *prior; |
1835 | void **object = (void *)x; | 2036 | void **object = (void *)x; |
2037 | #ifdef CONFIG_CMPXCHG_LOCAL | ||
2038 | unsigned long flags; | ||
1836 | 2039 | ||
1837 | stat(s, FREE_SLOWPATH); | 2040 | local_irq_save(flags); |
2041 | #endif | ||
1838 | slab_lock(page); | 2042 | slab_lock(page); |
2043 | stat(s, FREE_SLOWPATH); | ||
1839 | 2044 | ||
1840 | if (kmem_cache_debug(s)) | 2045 | if (kmem_cache_debug(s)) |
1841 | goto debug; | 2046 | goto debug; |
@@ -1865,6 +2070,9 @@ checks_ok: | |||
1865 | 2070 | ||
1866 | out_unlock: | 2071 | out_unlock: |
1867 | slab_unlock(page); | 2072 | slab_unlock(page); |
2073 | #ifdef CONFIG_CMPXCHG_LOCAL | ||
2074 | local_irq_restore(flags); | ||
2075 | #endif | ||
1868 | return; | 2076 | return; |
1869 | 2077 | ||
1870 | slab_empty: | 2078 | slab_empty: |
@@ -1876,6 +2084,9 @@ slab_empty: | |||
1876 | stat(s, FREE_REMOVE_PARTIAL); | 2084 | stat(s, FREE_REMOVE_PARTIAL); |
1877 | } | 2085 | } |
1878 | slab_unlock(page); | 2086 | slab_unlock(page); |
2087 | #ifdef CONFIG_CMPXCHG_LOCAL | ||
2088 | local_irq_restore(flags); | ||
2089 | #endif | ||
1879 | stat(s, FREE_SLAB); | 2090 | stat(s, FREE_SLAB); |
1880 | discard_slab(s, page); | 2091 | discard_slab(s, page); |
1881 | return; | 2092 | return; |
@@ -1902,23 +2113,56 @@ static __always_inline void slab_free(struct kmem_cache *s, | |||
1902 | { | 2113 | { |
1903 | void **object = (void *)x; | 2114 | void **object = (void *)x; |
1904 | struct kmem_cache_cpu *c; | 2115 | struct kmem_cache_cpu *c; |
2116 | #ifdef CONFIG_CMPXCHG_LOCAL | ||
2117 | unsigned long tid; | ||
2118 | #else | ||
1905 | unsigned long flags; | 2119 | unsigned long flags; |
2120 | #endif | ||
1906 | 2121 | ||
1907 | slab_free_hook(s, x); | 2122 | slab_free_hook(s, x); |
1908 | 2123 | ||
2124 | #ifndef CONFIG_CMPXCHG_LOCAL | ||
1909 | local_irq_save(flags); | 2125 | local_irq_save(flags); |
2126 | |||
2127 | #else | ||
2128 | redo: | ||
2129 | #endif | ||
2130 | |||
2131 | /* | ||
2132 | * Determine the current cpu's per cpu slab. | ||
2133 | * The cpu may change afterward. However that does not matter since | ||
2134 | * data is retrieved via this pointer. If we are on the same cpu | ||
2135 | * during the cmpxchg then the free will succeed. | ||
2136 | */ | ||
1910 | c = __this_cpu_ptr(s->cpu_slab); | 2137 | c = __this_cpu_ptr(s->cpu_slab); |
1911 | 2138 | ||
1912 | slab_free_hook_irq(s, x); | 2139 | #ifdef CONFIG_CMPXCHG_LOCAL |
2140 | tid = c->tid; | ||
2141 | barrier(); | ||
2142 | #endif | ||
1913 | 2143 | ||
1914 | if (likely(page == c->page && c->node != NUMA_NO_NODE)) { | 2144 | if (likely(page == c->page && c->node != NUMA_NO_NODE)) { |
1915 | set_freepointer(s, object, c->freelist); | 2145 | set_freepointer(s, object, c->freelist); |
2146 | |||
2147 | #ifdef CONFIG_CMPXCHG_LOCAL | ||
2148 | if (unlikely(!this_cpu_cmpxchg_double( | ||
2149 | s->cpu_slab->freelist, s->cpu_slab->tid, | ||
2150 | c->freelist, tid, | ||
2151 | object, next_tid(tid)))) { | ||
2152 | |||
2153 | note_cmpxchg_failure("slab_free", s, tid); | ||
2154 | goto redo; | ||
2155 | } | ||
2156 | #else | ||
1916 | c->freelist = object; | 2157 | c->freelist = object; |
2158 | #endif | ||
1917 | stat(s, FREE_FASTPATH); | 2159 | stat(s, FREE_FASTPATH); |
1918 | } else | 2160 | } else |
1919 | __slab_free(s, page, x, addr); | 2161 | __slab_free(s, page, x, addr); |
1920 | 2162 | ||
2163 | #ifndef CONFIG_CMPXCHG_LOCAL | ||
1921 | local_irq_restore(flags); | 2164 | local_irq_restore(flags); |
2165 | #endif | ||
1922 | } | 2166 | } |
1923 | 2167 | ||
1924 | void kmem_cache_free(struct kmem_cache *s, void *x) | 2168 | void kmem_cache_free(struct kmem_cache *s, void *x) |
@@ -1988,13 +2232,13 @@ static int slub_nomerge; | |||
1988 | * the smallest order which will fit the object. | 2232 | * the smallest order which will fit the object. |
1989 | */ | 2233 | */ |
1990 | static inline int slab_order(int size, int min_objects, | 2234 | static inline int slab_order(int size, int min_objects, |
1991 | int max_order, int fract_leftover) | 2235 | int max_order, int fract_leftover, int reserved) |
1992 | { | 2236 | { |
1993 | int order; | 2237 | int order; |
1994 | int rem; | 2238 | int rem; |
1995 | int min_order = slub_min_order; | 2239 | int min_order = slub_min_order; |
1996 | 2240 | ||
1997 | if ((PAGE_SIZE << min_order) / size > MAX_OBJS_PER_PAGE) | 2241 | if (order_objects(min_order, size, reserved) > MAX_OBJS_PER_PAGE) |
1998 | return get_order(size * MAX_OBJS_PER_PAGE) - 1; | 2242 | return get_order(size * MAX_OBJS_PER_PAGE) - 1; |
1999 | 2243 | ||
2000 | for (order = max(min_order, | 2244 | for (order = max(min_order, |
@@ -2003,10 +2247,10 @@ static inline int slab_order(int size, int min_objects, | |||
2003 | 2247 | ||
2004 | unsigned long slab_size = PAGE_SIZE << order; | 2248 | unsigned long slab_size = PAGE_SIZE << order; |
2005 | 2249 | ||
2006 | if (slab_size < min_objects * size) | 2250 | if (slab_size < min_objects * size + reserved) |
2007 | continue; | 2251 | continue; |
2008 | 2252 | ||
2009 | rem = slab_size % size; | 2253 | rem = (slab_size - reserved) % size; |
2010 | 2254 | ||
2011 | if (rem <= slab_size / fract_leftover) | 2255 | if (rem <= slab_size / fract_leftover) |
2012 | break; | 2256 | break; |
@@ -2016,7 +2260,7 @@ static inline int slab_order(int size, int min_objects, | |||
2016 | return order; | 2260 | return order; |
2017 | } | 2261 | } |
2018 | 2262 | ||
2019 | static inline int calculate_order(int size) | 2263 | static inline int calculate_order(int size, int reserved) |
2020 | { | 2264 | { |
2021 | int order; | 2265 | int order; |
2022 | int min_objects; | 2266 | int min_objects; |
@@ -2034,14 +2278,14 @@ static inline int calculate_order(int size) | |||
2034 | min_objects = slub_min_objects; | 2278 | min_objects = slub_min_objects; |
2035 | if (!min_objects) | 2279 | if (!min_objects) |
2036 | min_objects = 4 * (fls(nr_cpu_ids) + 1); | 2280 | min_objects = 4 * (fls(nr_cpu_ids) + 1); |
2037 | max_objects = (PAGE_SIZE << slub_max_order)/size; | 2281 | max_objects = order_objects(slub_max_order, size, reserved); |
2038 | min_objects = min(min_objects, max_objects); | 2282 | min_objects = min(min_objects, max_objects); |
2039 | 2283 | ||
2040 | while (min_objects > 1) { | 2284 | while (min_objects > 1) { |
2041 | fraction = 16; | 2285 | fraction = 16; |
2042 | while (fraction >= 4) { | 2286 | while (fraction >= 4) { |
2043 | order = slab_order(size, min_objects, | 2287 | order = slab_order(size, min_objects, |
2044 | slub_max_order, fraction); | 2288 | slub_max_order, fraction, reserved); |
2045 | if (order <= slub_max_order) | 2289 | if (order <= slub_max_order) |
2046 | return order; | 2290 | return order; |
2047 | fraction /= 2; | 2291 | fraction /= 2; |
@@ -2053,14 +2297,14 @@ static inline int calculate_order(int size) | |||
2053 | * We were unable to place multiple objects in a slab. Now | 2297 | * We were unable to place multiple objects in a slab. Now |
2054 | * lets see if we can place a single object there. | 2298 | * lets see if we can place a single object there. |
2055 | */ | 2299 | */ |
2056 | order = slab_order(size, 1, slub_max_order, 1); | 2300 | order = slab_order(size, 1, slub_max_order, 1, reserved); |
2057 | if (order <= slub_max_order) | 2301 | if (order <= slub_max_order) |
2058 | return order; | 2302 | return order; |
2059 | 2303 | ||
2060 | /* | 2304 | /* |
2061 | * Doh this slab cannot be placed using slub_max_order. | 2305 | * Doh this slab cannot be placed using slub_max_order. |
2062 | */ | 2306 | */ |
2063 | order = slab_order(size, 1, MAX_ORDER, 1); | 2307 | order = slab_order(size, 1, MAX_ORDER, 1, reserved); |
2064 | if (order < MAX_ORDER) | 2308 | if (order < MAX_ORDER) |
2065 | return order; | 2309 | return order; |
2066 | return -ENOSYS; | 2310 | return -ENOSYS; |
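
slab_order() above now subtracts the reserved tail before measuring how much of the slab would be wasted, and calculate_order() relaxes the acceptable waste fraction from 1/16 downward until an order fits. A simplified user-space sketch of the per-order check (it starts at order 0 instead of the kernel's computed minimum order), assuming 4 KiB pages:

/* Simplified sketch of the slab_order() heuristic, not kernel code. */
#include <stdio.h>

#define PAGE_SIZE 4096UL

static int slab_order(int size, int min_objects, int max_order,
		      int fract_leftover, int reserved)
{
	int order;

	for (order = 0; order <= max_order; order++) {
		unsigned long slab_size = PAGE_SIZE << order;
		unsigned long rem;

		if (slab_size < (unsigned long)(min_objects * size + reserved))
			continue;

		rem = (slab_size - reserved) % size;
		if (rem <= slab_size / fract_leftover)
			break;
	}
	return order;
}

int main(void)
{
	/* 700-byte objects, at least 8 per slab, waste at most 1/16 of the slab */
	int order = slab_order(700, 8, 3, 16, 16);

	printf("chosen order: %d (%lu objects)\n", order,
	       ((PAGE_SIZE << order) - 16) / 700);	/* order 1, 11 objects */
	return 0;
}
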
@@ -2110,9 +2354,23 @@ static inline int alloc_kmem_cache_cpus(struct kmem_cache *s) | |||
2110 | BUILD_BUG_ON(PERCPU_DYNAMIC_EARLY_SIZE < | 2354 | BUILD_BUG_ON(PERCPU_DYNAMIC_EARLY_SIZE < |
2111 | SLUB_PAGE_SHIFT * sizeof(struct kmem_cache_cpu)); | 2355 | SLUB_PAGE_SHIFT * sizeof(struct kmem_cache_cpu)); |
2112 | 2356 | ||
2357 | #ifdef CONFIG_CMPXCHG_LOCAL | ||
2358 | /* | ||
2359 | * Must align to double word boundary for the double cmpxchg instructions | ||
2360 | * to work. | ||
2361 | */ | ||
2362 | s->cpu_slab = __alloc_percpu(sizeof(struct kmem_cache_cpu), 2 * sizeof(void *)); | ||
2363 | #else | ||
2364 | /* Regular alignment is sufficient */ | ||
2113 | s->cpu_slab = alloc_percpu(struct kmem_cache_cpu); | 2365 | s->cpu_slab = alloc_percpu(struct kmem_cache_cpu); |
2366 | #endif | ||
2367 | |||
2368 | if (!s->cpu_slab) | ||
2369 | return 0; | ||
2114 | 2370 | ||
2115 | return s->cpu_slab != NULL; | 2371 | init_kmem_cache_cpus(s); |
2372 | |||
2373 | return 1; | ||
2116 | } | 2374 | } |
2117 | 2375 | ||
2118 | static struct kmem_cache *kmem_cache_node; | 2376 | static struct kmem_cache *kmem_cache_node; |
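
The alloc_kmem_cache_cpus() change above asks __alloc_percpu() for 2 * sizeof(void *) alignment because the double cmpxchg updates freelist and tid as one adjacent pair of words, and the instruction requires that pair to start on a double-word boundary. A sketch of the layout constraint with a hypothetical two-word struct (the real struct kmem_cache_cpu has more fields):

/* Layout sketch only; not the real struct kmem_cache_cpu. */
#include <stdalign.h>
#include <stdio.h>

struct fastpath_words {
	void *freelist;			/* word 0 */
	unsigned long tid;		/* word 1, must be adjacent to word 0 */
} __attribute__((aligned(2 * sizeof(void *))));

int main(void)
{
	printf("alignment: %zu bytes (2 * sizeof(void *) = %zu)\n",
	       alignof(struct fastpath_words), 2 * sizeof(void *));
	return 0;
}
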
@@ -2311,7 +2569,7 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order) | |||
2311 | if (forced_order >= 0) | 2569 | if (forced_order >= 0) |
2312 | order = forced_order; | 2570 | order = forced_order; |
2313 | else | 2571 | else |
2314 | order = calculate_order(size); | 2572 | order = calculate_order(size, s->reserved); |
2315 | 2573 | ||
2316 | if (order < 0) | 2574 | if (order < 0) |
2317 | return 0; | 2575 | return 0; |
@@ -2329,8 +2587,8 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order) | |||
2329 | /* | 2587 | /* |
2330 | * Determine the number of objects per slab | 2588 | * Determine the number of objects per slab |
2331 | */ | 2589 | */ |
2332 | s->oo = oo_make(order, size); | 2590 | s->oo = oo_make(order, size, s->reserved); |
2333 | s->min = oo_make(get_order(size), size); | 2591 | s->min = oo_make(get_order(size), size, s->reserved); |
2334 | if (oo_objects(s->oo) > oo_objects(s->max)) | 2592 | if (oo_objects(s->oo) > oo_objects(s->max)) |
2335 | s->max = s->oo; | 2593 | s->max = s->oo; |
2336 | 2594 | ||
@@ -2349,6 +2607,10 @@ static int kmem_cache_open(struct kmem_cache *s, | |||
2349 | s->objsize = size; | 2607 | s->objsize = size; |
2350 | s->align = align; | 2608 | s->align = align; |
2351 | s->flags = kmem_cache_flags(size, flags, name, ctor); | 2609 | s->flags = kmem_cache_flags(size, flags, name, ctor); |
2610 | s->reserved = 0; | ||
2611 | |||
2612 | if (need_reserve_slab_rcu && (s->flags & SLAB_DESTROY_BY_RCU)) | ||
2613 | s->reserved = sizeof(struct rcu_head); | ||
2352 | 2614 | ||
2353 | if (!calculate_sizes(s, -1)) | 2615 | if (!calculate_sizes(s, -1)) |
2354 | goto error; | 2616 | goto error; |
@@ -2399,12 +2661,6 @@ unsigned int kmem_cache_size(struct kmem_cache *s) | |||
2399 | } | 2661 | } |
2400 | EXPORT_SYMBOL(kmem_cache_size); | 2662 | EXPORT_SYMBOL(kmem_cache_size); |
2401 | 2663 | ||
2402 | const char *kmem_cache_name(struct kmem_cache *s) | ||
2403 | { | ||
2404 | return s->name; | ||
2405 | } | ||
2406 | EXPORT_SYMBOL(kmem_cache_name); | ||
2407 | |||
2408 | static void list_slab_objects(struct kmem_cache *s, struct page *page, | 2664 | static void list_slab_objects(struct kmem_cache *s, struct page *page, |
2409 | const char *text) | 2665 | const char *text) |
2410 | { | 2666 | { |
@@ -2696,7 +2952,6 @@ EXPORT_SYMBOL(__kmalloc_node); | |||
2696 | size_t ksize(const void *object) | 2952 | size_t ksize(const void *object) |
2697 | { | 2953 | { |
2698 | struct page *page; | 2954 | struct page *page; |
2699 | struct kmem_cache *s; | ||
2700 | 2955 | ||
2701 | if (unlikely(object == ZERO_SIZE_PTR)) | 2956 | if (unlikely(object == ZERO_SIZE_PTR)) |
2702 | return 0; | 2957 | return 0; |
@@ -2707,28 +2962,8 @@ size_t ksize(const void *object) | |||
2707 | WARN_ON(!PageCompound(page)); | 2962 | WARN_ON(!PageCompound(page)); |
2708 | return PAGE_SIZE << compound_order(page); | 2963 | return PAGE_SIZE << compound_order(page); |
2709 | } | 2964 | } |
2710 | s = page->slab; | ||
2711 | |||
2712 | #ifdef CONFIG_SLUB_DEBUG | ||
2713 | /* | ||
2714 | * Debugging requires use of the padding between object | ||
2715 | * and whatever may come after it. | ||
2716 | */ | ||
2717 | if (s->flags & (SLAB_RED_ZONE | SLAB_POISON)) | ||
2718 | return s->objsize; | ||
2719 | 2965 | ||
2720 | #endif | 2966 | return slab_ksize(page->slab); |
2721 | /* | ||
2722 | * If we have the need to store the freelist pointer | ||
2723 | * back there or track user information then we can | ||
2724 | * only use the space before that information. | ||
2725 | */ | ||
2726 | if (s->flags & (SLAB_DESTROY_BY_RCU | SLAB_STORE_USER)) | ||
2727 | return s->inuse; | ||
2728 | /* | ||
2729 | * Else we can use all the padding etc for the allocation | ||
2730 | */ | ||
2731 | return s->size; | ||
2732 | } | 2967 | } |
2733 | EXPORT_SYMBOL(ksize); | 2968 | EXPORT_SYMBOL(ksize); |
2734 | 2969 | ||
@@ -3312,7 +3547,7 @@ void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller) | |||
3312 | 3547 | ||
3313 | ret = slab_alloc(s, gfpflags, NUMA_NO_NODE, caller); | 3548 | ret = slab_alloc(s, gfpflags, NUMA_NO_NODE, caller); |
3314 | 3549 | ||
3315 | /* Honor the call site pointer we recieved. */ | 3550 | /* Honor the call site pointer we received. */ |
3316 | trace_kmalloc(caller, ret, size, s->size, gfpflags); | 3551 | trace_kmalloc(caller, ret, size, s->size, gfpflags); |
3317 | 3552 | ||
3318 | return ret; | 3553 | return ret; |
@@ -3342,7 +3577,7 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags, | |||
3342 | 3577 | ||
3343 | ret = slab_alloc(s, gfpflags, node, caller); | 3578 | ret = slab_alloc(s, gfpflags, node, caller); |
3344 | 3579 | ||
3345 | /* Honor the call site pointer we recieved. */ | 3580 | /* Honor the call site pointer we received. */ |
3346 | trace_kmalloc_node(caller, ret, size, s->size, gfpflags, node); | 3581 | trace_kmalloc_node(caller, ret, size, s->size, gfpflags, node); |
3347 | 3582 | ||
3348 | return ret; | 3583 | return ret; |
@@ -4017,6 +4252,12 @@ static ssize_t destroy_by_rcu_show(struct kmem_cache *s, char *buf) | |||
4017 | } | 4252 | } |
4018 | SLAB_ATTR_RO(destroy_by_rcu); | 4253 | SLAB_ATTR_RO(destroy_by_rcu); |
4019 | 4254 | ||
4255 | static ssize_t reserved_show(struct kmem_cache *s, char *buf) | ||
4256 | { | ||
4257 | return sprintf(buf, "%d\n", s->reserved); | ||
4258 | } | ||
4259 | SLAB_ATTR_RO(reserved); | ||
4260 | |||
4020 | #ifdef CONFIG_SLUB_DEBUG | 4261 | #ifdef CONFIG_SLUB_DEBUG |
4021 | static ssize_t slabs_show(struct kmem_cache *s, char *buf) | 4262 | static ssize_t slabs_show(struct kmem_cache *s, char *buf) |
4022 | { | 4263 | { |
@@ -4303,6 +4544,7 @@ static struct attribute *slab_attrs[] = { | |||
4303 | &reclaim_account_attr.attr, | 4544 | &reclaim_account_attr.attr, |
4304 | &destroy_by_rcu_attr.attr, | 4545 | &destroy_by_rcu_attr.attr, |
4305 | &shrink_attr.attr, | 4546 | &shrink_attr.attr, |
4547 | &reserved_attr.attr, | ||
4306 | #ifdef CONFIG_SLUB_DEBUG | 4548 | #ifdef CONFIG_SLUB_DEBUG |
4307 | &total_objects_attr.attr, | 4549 | &total_objects_attr.attr, |
4308 | &slabs_attr.attr, | 4550 | &slabs_attr.attr, |