Diffstat (limited to 'mm')
-rw-r--r--  mm/balloon_compaction.c     2
-rw-r--r--  mm/bootmem.c                9
-rw-r--r--  mm/cma.c                   68
-rw-r--r--  mm/compaction.c            21
-rw-r--r--  mm/huge_memory.c           15
-rw-r--r--  mm/internal.h              25
-rw-r--r--  mm/iov_iter.c            1062
-rw-r--r--  mm/memcontrol.c           105
-rw-r--r--  mm/memory.c                 1
-rw-r--r--  mm/memory_hotplug.c        31
-rw-r--r--  mm/mmap.c                   8
-rw-r--r--  mm/nobootmem.c              8
-rw-r--r--  mm/page-writeback.c        43
-rw-r--r--  mm/page_alloc.c            68
-rw-r--r--  mm/page_cgroup.c            1
-rw-r--r--  mm/page_isolation.c        43
-rw-r--r--  mm/rmap.c                  88
-rw-r--r--  mm/slab_common.c           14
-rw-r--r--  mm/truncate.c               6
19 files changed, 762 insertions, 856 deletions
diff --git a/mm/balloon_compaction.c b/mm/balloon_compaction.c
index b3cbe19f71b5..fcad8322ef36 100644
--- a/mm/balloon_compaction.c
+++ b/mm/balloon_compaction.c
@@ -68,11 +68,13 @@ struct page *balloon_page_dequeue(struct balloon_dev_info *b_dev_info)
68 | * to be released by the balloon driver. | 68 | * to be released by the balloon driver. |
69 | */ | 69 | */ |
70 | if (trylock_page(page)) { | 70 | if (trylock_page(page)) { |
71 | #ifdef CONFIG_BALLOON_COMPACTION | ||
71 | if (!PagePrivate(page)) { | 72 | if (!PagePrivate(page)) { |
72 | /* raced with isolation */ | 73 | /* raced with isolation */ |
73 | unlock_page(page); | 74 | unlock_page(page); |
74 | continue; | 75 | continue; |
75 | } | 76 | } |
77 | #endif | ||
76 | spin_lock_irqsave(&b_dev_info->pages_lock, flags); | 78 | spin_lock_irqsave(&b_dev_info->pages_lock, flags); |
77 | balloon_page_delete(page); | 79 | balloon_page_delete(page); |
78 | __count_vm_event(BALLOON_DEFLATE); | 80 | __count_vm_event(BALLOON_DEFLATE); |
diff --git a/mm/bootmem.c b/mm/bootmem.c
index 8a000cebb0d7..477be696511d 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -243,13 +243,10 @@ static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata)
243 | 243 | ||
244 | static int reset_managed_pages_done __initdata; | 244 | static int reset_managed_pages_done __initdata; |
245 | 245 | ||
246 | static inline void __init reset_node_managed_pages(pg_data_t *pgdat) | 246 | void reset_node_managed_pages(pg_data_t *pgdat) |
247 | { | 247 | { |
248 | struct zone *z; | 248 | struct zone *z; |
249 | 249 | ||
250 | if (reset_managed_pages_done) | ||
251 | return; | ||
252 | |||
253 | for (z = pgdat->node_zones; z < pgdat->node_zones + MAX_NR_ZONES; z++) | 250 | for (z = pgdat->node_zones; z < pgdat->node_zones + MAX_NR_ZONES; z++) |
254 | z->managed_pages = 0; | 251 | z->managed_pages = 0; |
255 | } | 252 | } |
@@ -258,8 +255,12 @@ void __init reset_all_zones_managed_pages(void)
258 | { | 255 | { |
259 | struct pglist_data *pgdat; | 256 | struct pglist_data *pgdat; |
260 | 257 | ||
258 | if (reset_managed_pages_done) | ||
259 | return; | ||
260 | |||
261 | for_each_online_pgdat(pgdat) | 261 | for_each_online_pgdat(pgdat) |
262 | reset_node_managed_pages(pgdat); | 262 | reset_node_managed_pages(pgdat); |
263 | |||
263 | reset_managed_pages_done = 1; | 264 | reset_managed_pages_done = 1; |
264 | } | 265 | } |
265 | 266 | ||
diff --git a/mm/cma.c b/mm/cma.c
--- a/mm/cma.c
+++ b/mm/cma.c
@@ -124,6 +124,7 @@ static int __init cma_activate_area(struct cma *cma)
124 | 124 | ||
125 | err: | 125 | err: |
126 | kfree(cma->bitmap); | 126 | kfree(cma->bitmap); |
127 | cma->count = 0; | ||
127 | return -EINVAL; | 128 | return -EINVAL; |
128 | } | 129 | } |
129 | 130 | ||
@@ -217,9 +218,8 @@ int __init cma_declare_contiguous(phys_addr_t base,
217 | phys_addr_t highmem_start = __pa(high_memory); | 218 | phys_addr_t highmem_start = __pa(high_memory); |
218 | int ret = 0; | 219 | int ret = 0; |
219 | 220 | ||
220 | pr_debug("%s(size %lx, base %08lx, limit %08lx alignment %08lx)\n", | 221 | pr_debug("%s(size %pa, base %pa, limit %pa alignment %pa)\n", |
221 | __func__, (unsigned long)size, (unsigned long)base, | 222 | __func__, &size, &base, &limit, &alignment); |
222 | (unsigned long)limit, (unsigned long)alignment); | ||
223 | 223 | ||
224 | if (cma_area_count == ARRAY_SIZE(cma_areas)) { | 224 | if (cma_area_count == ARRAY_SIZE(cma_areas)) { |
225 | pr_err("Not enough slots for CMA reserved regions!\n"); | 225 | pr_err("Not enough slots for CMA reserved regions!\n"); |
@@ -244,52 +244,72 @@ int __init cma_declare_contiguous(phys_addr_t base,
244 | size = ALIGN(size, alignment); | 244 | size = ALIGN(size, alignment); |
245 | limit &= ~(alignment - 1); | 245 | limit &= ~(alignment - 1); |
246 | 246 | ||
247 | if (!base) | ||
248 | fixed = false; | ||
249 | |||
247 | /* size should be aligned with order_per_bit */ | 250 | /* size should be aligned with order_per_bit */ |
248 | if (!IS_ALIGNED(size >> PAGE_SHIFT, 1 << order_per_bit)) | 251 | if (!IS_ALIGNED(size >> PAGE_SHIFT, 1 << order_per_bit)) |
249 | return -EINVAL; | 252 | return -EINVAL; |
250 | 253 | ||
251 | /* | 254 | /* |
252 | * adjust limit to avoid crossing low/high memory boundary for | 255 | * If allocating at a fixed base the request region must not cross the |
253 | * automatically allocated regions | 256 | * low/high memory boundary. |
254 | */ | 257 | */ |
255 | if (((limit == 0 || limit > memblock_end) && | 258 | if (fixed && base < highmem_start && base + size > highmem_start) { |
256 | (memblock_end - size < highmem_start && | ||
257 | memblock_end > highmem_start)) || | ||
258 | (!fixed && limit > highmem_start && limit - size < highmem_start)) { | ||
259 | limit = highmem_start; | ||
260 | } | ||
261 | |||
262 | if (fixed && base < highmem_start && base+size > highmem_start) { | ||
263 | ret = -EINVAL; | 259 | ret = -EINVAL; |
264 | pr_err("Region at %08lx defined on low/high memory boundary (%08lx)\n", | 260 | pr_err("Region at %pa defined on low/high memory boundary (%pa)\n", |
265 | (unsigned long)base, (unsigned long)highmem_start); | 261 | &base, &highmem_start); |
266 | goto err; | 262 | goto err; |
267 | } | 263 | } |
268 | 264 | ||
265 | /* | ||
266 | * If the limit is unspecified or above the memblock end, its effective | ||
267 | * value will be the memblock end. Set it explicitly to simplify further | ||
268 | * checks. | ||
269 | */ | ||
270 | if (limit == 0 || limit > memblock_end) | ||
271 | limit = memblock_end; | ||
272 | |||
269 | /* Reserve memory */ | 273 | /* Reserve memory */ |
270 | if (base && fixed) { | 274 | if (fixed) { |
271 | if (memblock_is_region_reserved(base, size) || | 275 | if (memblock_is_region_reserved(base, size) || |
272 | memblock_reserve(base, size) < 0) { | 276 | memblock_reserve(base, size) < 0) { |
273 | ret = -EBUSY; | 277 | ret = -EBUSY; |
274 | goto err; | 278 | goto err; |
275 | } | 279 | } |
276 | } else { | 280 | } else { |
277 | phys_addr_t addr = memblock_alloc_range(size, alignment, base, | 281 | phys_addr_t addr = 0; |
278 | limit); | 282 | |
283 | /* | ||
284 | * All pages in the reserved area must come from the same zone. | ||
285 | * If the requested region crosses the low/high memory boundary, | ||
286 | * try allocating from high memory first and fall back to low | ||
287 | * memory in case of failure. | ||
288 | */ | ||
289 | if (base < highmem_start && limit > highmem_start) { | ||
290 | addr = memblock_alloc_range(size, alignment, | ||
291 | highmem_start, limit); | ||
292 | limit = highmem_start; | ||
293 | } | ||
294 | |||
279 | if (!addr) { | 295 | if (!addr) { |
280 | ret = -ENOMEM; | 296 | addr = memblock_alloc_range(size, alignment, base, |
281 | goto err; | 297 | limit); |
282 | } else { | 298 | if (!addr) { |
283 | base = addr; | 299 | ret = -ENOMEM; |
300 | goto err; | ||
301 | } | ||
284 | } | 302 | } |
303 | |||
304 | base = addr; | ||
285 | } | 305 | } |
286 | 306 | ||
287 | ret = cma_init_reserved_mem(base, size, order_per_bit, res_cma); | 307 | ret = cma_init_reserved_mem(base, size, order_per_bit, res_cma); |
288 | if (ret) | 308 | if (ret) |
289 | goto err; | 309 | goto err; |
290 | 310 | ||
291 | pr_info("Reserved %ld MiB at %08lx\n", (unsigned long)size / SZ_1M, | 311 | pr_info("Reserved %ld MiB at %pa\n", (unsigned long)size / SZ_1M, |
292 | (unsigned long)base); | 312 | &base); |
293 | return 0; | 313 | return 0; |
294 | 314 | ||
295 | err: | 315 | err: |
diff --git a/mm/compaction.c b/mm/compaction.c
index edba18aed173..f9792ba3537c 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -479,6 +479,16 @@ isolate_freepages_range(struct compact_control *cc,
479 | 479 | ||
480 | block_end_pfn = min(block_end_pfn, end_pfn); | 480 | block_end_pfn = min(block_end_pfn, end_pfn); |
481 | 481 | ||
482 | /* | ||
483 | * pfn could pass the block_end_pfn if isolated freepage | ||
484 | * is more than pageblock order. In this case, we adjust | ||
485 | * scanning range to right one. | ||
486 | */ | ||
487 | if (pfn >= block_end_pfn) { | ||
488 | block_end_pfn = ALIGN(pfn + 1, pageblock_nr_pages); | ||
489 | block_end_pfn = min(block_end_pfn, end_pfn); | ||
490 | } | ||
491 | |||
482 | if (!pageblock_pfn_to_page(pfn, block_end_pfn, cc->zone)) | 492 | if (!pageblock_pfn_to_page(pfn, block_end_pfn, cc->zone)) |
483 | break; | 493 | break; |
484 | 494 | ||
@@ -784,6 +794,9 @@ isolate_migratepages_range(struct compact_control *cc, unsigned long start_pfn,
784 | cc->nr_migratepages = 0; | 794 | cc->nr_migratepages = 0; |
785 | break; | 795 | break; |
786 | } | 796 | } |
797 | |||
798 | if (cc->nr_migratepages == COMPACT_CLUSTER_MAX) | ||
799 | break; | ||
787 | } | 800 | } |
788 | acct_isolated(cc->zone, cc); | 801 | acct_isolated(cc->zone, cc); |
789 | 802 | ||
@@ -1026,8 +1039,12 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone,
1026 | } | 1039 | } |
1027 | 1040 | ||
1028 | acct_isolated(zone, cc); | 1041 | acct_isolated(zone, cc); |
1029 | /* Record where migration scanner will be restarted */ | 1042 | /* |
1030 | cc->migrate_pfn = low_pfn; | 1043 | * Record where migration scanner will be restarted. If we end up in |
1044 | * the same pageblock as the free scanner, make the scanners fully | ||
1045 | * meet so that compact_finished() terminates compaction. | ||
1046 | */ | ||
1047 | cc->migrate_pfn = (end_pfn <= cc->free_pfn) ? low_pfn : cc->free_pfn; | ||
1031 | 1048 | ||
1032 | return cc->nr_migratepages ? ISOLATE_SUCCESS : ISOLATE_NONE; | 1049 | return cc->nr_migratepages ? ISOLATE_SUCCESS : ISOLATE_NONE; |
1033 | } | 1050 | } |
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 74c78aa8bc2f..de984159cf0b 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -200,7 +200,7 @@ retry:
200 | preempt_disable(); | 200 | preempt_disable(); |
201 | if (cmpxchg(&huge_zero_page, NULL, zero_page)) { | 201 | if (cmpxchg(&huge_zero_page, NULL, zero_page)) { |
202 | preempt_enable(); | 202 | preempt_enable(); |
203 | __free_page(zero_page); | 203 | __free_pages(zero_page, compound_order(zero_page)); |
204 | goto retry; | 204 | goto retry; |
205 | } | 205 | } |
206 | 206 | ||
@@ -232,7 +232,7 @@ static unsigned long shrink_huge_zero_page_scan(struct shrinker *shrink,
232 | if (atomic_cmpxchg(&huge_zero_refcount, 1, 0) == 1) { | 232 | if (atomic_cmpxchg(&huge_zero_refcount, 1, 0) == 1) { |
233 | struct page *zero_page = xchg(&huge_zero_page, NULL); | 233 | struct page *zero_page = xchg(&huge_zero_page, NULL); |
234 | BUG_ON(zero_page == NULL); | 234 | BUG_ON(zero_page == NULL); |
235 | __free_page(zero_page); | 235 | __free_pages(zero_page, compound_order(zero_page)); |
236 | return HPAGE_PMD_NR; | 236 | return HPAGE_PMD_NR; |
237 | } | 237 | } |
238 | 238 | ||
@@ -803,7 +803,7 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
803 | return VM_FAULT_FALLBACK; | 803 | return VM_FAULT_FALLBACK; |
804 | if (unlikely(anon_vma_prepare(vma))) | 804 | if (unlikely(anon_vma_prepare(vma))) |
805 | return VM_FAULT_OOM; | 805 | return VM_FAULT_OOM; |
806 | if (unlikely(khugepaged_enter(vma))) | 806 | if (unlikely(khugepaged_enter(vma, vma->vm_flags))) |
807 | return VM_FAULT_OOM; | 807 | return VM_FAULT_OOM; |
808 | if (!(flags & FAULT_FLAG_WRITE) && | 808 | if (!(flags & FAULT_FLAG_WRITE) && |
809 | transparent_hugepage_use_zero_page()) { | 809 | transparent_hugepage_use_zero_page()) { |
@@ -1970,7 +1970,7 @@ int hugepage_madvise(struct vm_area_struct *vma,
1970 | * register it here without waiting a page fault that | 1970 | * register it here without waiting a page fault that |
1971 | * may not happen any time soon. | 1971 | * may not happen any time soon. |
1972 | */ | 1972 | */ |
1973 | if (unlikely(khugepaged_enter_vma_merge(vma))) | 1973 | if (unlikely(khugepaged_enter_vma_merge(vma, *vm_flags))) |
1974 | return -ENOMEM; | 1974 | return -ENOMEM; |
1975 | break; | 1975 | break; |
1976 | case MADV_NOHUGEPAGE: | 1976 | case MADV_NOHUGEPAGE: |
@@ -2071,7 +2071,8 @@ int __khugepaged_enter(struct mm_struct *mm)
2071 | return 0; | 2071 | return 0; |
2072 | } | 2072 | } |
2073 | 2073 | ||
2074 | int khugepaged_enter_vma_merge(struct vm_area_struct *vma) | 2074 | int khugepaged_enter_vma_merge(struct vm_area_struct *vma, |
2075 | unsigned long vm_flags) | ||
2075 | { | 2076 | { |
2076 | unsigned long hstart, hend; | 2077 | unsigned long hstart, hend; |
2077 | if (!vma->anon_vma) | 2078 | if (!vma->anon_vma) |
@@ -2083,11 +2084,11 @@ int khugepaged_enter_vma_merge(struct vm_area_struct *vma)
2083 | if (vma->vm_ops) | 2084 | if (vma->vm_ops) |
2084 | /* khugepaged not yet working on file or special mappings */ | 2085 | /* khugepaged not yet working on file or special mappings */ |
2085 | return 0; | 2086 | return 0; |
2086 | VM_BUG_ON_VMA(vma->vm_flags & VM_NO_THP, vma); | 2087 | VM_BUG_ON_VMA(vm_flags & VM_NO_THP, vma); |
2087 | hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK; | 2088 | hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK; |
2088 | hend = vma->vm_end & HPAGE_PMD_MASK; | 2089 | hend = vma->vm_end & HPAGE_PMD_MASK; |
2089 | if (hstart < hend) | 2090 | if (hstart < hend) |
2090 | return khugepaged_enter(vma); | 2091 | return khugepaged_enter(vma, vm_flags); |
2091 | return 0; | 2092 | return 0; |
2092 | } | 2093 | } |
2093 | 2094 | ||
diff --git a/mm/internal.h b/mm/internal.h
index 829304090b90..a4f90ba7068e 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -108,6 +108,31 @@ extern pmd_t *mm_find_pmd(struct mm_struct *mm, unsigned long address);
108 | /* | 108 | /* |
109 | * in mm/page_alloc.c | 109 | * in mm/page_alloc.c |
110 | */ | 110 | */ |
111 | |||
112 | /* | ||
113 | * Locate the struct page for both the matching buddy in our | ||
114 | * pair (buddy1) and the combined O(n+1) page they form (page). | ||
115 | * | ||
116 | * 1) Any buddy B1 will have an order O twin B2 which satisfies | ||
117 | * the following equation: | ||
118 | * B2 = B1 ^ (1 << O) | ||
119 | * For example, if the starting buddy (buddy2) is #8 its order | ||
120 | * 1 buddy is #10: | ||
121 | * B2 = 8 ^ (1 << 1) = 8 ^ 2 = 10 | ||
122 | * | ||
123 | * 2) Any buddy B will have an order O+1 parent P which | ||
124 | * satisfies the following equation: | ||
125 | * P = B & ~(1 << O) | ||
126 | * | ||
127 | * Assumption: *_mem_map is contiguous at least up to MAX_ORDER | ||
128 | */ | ||
129 | static inline unsigned long | ||
130 | __find_buddy_index(unsigned long page_idx, unsigned int order) | ||
131 | { | ||
132 | return page_idx ^ (1 << order); | ||
133 | } | ||
134 | |||
135 | extern int __isolate_free_page(struct page *page, unsigned int order); | ||
111 | extern void __free_pages_bootmem(struct page *page, unsigned int order); | 136 | extern void __free_pages_bootmem(struct page *page, unsigned int order); |
112 | extern void prep_compound_page(struct page *page, unsigned long order); | 137 | extern void prep_compound_page(struct page *page, unsigned long order); |
113 | #ifdef CONFIG_MEMORY_FAILURE | 138 | #ifdef CONFIG_MEMORY_FAILURE |
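The __find_buddy_index() helper moved into mm/internal.h above is a pure bit trick: flipping bit O of a page index yields its order-O buddy, and clearing that bit yields the combined order-(O+1) parent. A minimal user-space sketch of the same arithmetic (illustrative only, not kernel code):

#include <stdio.h>

/* Mirrors the helper's comment: the order-O buddy of page_idx is found by
 * flipping bit O of the index (B2 = B1 ^ (1 << O)). */
static unsigned long find_buddy_index(unsigned long page_idx, unsigned int order)
{
	return page_idx ^ (1UL << order);
}

int main(void)
{
	/* Example from the comment: page #8 has order-1 buddy #10. */
	printf("order-1 buddy of 8:   %lu\n", find_buddy_index(8, 1));   /* 10 */
	printf("order-0 buddy of 8:   %lu\n", find_buddy_index(8, 0));   /*  9 */
	/* Combining a buddy pair clears bit O: P = B & ~(1 << O). */
	printf("order-1 parent of 10: %lu\n", 10UL & ~(1UL << 1));       /*  8 */
	return 0;
}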
diff --git a/mm/iov_iter.c b/mm/iov_iter.c
index eafcf60f6b83..a1599ca4ab0e 100644
--- a/mm/iov_iter.c
+++ b/mm/iov_iter.c
@@ -3,95 +3,136 @@
3 | #include <linux/pagemap.h> | 3 | #include <linux/pagemap.h> |
4 | #include <linux/slab.h> | 4 | #include <linux/slab.h> |
5 | #include <linux/vmalloc.h> | 5 | #include <linux/vmalloc.h> |
6 | 6 | #include <net/checksum.h> | |
7 | static size_t copy_to_iter_iovec(void *from, size_t bytes, struct iov_iter *i) | 7 | |
8 | { | 8 | #define iterate_iovec(i, n, __v, __p, skip, STEP) { \ |
9 | size_t skip, copy, left, wanted; | 9 | size_t left; \ |
10 | const struct iovec *iov; | 10 | size_t wanted = n; \ |
11 | char __user *buf; | 11 | __p = i->iov; \ |
12 | 12 | __v.iov_len = min(n, __p->iov_len - skip); \ | |
13 | if (unlikely(bytes > i->count)) | 13 | if (likely(__v.iov_len)) { \ |
14 | bytes = i->count; | 14 | __v.iov_base = __p->iov_base + skip; \ |
15 | 15 | left = (STEP); \ | |
16 | if (unlikely(!bytes)) | 16 | __v.iov_len -= left; \ |
17 | return 0; | 17 | skip += __v.iov_len; \ |
18 | 18 | n -= __v.iov_len; \ | |
19 | wanted = bytes; | 19 | } else { \ |
20 | iov = i->iov; | 20 | left = 0; \ |
21 | skip = i->iov_offset; | 21 | } \ |
22 | buf = iov->iov_base + skip; | 22 | while (unlikely(!left && n)) { \ |
23 | copy = min(bytes, iov->iov_len - skip); | 23 | __p++; \ |
24 | 24 | __v.iov_len = min(n, __p->iov_len); \ | |
25 | left = __copy_to_user(buf, from, copy); | 25 | if (unlikely(!__v.iov_len)) \ |
26 | copy -= left; | 26 | continue; \ |
27 | skip += copy; | 27 | __v.iov_base = __p->iov_base; \ |
28 | from += copy; | 28 | left = (STEP); \ |
29 | bytes -= copy; | 29 | __v.iov_len -= left; \ |
30 | while (unlikely(!left && bytes)) { | 30 | skip = __v.iov_len; \ |
31 | iov++; | 31 | n -= __v.iov_len; \ |
32 | buf = iov->iov_base; | 32 | } \ |
33 | copy = min(bytes, iov->iov_len); | 33 | n = wanted - n; \ |
34 | left = __copy_to_user(buf, from, copy); | 34 | } |
35 | copy -= left; | 35 | |
36 | skip = copy; | 36 | #define iterate_kvec(i, n, __v, __p, skip, STEP) { \ |
37 | from += copy; | 37 | size_t wanted = n; \ |
38 | bytes -= copy; | 38 | __p = i->kvec; \ |
39 | } | 39 | __v.iov_len = min(n, __p->iov_len - skip); \ |
40 | 40 | if (likely(__v.iov_len)) { \ | |
41 | if (skip == iov->iov_len) { | 41 | __v.iov_base = __p->iov_base + skip; \ |
42 | iov++; | 42 | (void)(STEP); \ |
43 | skip = 0; | 43 | skip += __v.iov_len; \ |
44 | } | 44 | n -= __v.iov_len; \ |
45 | i->count -= wanted - bytes; | 45 | } \ |
46 | i->nr_segs -= iov - i->iov; | 46 | while (unlikely(n)) { \ |
47 | i->iov = iov; | 47 | __p++; \ |
48 | i->iov_offset = skip; | 48 | __v.iov_len = min(n, __p->iov_len); \ |
49 | return wanted - bytes; | 49 | if (unlikely(!__v.iov_len)) \ |
50 | } | 50 | continue; \ |
51 | 51 | __v.iov_base = __p->iov_base; \ | |
52 | static size_t copy_from_iter_iovec(void *to, size_t bytes, struct iov_iter *i) | 52 | (void)(STEP); \ |
53 | { | 53 | skip = __v.iov_len; \ |
54 | size_t skip, copy, left, wanted; | 54 | n -= __v.iov_len; \ |
55 | const struct iovec *iov; | 55 | } \ |
56 | char __user *buf; | 56 | n = wanted; \ |
57 | 57 | } | |
58 | if (unlikely(bytes > i->count)) | 58 | |
59 | bytes = i->count; | 59 | #define iterate_bvec(i, n, __v, __p, skip, STEP) { \ |
60 | 60 | size_t wanted = n; \ | |
61 | if (unlikely(!bytes)) | 61 | __p = i->bvec; \ |
62 | return 0; | 62 | __v.bv_len = min_t(size_t, n, __p->bv_len - skip); \ |
63 | 63 | if (likely(__v.bv_len)) { \ | |
64 | wanted = bytes; | 64 | __v.bv_page = __p->bv_page; \ |
65 | iov = i->iov; | 65 | __v.bv_offset = __p->bv_offset + skip; \ |
66 | skip = i->iov_offset; | 66 | (void)(STEP); \ |
67 | buf = iov->iov_base + skip; | 67 | skip += __v.bv_len; \ |
68 | copy = min(bytes, iov->iov_len - skip); | 68 | n -= __v.bv_len; \ |
69 | 69 | } \ | |
70 | left = __copy_from_user(to, buf, copy); | 70 | while (unlikely(n)) { \ |
71 | copy -= left; | 71 | __p++; \ |
72 | skip += copy; | 72 | __v.bv_len = min_t(size_t, n, __p->bv_len); \ |
73 | to += copy; | 73 | if (unlikely(!__v.bv_len)) \ |
74 | bytes -= copy; | 74 | continue; \ |
75 | while (unlikely(!left && bytes)) { | 75 | __v.bv_page = __p->bv_page; \ |
76 | iov++; | 76 | __v.bv_offset = __p->bv_offset; \ |
77 | buf = iov->iov_base; | 77 | (void)(STEP); \ |
78 | copy = min(bytes, iov->iov_len); | 78 | skip = __v.bv_len; \ |
79 | left = __copy_from_user(to, buf, copy); | 79 | n -= __v.bv_len; \ |
80 | copy -= left; | 80 | } \ |
81 | skip = copy; | 81 | n = wanted; \ |
82 | to += copy; | 82 | } |
83 | bytes -= copy; | 83 | |
84 | } | 84 | #define iterate_all_kinds(i, n, v, I, B, K) { \ |
85 | 85 | size_t skip = i->iov_offset; \ | |
86 | if (skip == iov->iov_len) { | 86 | if (unlikely(i->type & ITER_BVEC)) { \ |
87 | iov++; | 87 | const struct bio_vec *bvec; \ |
88 | skip = 0; | 88 | struct bio_vec v; \ |
89 | } | 89 | iterate_bvec(i, n, v, bvec, skip, (B)) \ |
90 | i->count -= wanted - bytes; | 90 | } else if (unlikely(i->type & ITER_KVEC)) { \ |
91 | i->nr_segs -= iov - i->iov; | 91 | const struct kvec *kvec; \ |
92 | i->iov = iov; | 92 | struct kvec v; \ |
93 | i->iov_offset = skip; | 93 | iterate_kvec(i, n, v, kvec, skip, (K)) \ |
94 | return wanted - bytes; | 94 | } else { \ |
95 | const struct iovec *iov; \ | ||
96 | struct iovec v; \ | ||
97 | iterate_iovec(i, n, v, iov, skip, (I)) \ | ||
98 | } \ | ||
99 | } | ||
100 | |||
101 | #define iterate_and_advance(i, n, v, I, B, K) { \ | ||
102 | size_t skip = i->iov_offset; \ | ||
103 | if (unlikely(i->type & ITER_BVEC)) { \ | ||
104 | const struct bio_vec *bvec; \ | ||
105 | struct bio_vec v; \ | ||
106 | iterate_bvec(i, n, v, bvec, skip, (B)) \ | ||
107 | if (skip == bvec->bv_len) { \ | ||
108 | bvec++; \ | ||
109 | skip = 0; \ | ||
110 | } \ | ||
111 | i->nr_segs -= bvec - i->bvec; \ | ||
112 | i->bvec = bvec; \ | ||
113 | } else if (unlikely(i->type & ITER_KVEC)) { \ | ||
114 | const struct kvec *kvec; \ | ||
115 | struct kvec v; \ | ||
116 | iterate_kvec(i, n, v, kvec, skip, (K)) \ | ||
117 | if (skip == kvec->iov_len) { \ | ||
118 | kvec++; \ | ||
119 | skip = 0; \ | ||
120 | } \ | ||
121 | i->nr_segs -= kvec - i->kvec; \ | ||
122 | i->kvec = kvec; \ | ||
123 | } else { \ | ||
124 | const struct iovec *iov; \ | ||
125 | struct iovec v; \ | ||
126 | iterate_iovec(i, n, v, iov, skip, (I)) \ | ||
127 | if (skip == iov->iov_len) { \ | ||
128 | iov++; \ | ||
129 | skip = 0; \ | ||
130 | } \ | ||
131 | i->nr_segs -= iov - i->iov; \ | ||
132 | i->iov = iov; \ | ||
133 | } \ | ||
134 | i->count -= n; \ | ||
135 | i->iov_offset = skip; \ | ||
95 | } | 136 | } |
96 | 137 | ||
97 | static size_t copy_page_to_iter_iovec(struct page *page, size_t offset, size_t bytes, | 138 | static size_t copy_page_to_iter_iovec(struct page *page, size_t offset, size_t bytes, |
@@ -256,134 +297,6 @@ done:
256 | return wanted - bytes; | 297 | return wanted - bytes; |
257 | } | 298 | } |
258 | 299 | ||
259 | static size_t zero_iovec(size_t bytes, struct iov_iter *i) | ||
260 | { | ||
261 | size_t skip, copy, left, wanted; | ||
262 | const struct iovec *iov; | ||
263 | char __user *buf; | ||
264 | |||
265 | if (unlikely(bytes > i->count)) | ||
266 | bytes = i->count; | ||
267 | |||
268 | if (unlikely(!bytes)) | ||
269 | return 0; | ||
270 | |||
271 | wanted = bytes; | ||
272 | iov = i->iov; | ||
273 | skip = i->iov_offset; | ||
274 | buf = iov->iov_base + skip; | ||
275 | copy = min(bytes, iov->iov_len - skip); | ||
276 | |||
277 | left = __clear_user(buf, copy); | ||
278 | copy -= left; | ||
279 | skip += copy; | ||
280 | bytes -= copy; | ||
281 | |||
282 | while (unlikely(!left && bytes)) { | ||
283 | iov++; | ||
284 | buf = iov->iov_base; | ||
285 | copy = min(bytes, iov->iov_len); | ||
286 | left = __clear_user(buf, copy); | ||
287 | copy -= left; | ||
288 | skip = copy; | ||
289 | bytes -= copy; | ||
290 | } | ||
291 | |||
292 | if (skip == iov->iov_len) { | ||
293 | iov++; | ||
294 | skip = 0; | ||
295 | } | ||
296 | i->count -= wanted - bytes; | ||
297 | i->nr_segs -= iov - i->iov; | ||
298 | i->iov = iov; | ||
299 | i->iov_offset = skip; | ||
300 | return wanted - bytes; | ||
301 | } | ||
302 | |||
303 | static size_t __iovec_copy_from_user_inatomic(char *vaddr, | ||
304 | const struct iovec *iov, size_t base, size_t bytes) | ||
305 | { | ||
306 | size_t copied = 0, left = 0; | ||
307 | |||
308 | while (bytes) { | ||
309 | char __user *buf = iov->iov_base + base; | ||
310 | int copy = min(bytes, iov->iov_len - base); | ||
311 | |||
312 | base = 0; | ||
313 | left = __copy_from_user_inatomic(vaddr, buf, copy); | ||
314 | copied += copy; | ||
315 | bytes -= copy; | ||
316 | vaddr += copy; | ||
317 | iov++; | ||
318 | |||
319 | if (unlikely(left)) | ||
320 | break; | ||
321 | } | ||
322 | return copied - left; | ||
323 | } | ||
324 | |||
325 | /* | ||
326 | * Copy as much as we can into the page and return the number of bytes which | ||
327 | * were successfully copied. If a fault is encountered then return the number of | ||
328 | * bytes which were copied. | ||
329 | */ | ||
330 | static size_t copy_from_user_atomic_iovec(struct page *page, | ||
331 | struct iov_iter *i, unsigned long offset, size_t bytes) | ||
332 | { | ||
333 | char *kaddr; | ||
334 | size_t copied; | ||
335 | |||
336 | kaddr = kmap_atomic(page); | ||
337 | if (likely(i->nr_segs == 1)) { | ||
338 | int left; | ||
339 | char __user *buf = i->iov->iov_base + i->iov_offset; | ||
340 | left = __copy_from_user_inatomic(kaddr + offset, buf, bytes); | ||
341 | copied = bytes - left; | ||
342 | } else { | ||
343 | copied = __iovec_copy_from_user_inatomic(kaddr + offset, | ||
344 | i->iov, i->iov_offset, bytes); | ||
345 | } | ||
346 | kunmap_atomic(kaddr); | ||
347 | |||
348 | return copied; | ||
349 | } | ||
350 | |||
351 | static void advance_iovec(struct iov_iter *i, size_t bytes) | ||
352 | { | ||
353 | BUG_ON(i->count < bytes); | ||
354 | |||
355 | if (likely(i->nr_segs == 1)) { | ||
356 | i->iov_offset += bytes; | ||
357 | i->count -= bytes; | ||
358 | } else { | ||
359 | const struct iovec *iov = i->iov; | ||
360 | size_t base = i->iov_offset; | ||
361 | unsigned long nr_segs = i->nr_segs; | ||
362 | |||
363 | /* | ||
364 | * The !iov->iov_len check ensures we skip over unlikely | ||
365 | * zero-length segments (without overruning the iovec). | ||
366 | */ | ||
367 | while (bytes || unlikely(i->count && !iov->iov_len)) { | ||
368 | int copy; | ||
369 | |||
370 | copy = min(bytes, iov->iov_len - base); | ||
371 | BUG_ON(!i->count || i->count < copy); | ||
372 | i->count -= copy; | ||
373 | bytes -= copy; | ||
374 | base += copy; | ||
375 | if (iov->iov_len == base) { | ||
376 | iov++; | ||
377 | nr_segs--; | ||
378 | base = 0; | ||
379 | } | ||
380 | } | ||
381 | i->iov = iov; | ||
382 | i->iov_offset = base; | ||
383 | i->nr_segs = nr_segs; | ||
384 | } | ||
385 | } | ||
386 | |||
387 | /* | 300 | /* |
388 | * Fault in the first iovec of the given iov_iter, to a maximum length | 301 | * Fault in the first iovec of the given iov_iter, to a maximum length |
389 | * of bytes. Returns 0 on success, or non-zero if the memory could not be | 302 | * of bytes. Returns 0 on success, or non-zero if the memory could not be |
@@ -395,7 +308,7 @@ static void advance_iovec(struct iov_iter *i, size_t bytes)
395 | */ | 308 | */ |
396 | int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes) | 309 | int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes) |
397 | { | 310 | { |
398 | if (!(i->type & ITER_BVEC)) { | 311 | if (!(i->type & (ITER_BVEC|ITER_KVEC))) { |
399 | char __user *buf = i->iov->iov_base + i->iov_offset; | 312 | char __user *buf = i->iov->iov_base + i->iov_offset; |
400 | bytes = min(bytes, i->iov->iov_len - i->iov_offset); | 313 | bytes = min(bytes, i->iov->iov_len - i->iov_offset); |
401 | return fault_in_pages_readable(buf, bytes); | 314 | return fault_in_pages_readable(buf, bytes); |
@@ -404,136 +317,25 @@ int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes)
404 | } | 317 | } |
405 | EXPORT_SYMBOL(iov_iter_fault_in_readable); | 318 | EXPORT_SYMBOL(iov_iter_fault_in_readable); |
406 | 319 | ||
407 | static unsigned long alignment_iovec(const struct iov_iter *i) | ||
408 | { | ||
409 | const struct iovec *iov = i->iov; | ||
410 | unsigned long res; | ||
411 | size_t size = i->count; | ||
412 | size_t n; | ||
413 | |||
414 | if (!size) | ||
415 | return 0; | ||
416 | |||
417 | res = (unsigned long)iov->iov_base + i->iov_offset; | ||
418 | n = iov->iov_len - i->iov_offset; | ||
419 | if (n >= size) | ||
420 | return res | size; | ||
421 | size -= n; | ||
422 | res |= n; | ||
423 | while (size > (++iov)->iov_len) { | ||
424 | res |= (unsigned long)iov->iov_base | iov->iov_len; | ||
425 | size -= iov->iov_len; | ||
426 | } | ||
427 | res |= (unsigned long)iov->iov_base | size; | ||
428 | return res; | ||
429 | } | ||
430 | |||
431 | void iov_iter_init(struct iov_iter *i, int direction, | 320 | void iov_iter_init(struct iov_iter *i, int direction, |
432 | const struct iovec *iov, unsigned long nr_segs, | 321 | const struct iovec *iov, unsigned long nr_segs, |
433 | size_t count) | 322 | size_t count) |
434 | { | 323 | { |
435 | /* It will get better. Eventually... */ | 324 | /* It will get better. Eventually... */ |
436 | if (segment_eq(get_fs(), KERNEL_DS)) | 325 | if (segment_eq(get_fs(), KERNEL_DS)) { |
437 | direction |= ITER_KVEC; | 326 | direction |= ITER_KVEC; |
438 | i->type = direction; | 327 | i->type = direction; |
439 | i->iov = iov; | 328 | i->kvec = (struct kvec *)iov; |
329 | } else { | ||
330 | i->type = direction; | ||
331 | i->iov = iov; | ||
332 | } | ||
440 | i->nr_segs = nr_segs; | 333 | i->nr_segs = nr_segs; |
441 | i->iov_offset = 0; | 334 | i->iov_offset = 0; |
442 | i->count = count; | 335 | i->count = count; |
443 | } | 336 | } |
444 | EXPORT_SYMBOL(iov_iter_init); | 337 | EXPORT_SYMBOL(iov_iter_init); |
445 | 338 | ||
446 | static ssize_t get_pages_iovec(struct iov_iter *i, | ||
447 | struct page **pages, size_t maxsize, unsigned maxpages, | ||
448 | size_t *start) | ||
449 | { | ||
450 | size_t offset = i->iov_offset; | ||
451 | const struct iovec *iov = i->iov; | ||
452 | size_t len; | ||
453 | unsigned long addr; | ||
454 | int n; | ||
455 | int res; | ||
456 | |||
457 | len = iov->iov_len - offset; | ||
458 | if (len > i->count) | ||
459 | len = i->count; | ||
460 | if (len > maxsize) | ||
461 | len = maxsize; | ||
462 | addr = (unsigned long)iov->iov_base + offset; | ||
463 | len += *start = addr & (PAGE_SIZE - 1); | ||
464 | if (len > maxpages * PAGE_SIZE) | ||
465 | len = maxpages * PAGE_SIZE; | ||
466 | addr &= ~(PAGE_SIZE - 1); | ||
467 | n = (len + PAGE_SIZE - 1) / PAGE_SIZE; | ||
468 | res = get_user_pages_fast(addr, n, (i->type & WRITE) != WRITE, pages); | ||
469 | if (unlikely(res < 0)) | ||
470 | return res; | ||
471 | return (res == n ? len : res * PAGE_SIZE) - *start; | ||
472 | } | ||
473 | |||
474 | static ssize_t get_pages_alloc_iovec(struct iov_iter *i, | ||
475 | struct page ***pages, size_t maxsize, | ||
476 | size_t *start) | ||
477 | { | ||
478 | size_t offset = i->iov_offset; | ||
479 | const struct iovec *iov = i->iov; | ||
480 | size_t len; | ||
481 | unsigned long addr; | ||
482 | void *p; | ||
483 | int n; | ||
484 | int res; | ||
485 | |||
486 | len = iov->iov_len - offset; | ||
487 | if (len > i->count) | ||
488 | len = i->count; | ||
489 | if (len > maxsize) | ||
490 | len = maxsize; | ||
491 | addr = (unsigned long)iov->iov_base + offset; | ||
492 | len += *start = addr & (PAGE_SIZE - 1); | ||
493 | addr &= ~(PAGE_SIZE - 1); | ||
494 | n = (len + PAGE_SIZE - 1) / PAGE_SIZE; | ||
495 | |||
496 | p = kmalloc(n * sizeof(struct page *), GFP_KERNEL); | ||
497 | if (!p) | ||
498 | p = vmalloc(n * sizeof(struct page *)); | ||
499 | if (!p) | ||
500 | return -ENOMEM; | ||
501 | |||
502 | res = get_user_pages_fast(addr, n, (i->type & WRITE) != WRITE, p); | ||
503 | if (unlikely(res < 0)) { | ||
504 | kvfree(p); | ||
505 | return res; | ||
506 | } | ||
507 | *pages = p; | ||
508 | return (res == n ? len : res * PAGE_SIZE) - *start; | ||
509 | } | ||
510 | |||
511 | static int iov_iter_npages_iovec(const struct iov_iter *i, int maxpages) | ||
512 | { | ||
513 | size_t offset = i->iov_offset; | ||
514 | size_t size = i->count; | ||
515 | const struct iovec *iov = i->iov; | ||
516 | int npages = 0; | ||
517 | int n; | ||
518 | |||
519 | for (n = 0; size && n < i->nr_segs; n++, iov++) { | ||
520 | unsigned long addr = (unsigned long)iov->iov_base + offset; | ||
521 | size_t len = iov->iov_len - offset; | ||
522 | offset = 0; | ||
523 | if (unlikely(!len)) /* empty segment */ | ||
524 | continue; | ||
525 | if (len > size) | ||
526 | len = size; | ||
527 | npages += (addr + len + PAGE_SIZE - 1) / PAGE_SIZE | ||
528 | - addr / PAGE_SIZE; | ||
529 | if (npages >= maxpages) /* don't bother going further */ | ||
530 | return maxpages; | ||
531 | size -= len; | ||
532 | offset = 0; | ||
533 | } | ||
534 | return min(npages, maxpages); | ||
535 | } | ||
536 | |||
537 | static void memcpy_from_page(char *to, struct page *page, size_t offset, size_t len) | 339 | static void memcpy_from_page(char *to, struct page *page, size_t offset, size_t len) |
538 | { | 340 | { |
539 | char *from = kmap_atomic(page); | 341 | char *from = kmap_atomic(page); |
@@ -555,293 +357,78 @@ static void memzero_page(struct page *page, size_t offset, size_t len)
555 | kunmap_atomic(addr); | 357 | kunmap_atomic(addr); |
556 | } | 358 | } |
557 | 359 | ||
558 | static size_t copy_to_iter_bvec(void *from, size_t bytes, struct iov_iter *i) | 360 | size_t copy_to_iter(void *addr, size_t bytes, struct iov_iter *i) |
559 | { | 361 | { |
560 | size_t skip, copy, wanted; | 362 | char *from = addr; |
561 | const struct bio_vec *bvec; | ||
562 | |||
563 | if (unlikely(bytes > i->count)) | 363 | if (unlikely(bytes > i->count)) |
564 | bytes = i->count; | 364 | bytes = i->count; |
565 | 365 | ||
566 | if (unlikely(!bytes)) | 366 | if (unlikely(!bytes)) |
567 | return 0; | 367 | return 0; |
568 | 368 | ||
569 | wanted = bytes; | 369 | iterate_and_advance(i, bytes, v, |
570 | bvec = i->bvec; | 370 | __copy_to_user(v.iov_base, (from += v.iov_len) - v.iov_len, |
571 | skip = i->iov_offset; | 371 | v.iov_len), |
572 | copy = min_t(size_t, bytes, bvec->bv_len - skip); | 372 | memcpy_to_page(v.bv_page, v.bv_offset, |
373 | (from += v.bv_len) - v.bv_len, v.bv_len), | ||
374 | memcpy(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len) | ||
375 | ) | ||
573 | 376 | ||
574 | memcpy_to_page(bvec->bv_page, skip + bvec->bv_offset, from, copy); | 377 | return bytes; |
575 | skip += copy; | ||
576 | from += copy; | ||
577 | bytes -= copy; | ||
578 | while (bytes) { | ||
579 | bvec++; | ||
580 | copy = min(bytes, (size_t)bvec->bv_len); | ||
581 | memcpy_to_page(bvec->bv_page, bvec->bv_offset, from, copy); | ||
582 | skip = copy; | ||
583 | from += copy; | ||
584 | bytes -= copy; | ||
585 | } | ||
586 | if (skip == bvec->bv_len) { | ||
587 | bvec++; | ||
588 | skip = 0; | ||
589 | } | ||
590 | i->count -= wanted - bytes; | ||
591 | i->nr_segs -= bvec - i->bvec; | ||
592 | i->bvec = bvec; | ||
593 | i->iov_offset = skip; | ||
594 | return wanted - bytes; | ||
595 | } | 378 | } |
379 | EXPORT_SYMBOL(copy_to_iter); | ||
596 | 380 | ||
597 | static size_t copy_from_iter_bvec(void *to, size_t bytes, struct iov_iter *i) | 381 | size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i) |
598 | { | 382 | { |
599 | size_t skip, copy, wanted; | 383 | char *to = addr; |
600 | const struct bio_vec *bvec; | ||
601 | |||
602 | if (unlikely(bytes > i->count)) | 384 | if (unlikely(bytes > i->count)) |
603 | bytes = i->count; | 385 | bytes = i->count; |
604 | 386 | ||
605 | if (unlikely(!bytes)) | 387 | if (unlikely(!bytes)) |
606 | return 0; | 388 | return 0; |
607 | 389 | ||
608 | wanted = bytes; | 390 | iterate_and_advance(i, bytes, v, |
609 | bvec = i->bvec; | 391 | __copy_from_user((to += v.iov_len) - v.iov_len, v.iov_base, |
610 | skip = i->iov_offset; | 392 | v.iov_len), |
611 | 393 | memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page, | |
612 | copy = min(bytes, bvec->bv_len - skip); | 394 | v.bv_offset, v.bv_len), |
613 | 395 | memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len) | |
614 | memcpy_from_page(to, bvec->bv_page, bvec->bv_offset + skip, copy); | 396 | ) |
615 | |||
616 | to += copy; | ||
617 | skip += copy; | ||
618 | bytes -= copy; | ||
619 | |||
620 | while (bytes) { | ||
621 | bvec++; | ||
622 | copy = min(bytes, (size_t)bvec->bv_len); | ||
623 | memcpy_from_page(to, bvec->bv_page, bvec->bv_offset, copy); | ||
624 | skip = copy; | ||
625 | to += copy; | ||
626 | bytes -= copy; | ||
627 | } | ||
628 | if (skip == bvec->bv_len) { | ||
629 | bvec++; | ||
630 | skip = 0; | ||
631 | } | ||
632 | i->count -= wanted; | ||
633 | i->nr_segs -= bvec - i->bvec; | ||
634 | i->bvec = bvec; | ||
635 | i->iov_offset = skip; | ||
636 | return wanted; | ||
637 | } | ||
638 | |||
639 | static size_t copy_page_to_iter_bvec(struct page *page, size_t offset, | ||
640 | size_t bytes, struct iov_iter *i) | ||
641 | { | ||
642 | void *kaddr = kmap_atomic(page); | ||
643 | size_t wanted = copy_to_iter_bvec(kaddr + offset, bytes, i); | ||
644 | kunmap_atomic(kaddr); | ||
645 | return wanted; | ||
646 | } | ||
647 | 397 | ||
648 | static size_t copy_page_from_iter_bvec(struct page *page, size_t offset, | 398 | return bytes; |
649 | size_t bytes, struct iov_iter *i) | ||
650 | { | ||
651 | void *kaddr = kmap_atomic(page); | ||
652 | size_t wanted = copy_from_iter_bvec(kaddr + offset, bytes, i); | ||
653 | kunmap_atomic(kaddr); | ||
654 | return wanted; | ||
655 | } | 399 | } |
400 | EXPORT_SYMBOL(copy_from_iter); | ||
656 | 401 | ||
657 | static size_t zero_bvec(size_t bytes, struct iov_iter *i) | 402 | size_t copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i) |
658 | { | 403 | { |
659 | size_t skip, copy, wanted; | 404 | char *to = addr; |
660 | const struct bio_vec *bvec; | ||
661 | |||
662 | if (unlikely(bytes > i->count)) | 405 | if (unlikely(bytes > i->count)) |
663 | bytes = i->count; | 406 | bytes = i->count; |
664 | 407 | ||
665 | if (unlikely(!bytes)) | 408 | if (unlikely(!bytes)) |
666 | return 0; | 409 | return 0; |
667 | 410 | ||
668 | wanted = bytes; | 411 | iterate_and_advance(i, bytes, v, |
669 | bvec = i->bvec; | 412 | __copy_from_user_nocache((to += v.iov_len) - v.iov_len, |
670 | skip = i->iov_offset; | 413 | v.iov_base, v.iov_len), |
671 | copy = min_t(size_t, bytes, bvec->bv_len - skip); | 414 | memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page, |
415 | v.bv_offset, v.bv_len), | ||
416 | memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len) | ||
417 | ) | ||
672 | 418 | ||
673 | memzero_page(bvec->bv_page, skip + bvec->bv_offset, copy); | ||
674 | skip += copy; | ||
675 | bytes -= copy; | ||
676 | while (bytes) { | ||
677 | bvec++; | ||
678 | copy = min(bytes, (size_t)bvec->bv_len); | ||
679 | memzero_page(bvec->bv_page, bvec->bv_offset, copy); | ||
680 | skip = copy; | ||
681 | bytes -= copy; | ||
682 | } | ||
683 | if (skip == bvec->bv_len) { | ||
684 | bvec++; | ||
685 | skip = 0; | ||
686 | } | ||
687 | i->count -= wanted - bytes; | ||
688 | i->nr_segs -= bvec - i->bvec; | ||
689 | i->bvec = bvec; | ||
690 | i->iov_offset = skip; | ||
691 | return wanted - bytes; | ||
692 | } | ||
693 | |||
694 | static size_t copy_from_user_bvec(struct page *page, | ||
695 | struct iov_iter *i, unsigned long offset, size_t bytes) | ||
696 | { | ||
697 | char *kaddr; | ||
698 | size_t left; | ||
699 | const struct bio_vec *bvec; | ||
700 | size_t base = i->iov_offset; | ||
701 | |||
702 | kaddr = kmap_atomic(page); | ||
703 | for (left = bytes, bvec = i->bvec; left; bvec++, base = 0) { | ||
704 | size_t copy = min(left, bvec->bv_len - base); | ||
705 | if (!bvec->bv_len) | ||
706 | continue; | ||
707 | memcpy_from_page(kaddr + offset, bvec->bv_page, | ||
708 | bvec->bv_offset + base, copy); | ||
709 | offset += copy; | ||
710 | left -= copy; | ||
711 | } | ||
712 | kunmap_atomic(kaddr); | ||
713 | return bytes; | 419 | return bytes; |
714 | } | 420 | } |
715 | 421 | EXPORT_SYMBOL(copy_from_iter_nocache); | |
716 | static void advance_bvec(struct iov_iter *i, size_t bytes) | ||
717 | { | ||
718 | BUG_ON(i->count < bytes); | ||
719 | |||
720 | if (likely(i->nr_segs == 1)) { | ||
721 | i->iov_offset += bytes; | ||
722 | i->count -= bytes; | ||
723 | } else { | ||
724 | const struct bio_vec *bvec = i->bvec; | ||
725 | size_t base = i->iov_offset; | ||
726 | unsigned long nr_segs = i->nr_segs; | ||
727 | |||
728 | /* | ||
729 | * The !iov->iov_len check ensures we skip over unlikely | ||
730 | * zero-length segments (without overruning the iovec). | ||
731 | */ | ||
732 | while (bytes || unlikely(i->count && !bvec->bv_len)) { | ||
733 | int copy; | ||
734 | |||
735 | copy = min(bytes, bvec->bv_len - base); | ||
736 | BUG_ON(!i->count || i->count < copy); | ||
737 | i->count -= copy; | ||
738 | bytes -= copy; | ||
739 | base += copy; | ||
740 | if (bvec->bv_len == base) { | ||
741 | bvec++; | ||
742 | nr_segs--; | ||
743 | base = 0; | ||
744 | } | ||
745 | } | ||
746 | i->bvec = bvec; | ||
747 | i->iov_offset = base; | ||
748 | i->nr_segs = nr_segs; | ||
749 | } | ||
750 | } | ||
751 | |||
752 | static unsigned long alignment_bvec(const struct iov_iter *i) | ||
753 | { | ||
754 | const struct bio_vec *bvec = i->bvec; | ||
755 | unsigned long res; | ||
756 | size_t size = i->count; | ||
757 | size_t n; | ||
758 | |||
759 | if (!size) | ||
760 | return 0; | ||
761 | |||
762 | res = bvec->bv_offset + i->iov_offset; | ||
763 | n = bvec->bv_len - i->iov_offset; | ||
764 | if (n >= size) | ||
765 | return res | size; | ||
766 | size -= n; | ||
767 | res |= n; | ||
768 | while (size > (++bvec)->bv_len) { | ||
769 | res |= bvec->bv_offset | bvec->bv_len; | ||
770 | size -= bvec->bv_len; | ||
771 | } | ||
772 | res |= bvec->bv_offset | size; | ||
773 | return res; | ||
774 | } | ||
775 | |||
776 | static ssize_t get_pages_bvec(struct iov_iter *i, | ||
777 | struct page **pages, size_t maxsize, unsigned maxpages, | ||
778 | size_t *start) | ||
779 | { | ||
780 | const struct bio_vec *bvec = i->bvec; | ||
781 | size_t len = bvec->bv_len - i->iov_offset; | ||
782 | if (len > i->count) | ||
783 | len = i->count; | ||
784 | if (len > maxsize) | ||
785 | len = maxsize; | ||
786 | /* can't be more than PAGE_SIZE */ | ||
787 | *start = bvec->bv_offset + i->iov_offset; | ||
788 | |||
789 | get_page(*pages = bvec->bv_page); | ||
790 | |||
791 | return len; | ||
792 | } | ||
793 | |||
794 | static ssize_t get_pages_alloc_bvec(struct iov_iter *i, | ||
795 | struct page ***pages, size_t maxsize, | ||
796 | size_t *start) | ||
797 | { | ||
798 | const struct bio_vec *bvec = i->bvec; | ||
799 | size_t len = bvec->bv_len - i->iov_offset; | ||
800 | if (len > i->count) | ||
801 | len = i->count; | ||
802 | if (len > maxsize) | ||
803 | len = maxsize; | ||
804 | *start = bvec->bv_offset + i->iov_offset; | ||
805 | |||
806 | *pages = kmalloc(sizeof(struct page *), GFP_KERNEL); | ||
807 | if (!*pages) | ||
808 | return -ENOMEM; | ||
809 | |||
810 | get_page(**pages = bvec->bv_page); | ||
811 | |||
812 | return len; | ||
813 | } | ||
814 | |||
815 | static int iov_iter_npages_bvec(const struct iov_iter *i, int maxpages) | ||
816 | { | ||
817 | size_t offset = i->iov_offset; | ||
818 | size_t size = i->count; | ||
819 | const struct bio_vec *bvec = i->bvec; | ||
820 | int npages = 0; | ||
821 | int n; | ||
822 | |||
823 | for (n = 0; size && n < i->nr_segs; n++, bvec++) { | ||
824 | size_t len = bvec->bv_len - offset; | ||
825 | offset = 0; | ||
826 | if (unlikely(!len)) /* empty segment */ | ||
827 | continue; | ||
828 | if (len > size) | ||
829 | len = size; | ||
830 | npages++; | ||
831 | if (npages >= maxpages) /* don't bother going further */ | ||
832 | return maxpages; | ||
833 | size -= len; | ||
834 | offset = 0; | ||
835 | } | ||
836 | return min(npages, maxpages); | ||
837 | } | ||
838 | 422 | ||
839 | size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes, | 423 | size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes, |
840 | struct iov_iter *i) | 424 | struct iov_iter *i) |
841 | { | 425 | { |
842 | if (i->type & ITER_BVEC) | 426 | if (i->type & (ITER_BVEC|ITER_KVEC)) { |
843 | return copy_page_to_iter_bvec(page, offset, bytes, i); | 427 | void *kaddr = kmap_atomic(page); |
844 | else | 428 | size_t wanted = copy_to_iter(kaddr + offset, bytes, i); |
429 | kunmap_atomic(kaddr); | ||
430 | return wanted; | ||
431 | } else | ||
845 | return copy_page_to_iter_iovec(page, offset, bytes, i); | 432 | return copy_page_to_iter_iovec(page, offset, bytes, i); |
846 | } | 433 | } |
847 | EXPORT_SYMBOL(copy_page_to_iter); | 434 | EXPORT_SYMBOL(copy_page_to_iter); |
@@ -849,57 +436,53 @@ EXPORT_SYMBOL(copy_page_to_iter);
849 | size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes, | 436 | size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes, |
850 | struct iov_iter *i) | 437 | struct iov_iter *i) |
851 | { | 438 | { |
852 | if (i->type & ITER_BVEC) | 439 | if (i->type & (ITER_BVEC|ITER_KVEC)) { |
853 | return copy_page_from_iter_bvec(page, offset, bytes, i); | 440 | void *kaddr = kmap_atomic(page); |
854 | else | 441 | size_t wanted = copy_from_iter(kaddr + offset, bytes, i); |
442 | kunmap_atomic(kaddr); | ||
443 | return wanted; | ||
444 | } else | ||
855 | return copy_page_from_iter_iovec(page, offset, bytes, i); | 445 | return copy_page_from_iter_iovec(page, offset, bytes, i); |
856 | } | 446 | } |
857 | EXPORT_SYMBOL(copy_page_from_iter); | 447 | EXPORT_SYMBOL(copy_page_from_iter); |
858 | 448 | ||
859 | size_t copy_to_iter(void *addr, size_t bytes, struct iov_iter *i) | 449 | size_t iov_iter_zero(size_t bytes, struct iov_iter *i) |
860 | { | 450 | { |
861 | if (i->type & ITER_BVEC) | 451 | if (unlikely(bytes > i->count)) |
862 | return copy_to_iter_bvec(addr, bytes, i); | 452 | bytes = i->count; |
863 | else | ||
864 | return copy_to_iter_iovec(addr, bytes, i); | ||
865 | } | ||
866 | EXPORT_SYMBOL(copy_to_iter); | ||
867 | 453 | ||
868 | size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i) | 454 | if (unlikely(!bytes)) |
869 | { | 455 | return 0; |
870 | if (i->type & ITER_BVEC) | ||
871 | return copy_from_iter_bvec(addr, bytes, i); | ||
872 | else | ||
873 | return copy_from_iter_iovec(addr, bytes, i); | ||
874 | } | ||
875 | EXPORT_SYMBOL(copy_from_iter); | ||
876 | 456 | ||
877 | size_t iov_iter_zero(size_t bytes, struct iov_iter *i) | 457 | iterate_and_advance(i, bytes, v, |
878 | { | 458 | __clear_user(v.iov_base, v.iov_len), |
879 | if (i->type & ITER_BVEC) { | 459 | memzero_page(v.bv_page, v.bv_offset, v.bv_len), |
880 | return zero_bvec(bytes, i); | 460 | memset(v.iov_base, 0, v.iov_len) |
881 | } else { | 461 | ) |
882 | return zero_iovec(bytes, i); | 462 | |
883 | } | 463 | return bytes; |
884 | } | 464 | } |
885 | EXPORT_SYMBOL(iov_iter_zero); | 465 | EXPORT_SYMBOL(iov_iter_zero); |
886 | 466 | ||
887 | size_t iov_iter_copy_from_user_atomic(struct page *page, | 467 | size_t iov_iter_copy_from_user_atomic(struct page *page, |
888 | struct iov_iter *i, unsigned long offset, size_t bytes) | 468 | struct iov_iter *i, unsigned long offset, size_t bytes) |
889 | { | 469 | { |
890 | if (i->type & ITER_BVEC) | 470 | char *kaddr = kmap_atomic(page), *p = kaddr + offset; |
891 | return copy_from_user_bvec(page, i, offset, bytes); | 471 | iterate_all_kinds(i, bytes, v, |
892 | else | 472 | __copy_from_user_inatomic((p += v.iov_len) - v.iov_len, |
893 | return copy_from_user_atomic_iovec(page, i, offset, bytes); | 473 | v.iov_base, v.iov_len), |
474 | memcpy_from_page((p += v.bv_len) - v.bv_len, v.bv_page, | ||
475 | v.bv_offset, v.bv_len), | ||
476 | memcpy((p += v.iov_len) - v.iov_len, v.iov_base, v.iov_len) | ||
477 | ) | ||
478 | kunmap_atomic(kaddr); | ||
479 | return bytes; | ||
894 | } | 480 | } |
895 | EXPORT_SYMBOL(iov_iter_copy_from_user_atomic); | 481 | EXPORT_SYMBOL(iov_iter_copy_from_user_atomic); |
896 | 482 | ||
897 | void iov_iter_advance(struct iov_iter *i, size_t size) | 483 | void iov_iter_advance(struct iov_iter *i, size_t size) |
898 | { | 484 | { |
899 | if (i->type & ITER_BVEC) | 485 | iterate_and_advance(i, size, v, 0, 0, 0) |
900 | advance_bvec(i, size); | ||
901 | else | ||
902 | advance_iovec(i, size); | ||
903 | } | 486 | } |
904 | EXPORT_SYMBOL(iov_iter_advance); | 487 | EXPORT_SYMBOL(iov_iter_advance); |
905 | 488 | ||
@@ -911,18 +494,39 @@ size_t iov_iter_single_seg_count(const struct iov_iter *i)
911 | if (i->nr_segs == 1) | 494 | if (i->nr_segs == 1) |
912 | return i->count; | 495 | return i->count; |
913 | else if (i->type & ITER_BVEC) | 496 | else if (i->type & ITER_BVEC) |
914 | return min(i->count, i->iov->iov_len - i->iov_offset); | ||
915 | else | ||
916 | return min(i->count, i->bvec->bv_len - i->iov_offset); | 497 | return min(i->count, i->bvec->bv_len - i->iov_offset); |
498 | else | ||
499 | return min(i->count, i->iov->iov_len - i->iov_offset); | ||
917 | } | 500 | } |
918 | EXPORT_SYMBOL(iov_iter_single_seg_count); | 501 | EXPORT_SYMBOL(iov_iter_single_seg_count); |
919 | 502 | ||
503 | void iov_iter_kvec(struct iov_iter *i, int direction, | ||
504 | const struct kvec *iov, unsigned long nr_segs, | ||
505 | size_t count) | ||
506 | { | ||
507 | BUG_ON(!(direction & ITER_KVEC)); | ||
508 | i->type = direction; | ||
509 | i->kvec = (struct kvec *)iov; | ||
510 | i->nr_segs = nr_segs; | ||
511 | i->iov_offset = 0; | ||
512 | i->count = count; | ||
513 | } | ||
514 | EXPORT_SYMBOL(iov_iter_kvec); | ||
515 | |||
920 | unsigned long iov_iter_alignment(const struct iov_iter *i) | 516 | unsigned long iov_iter_alignment(const struct iov_iter *i) |
921 | { | 517 | { |
922 | if (i->type & ITER_BVEC) | 518 | unsigned long res = 0; |
923 | return alignment_bvec(i); | 519 | size_t size = i->count; |
924 | else | 520 | |
925 | return alignment_iovec(i); | 521 | if (!size) |
522 | return 0; | ||
523 | |||
524 | iterate_all_kinds(i, size, v, | ||
525 | (res |= (unsigned long)v.iov_base | v.iov_len, 0), | ||
526 | res |= v.bv_offset | v.bv_len, | ||
527 | res |= (unsigned long)v.iov_base | v.iov_len | ||
528 | ) | ||
529 | return res; | ||
926 | } | 530 | } |
927 | EXPORT_SYMBOL(iov_iter_alignment); | 531 | EXPORT_SYMBOL(iov_iter_alignment); |
928 | 532 | ||
@@ -930,29 +534,207 @@ ssize_t iov_iter_get_pages(struct iov_iter *i,
930 | struct page **pages, size_t maxsize, unsigned maxpages, | 534 | struct page **pages, size_t maxsize, unsigned maxpages, |
931 | size_t *start) | 535 | size_t *start) |
932 | { | 536 | { |
933 | if (i->type & ITER_BVEC) | 537 | if (maxsize > i->count) |
934 | return get_pages_bvec(i, pages, maxsize, maxpages, start); | 538 | maxsize = i->count; |
935 | else | 539 | |
936 | return get_pages_iovec(i, pages, maxsize, maxpages, start); | 540 | if (!maxsize) |
541 | return 0; | ||
542 | |||
543 | iterate_all_kinds(i, maxsize, v, ({ | ||
544 | unsigned long addr = (unsigned long)v.iov_base; | ||
545 | size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1)); | ||
546 | int n; | ||
547 | int res; | ||
548 | |||
549 | if (len > maxpages * PAGE_SIZE) | ||
550 | len = maxpages * PAGE_SIZE; | ||
551 | addr &= ~(PAGE_SIZE - 1); | ||
552 | n = DIV_ROUND_UP(len, PAGE_SIZE); | ||
553 | res = get_user_pages_fast(addr, n, (i->type & WRITE) != WRITE, pages); | ||
554 | if (unlikely(res < 0)) | ||
555 | return res; | ||
556 | return (res == n ? len : res * PAGE_SIZE) - *start; | ||
557 | 0;}),({ | ||
558 | /* can't be more than PAGE_SIZE */ | ||
559 | *start = v.bv_offset; | ||
560 | get_page(*pages = v.bv_page); | ||
561 | return v.bv_len; | ||
562 | }),({ | ||
563 | return -EFAULT; | ||
564 | }) | ||
565 | ) | ||
566 | return 0; | ||
937 | } | 567 | } |
938 | EXPORT_SYMBOL(iov_iter_get_pages); | 568 | EXPORT_SYMBOL(iov_iter_get_pages); |
939 | 569 | ||
570 | static struct page **get_pages_array(size_t n) | ||
571 | { | ||
572 | struct page **p = kmalloc(n * sizeof(struct page *), GFP_KERNEL); | ||
573 | if (!p) | ||
574 | p = vmalloc(n * sizeof(struct page *)); | ||
575 | return p; | ||
576 | } | ||
577 | |||
940 | ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, | 578 | ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, |
941 | struct page ***pages, size_t maxsize, | 579 | struct page ***pages, size_t maxsize, |
942 | size_t *start) | 580 | size_t *start) |
943 | { | 581 | { |
944 | if (i->type & ITER_BVEC) | 582 | struct page **p; |
945 | return get_pages_alloc_bvec(i, pages, maxsize, start); | 583 | |
946 | else | 584 | if (maxsize > i->count) |
947 | return get_pages_alloc_iovec(i, pages, maxsize, start); | 585 | maxsize = i->count; |
586 | |||
587 | if (!maxsize) | ||
588 | return 0; | ||
589 | |||
590 | iterate_all_kinds(i, maxsize, v, ({ | ||
591 | unsigned long addr = (unsigned long)v.iov_base; | ||
592 | size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1)); | ||
593 | int n; | ||
594 | int res; | ||
595 | |||
596 | addr &= ~(PAGE_SIZE - 1); | ||
597 | n = DIV_ROUND_UP(len, PAGE_SIZE); | ||
598 | p = get_pages_array(n); | ||
599 | if (!p) | ||
600 | return -ENOMEM; | ||
601 | res = get_user_pages_fast(addr, n, (i->type & WRITE) != WRITE, p); | ||
602 | if (unlikely(res < 0)) { | ||
603 | kvfree(p); | ||
604 | return res; | ||
605 | } | ||
606 | *pages = p; | ||
607 | return (res == n ? len : res * PAGE_SIZE) - *start; | ||
608 | 0;}),({ | ||
609 | /* can't be more than PAGE_SIZE */ | ||
610 | *start = v.bv_offset; | ||
611 | *pages = p = get_pages_array(1); | ||
612 | if (!p) | ||
613 | return -ENOMEM; | ||
614 | get_page(*p = v.bv_page); | ||
615 | return v.bv_len; | ||
616 | }),({ | ||
617 | return -EFAULT; | ||
618 | }) | ||
619 | ) | ||
620 | return 0; | ||
948 | } | 621 | } |
949 | EXPORT_SYMBOL(iov_iter_get_pages_alloc); | 622 | EXPORT_SYMBOL(iov_iter_get_pages_alloc); |
950 | 623 | ||
624 | size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum, | ||
625 | struct iov_iter *i) | ||
626 | { | ||
627 | char *to = addr; | ||
628 | __wsum sum, next; | ||
629 | size_t off = 0; | ||
630 | if (unlikely(bytes > i->count)) | ||
631 | bytes = i->count; | ||
632 | |||
633 | if (unlikely(!bytes)) | ||
634 | return 0; | ||
635 | |||
636 | sum = *csum; | ||
637 | iterate_and_advance(i, bytes, v, ({ | ||
638 | int err = 0; | ||
639 | next = csum_and_copy_from_user(v.iov_base, | ||
640 | (to += v.iov_len) - v.iov_len, | ||
641 | v.iov_len, 0, &err); | ||
642 | if (!err) { | ||
643 | sum = csum_block_add(sum, next, off); | ||
644 | off += v.iov_len; | ||
645 | } | ||
646 | err ? v.iov_len : 0; | ||
647 | }), ({ | ||
648 | char *p = kmap_atomic(v.bv_page); | ||
649 | next = csum_partial_copy_nocheck(p + v.bv_offset, | ||
650 | (to += v.bv_len) - v.bv_len, | ||
651 | v.bv_len, 0); | ||
652 | kunmap_atomic(p); | ||
653 | sum = csum_block_add(sum, next, off); | ||
654 | off += v.bv_len; | ||
655 | }),({ | ||
656 | next = csum_partial_copy_nocheck(v.iov_base, | ||
657 | (to += v.iov_len) - v.iov_len, | ||
658 | v.iov_len, 0); | ||
659 | sum = csum_block_add(sum, next, off); | ||
660 | off += v.iov_len; | ||
661 | }) | ||
662 | ) | ||
663 | *csum = sum; | ||
664 | return bytes; | ||
665 | } | ||
666 | EXPORT_SYMBOL(csum_and_copy_from_iter); | ||
667 | |||
668 | size_t csum_and_copy_to_iter(void *addr, size_t bytes, __wsum *csum, | ||
669 | struct iov_iter *i) | ||
670 | { | ||
671 | char *from = addr; | ||
672 | __wsum sum, next; | ||
673 | size_t off = 0; | ||
674 | if (unlikely(bytes > i->count)) | ||
675 | bytes = i->count; | ||
676 | |||
677 | if (unlikely(!bytes)) | ||
678 | return 0; | ||
679 | |||
680 | sum = *csum; | ||
681 | iterate_and_advance(i, bytes, v, ({ | ||
682 | int err = 0; | ||
683 | next = csum_and_copy_to_user((from += v.iov_len) - v.iov_len, | ||
684 | v.iov_base, | ||
685 | v.iov_len, 0, &err); | ||
686 | if (!err) { | ||
687 | sum = csum_block_add(sum, next, off); | ||
688 | off += v.iov_len; | ||
689 | } | ||
690 | err ? v.iov_len : 0; | ||
691 | }), ({ | ||
692 | char *p = kmap_atomic(v.bv_page); | ||
693 | next = csum_partial_copy_nocheck((from += v.bv_len) - v.bv_len, | ||
694 | p + v.bv_offset, | ||
695 | v.bv_len, 0); | ||
696 | kunmap_atomic(p); | ||
697 | sum = csum_block_add(sum, next, off); | ||
698 | off += v.bv_len; | ||
699 | }),({ | ||
700 | next = csum_partial_copy_nocheck((from += v.iov_len) - v.iov_len, | ||
701 | v.iov_base, | ||
702 | v.iov_len, 0); | ||
703 | sum = csum_block_add(sum, next, off); | ||
704 | off += v.iov_len; | ||
705 | }) | ||
706 | ) | ||
707 | *csum = sum; | ||
708 | return bytes; | ||
709 | } | ||
710 | EXPORT_SYMBOL(csum_and_copy_to_iter); | ||
711 | |||
951 | int iov_iter_npages(const struct iov_iter *i, int maxpages) | 712 | int iov_iter_npages(const struct iov_iter *i, int maxpages) |
952 | { | 713 | { |
953 | if (i->type & ITER_BVEC) | 714 | size_t size = i->count; |
954 | return iov_iter_npages_bvec(i, maxpages); | 715 | int npages = 0; |
955 | else | 716 | |
956 | return iov_iter_npages_iovec(i, maxpages); | 717 | if (!size) |
718 | return 0; | ||
719 | |||
720 | iterate_all_kinds(i, size, v, ({ | ||
721 | unsigned long p = (unsigned long)v.iov_base; | ||
722 | npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE) | ||
723 | - p / PAGE_SIZE; | ||
724 | if (npages >= maxpages) | ||
725 | return maxpages; | ||
726 | 0;}),({ | ||
727 | npages++; | ||
728 | if (npages >= maxpages) | ||
729 | return maxpages; | ||
730 | }),({ | ||
731 | unsigned long p = (unsigned long)v.iov_base; | ||
732 | npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE) | ||
733 | - p / PAGE_SIZE; | ||
734 | if (npages >= maxpages) | ||
735 | return maxpages; | ||
736 | }) | ||
737 | ) | ||
738 | return npages; | ||
957 | } | 739 | } |
958 | EXPORT_SYMBOL(iov_iter_npages); | 740 | EXPORT_SYMBOL(iov_iter_npages); |
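The generic iov_iter_npages() above counts, for each iovec segment, how many pages the [base, base + len) span touches, and simply adds one page per bvec segment. The per-segment arithmetic in isolation, as an illustrative helper (assumes a non-empty segment; DIV_ROUND_UP comes from <linux/kernel.h>):

	/*
	 * Number of pages spanned by a single user segment, matching the
	 * DIV_ROUND_UP() arithmetic used in iov_iter_npages() above.
	 */
	static int segment_npages(const struct iovec *iov)
	{
		unsigned long start = (unsigned long)iov->iov_base;

		return DIV_ROUND_UP(start + iov->iov_len, PAGE_SIZE)
			- start / PAGE_SIZE;
	}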
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 8c3385181b16..ee48428cf8e3 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c | |||
@@ -1536,12 +1536,8 @@ int mem_cgroup_swappiness(struct mem_cgroup *memcg) | |||
1536 | * start move here. | 1536 | * start move here. |
1537 | */ | 1537 | */ |
1538 | 1538 | ||
1539 | /* for quick checking without looking up memcg */ | ||
1540 | atomic_t memcg_moving __read_mostly; | ||
1541 | |||
1542 | static void mem_cgroup_start_move(struct mem_cgroup *memcg) | 1539 | static void mem_cgroup_start_move(struct mem_cgroup *memcg) |
1543 | { | 1540 | { |
1544 | atomic_inc(&memcg_moving); | ||
1545 | atomic_inc(&memcg->moving_account); | 1541 | atomic_inc(&memcg->moving_account); |
1546 | synchronize_rcu(); | 1542 | synchronize_rcu(); |
1547 | } | 1543 | } |
@@ -1552,10 +1548,8 @@ static void mem_cgroup_end_move(struct mem_cgroup *memcg) | |||
1552 | * Now, mem_cgroup_clear_mc() may call this function with NULL. | 1548 | * Now, mem_cgroup_clear_mc() may call this function with NULL. |
1553 | * We check NULL in callee rather than caller. | 1549 | * We check NULL in callee rather than caller. |
1554 | */ | 1550 | */ |
1555 | if (memcg) { | 1551 | if (memcg) |
1556 | atomic_dec(&memcg_moving); | ||
1557 | atomic_dec(&memcg->moving_account); | 1552 | atomic_dec(&memcg->moving_account); |
1558 | } | ||
1559 | } | 1553 | } |
1560 | 1554 | ||
1561 | /* | 1555 | /* |
@@ -2204,41 +2198,52 @@ cleanup: | |||
2204 | return true; | 2198 | return true; |
2205 | } | 2199 | } |
2206 | 2200 | ||
2207 | /* | 2201 | /** |
2208 | * Used to update mapped file or writeback or other statistics. | 2202 | * mem_cgroup_begin_page_stat - begin a page state statistics transaction |
2203 | * @page: page that is going to change accounted state | ||
2204 | * @locked: &memcg->move_lock slowpath was taken | ||
2205 | * @flags: IRQ-state flags for &memcg->move_lock | ||
2209 | * | 2206 | * |
2210 | * Notes: Race condition | 2207 | * This function must mark the beginning of an accounted page state |
2208 | * change to prevent double accounting when the page is concurrently | ||
2209 | * being moved to another memcg: | ||
2211 | * | 2210 | * |
2212 | * Charging occurs during page instantiation, while the page is | 2211 | * memcg = mem_cgroup_begin_page_stat(page, &locked, &flags); |
2213 | * unmapped and locked in page migration, or while the page table is | 2212 | * if (TestClearPageState(page)) |
2214 | * locked in THP migration. No race is possible. | 2213 | * mem_cgroup_update_page_stat(memcg, state, -1); |
2214 | * mem_cgroup_end_page_stat(memcg, locked, flags); | ||
2215 | * | 2215 | * |
2216 | * Uncharge happens to pages with zero references, no race possible. | 2216 | * The RCU lock is held throughout the transaction. The fast path can |
2217 | * get away without acquiring the memcg->move_lock (@locked is false) | ||
2218 | * because page moving starts with an RCU grace period. | ||
2217 | * | 2219 | * |
2218 | * Charge moving between groups is protected by checking mm->moving | 2220 | * The RCU lock also protects the memcg from being freed when the page |
2219 | * account and taking the move_lock in the slowpath. | 2221 | * state that is going to change is the only thing preventing the page |
2222 | * from being uncharged. E.g. end-writeback clearing PageWriteback(), | ||
2223 | * which allows migration to go ahead and uncharge the page before the | ||
2224 | * account transaction might be complete. | ||
2220 | */ | 2225 | */ |
2221 | 2226 | struct mem_cgroup *mem_cgroup_begin_page_stat(struct page *page, | |
2222 | void __mem_cgroup_begin_update_page_stat(struct page *page, | 2227 | bool *locked, |
2223 | bool *locked, unsigned long *flags) | 2228 | unsigned long *flags) |
2224 | { | 2229 | { |
2225 | struct mem_cgroup *memcg; | 2230 | struct mem_cgroup *memcg; |
2226 | struct page_cgroup *pc; | 2231 | struct page_cgroup *pc; |
2227 | 2232 | ||
2233 | rcu_read_lock(); | ||
2234 | |||
2235 | if (mem_cgroup_disabled()) | ||
2236 | return NULL; | ||
2237 | |||
2228 | pc = lookup_page_cgroup(page); | 2238 | pc = lookup_page_cgroup(page); |
2229 | again: | 2239 | again: |
2230 | memcg = pc->mem_cgroup; | 2240 | memcg = pc->mem_cgroup; |
2231 | if (unlikely(!memcg || !PageCgroupUsed(pc))) | 2241 | if (unlikely(!memcg || !PageCgroupUsed(pc))) |
2232 | return; | 2242 | return NULL; |
2233 | /* | 2243 | |
2234 | * If this memory cgroup is not under account moving, we don't | 2244 | *locked = false; |
2235 | * need to take move_lock_mem_cgroup(). Because we already hold | ||
2236 | * rcu_read_lock(), any calls to move_account will be delayed until | ||
2237 | * rcu_read_unlock(). | ||
2238 | */ | ||
2239 | VM_BUG_ON(!rcu_read_lock_held()); | ||
2240 | if (atomic_read(&memcg->moving_account) <= 0) | 2245 | if (atomic_read(&memcg->moving_account) <= 0) |
2241 | return; | 2246 | return memcg; |
2242 | 2247 | ||
2243 | move_lock_mem_cgroup(memcg, flags); | 2248 | move_lock_mem_cgroup(memcg, flags); |
2244 | if (memcg != pc->mem_cgroup || !PageCgroupUsed(pc)) { | 2249 | if (memcg != pc->mem_cgroup || !PageCgroupUsed(pc)) { |
@@ -2246,36 +2251,40 @@ again: | |||
2246 | goto again; | 2251 | goto again; |
2247 | } | 2252 | } |
2248 | *locked = true; | 2253 | *locked = true; |
2254 | |||
2255 | return memcg; | ||
2249 | } | 2256 | } |
2250 | 2257 | ||
2251 | void __mem_cgroup_end_update_page_stat(struct page *page, unsigned long *flags) | 2258 | /** |
2259 | * mem_cgroup_end_page_stat - finish a page state statistics transaction | ||
2260 | * @memcg: the memcg that was accounted against | ||
2261 | * @locked: value received from mem_cgroup_begin_page_stat() | ||
2262 | * @flags: value received from mem_cgroup_begin_page_stat() | ||
2263 | */ | ||
2264 | void mem_cgroup_end_page_stat(struct mem_cgroup *memcg, bool locked, | ||
2265 | unsigned long flags) | ||
2252 | { | 2266 | { |
2253 | struct page_cgroup *pc = lookup_page_cgroup(page); | 2267 | if (memcg && locked) |
2268 | move_unlock_mem_cgroup(memcg, &flags); | ||
2254 | 2269 | ||
2255 | /* | 2270 | rcu_read_unlock(); |
2256 | * It's guaranteed that pc->mem_cgroup never changes while | ||
2257 | * lock is held because a routine modifies pc->mem_cgroup | ||
2258 | * should take move_lock_mem_cgroup(). | ||
2259 | */ | ||
2260 | move_unlock_mem_cgroup(pc->mem_cgroup, flags); | ||
2261 | } | 2271 | } |
2262 | 2272 | ||
2263 | void mem_cgroup_update_page_stat(struct page *page, | 2273 | /** |
2274 | * mem_cgroup_update_page_stat - update page state statistics | ||
2275 | * @memcg: memcg to account against | ||
2276 | * @idx: page state item to account | ||
2277 | * @val: number of pages (positive or negative) | ||
2278 | * | ||
2279 | * See mem_cgroup_begin_page_stat() for locking requirements. | ||
2280 | */ | ||
2281 | void mem_cgroup_update_page_stat(struct mem_cgroup *memcg, | ||
2264 | enum mem_cgroup_stat_index idx, int val) | 2282 | enum mem_cgroup_stat_index idx, int val) |
2265 | { | 2283 | { |
2266 | struct mem_cgroup *memcg; | ||
2267 | struct page_cgroup *pc = lookup_page_cgroup(page); | ||
2268 | unsigned long uninitialized_var(flags); | ||
2269 | |||
2270 | if (mem_cgroup_disabled()) | ||
2271 | return; | ||
2272 | |||
2273 | VM_BUG_ON(!rcu_read_lock_held()); | 2284 | VM_BUG_ON(!rcu_read_lock_held()); |
2274 | memcg = pc->mem_cgroup; | ||
2275 | if (unlikely(!memcg || !PageCgroupUsed(pc))) | ||
2276 | return; | ||
2277 | 2285 | ||
2278 | this_cpu_add(memcg->stat->count[idx], val); | 2286 | if (memcg) |
2287 | this_cpu_add(memcg->stat->count[idx], val); | ||
2279 | } | 2288 | } |
2280 | 2289 | ||
2281 | /* | 2290 | /* |
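The rewritten API above turns the old __mem_cgroup_begin/end_update_page_stat() pair into a transaction that returns the memcg, so callers no longer re-derive it from the page. A minimal sketch of the documented pattern, using the writeback state as the example; the helper name is hypothetical, and this simply mirrors what test_clear_page_writeback() does later in this series:

	/* Hypothetical caller showing the begin/update/end transaction. */
	static void sketch_clear_writeback_stat(struct page *page)
	{
		struct mem_cgroup *memcg;
		unsigned long flags;
		bool locked;

		memcg = mem_cgroup_begin_page_stat(page, &locked, &flags);
		if (TestClearPageWriteback(page))
			mem_cgroup_update_page_stat(memcg,
						    MEM_CGROUP_STAT_WRITEBACK, -1);
		mem_cgroup_end_page_stat(memcg, locked, flags);
	}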
diff --git a/mm/memory.c b/mm/memory.c index 1cc6bfbd872e..3e503831e042 100644 --- a/mm/memory.c +++ b/mm/memory.c | |||
@@ -1147,6 +1147,7 @@ again: | |||
1147 | print_bad_pte(vma, addr, ptent, page); | 1147 | print_bad_pte(vma, addr, ptent, page); |
1148 | if (unlikely(!__tlb_remove_page(tlb, page))) { | 1148 | if (unlikely(!__tlb_remove_page(tlb, page))) { |
1149 | force_flush = 1; | 1149 | force_flush = 1; |
1150 | addr += PAGE_SIZE; | ||
1150 | break; | 1151 | break; |
1151 | } | 1152 | } |
1152 | continue; | 1153 | continue; |
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 29d8693d0c61..1bf4807cb21e 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c | |||
@@ -31,6 +31,7 @@ | |||
31 | #include <linux/stop_machine.h> | 31 | #include <linux/stop_machine.h> |
32 | #include <linux/hugetlb.h> | 32 | #include <linux/hugetlb.h> |
33 | #include <linux/memblock.h> | 33 | #include <linux/memblock.h> |
34 | #include <linux/bootmem.h> | ||
34 | 35 | ||
35 | #include <asm/tlbflush.h> | 36 | #include <asm/tlbflush.h> |
36 | 37 | ||
@@ -1066,6 +1067,16 @@ out: | |||
1066 | } | 1067 | } |
1067 | #endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */ | 1068 | #endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */ |
1068 | 1069 | ||
1070 | static void reset_node_present_pages(pg_data_t *pgdat) | ||
1071 | { | ||
1072 | struct zone *z; | ||
1073 | |||
1074 | for (z = pgdat->node_zones; z < pgdat->node_zones + MAX_NR_ZONES; z++) | ||
1075 | z->present_pages = 0; | ||
1076 | |||
1077 | pgdat->node_present_pages = 0; | ||
1078 | } | ||
1079 | |||
1069 | /* we are OK calling __meminit stuff here - we have CONFIG_MEMORY_HOTPLUG */ | 1080 | /* we are OK calling __meminit stuff here - we have CONFIG_MEMORY_HOTPLUG */ |
1070 | static pg_data_t __ref *hotadd_new_pgdat(int nid, u64 start) | 1081 | static pg_data_t __ref *hotadd_new_pgdat(int nid, u64 start) |
1071 | { | 1082 | { |
@@ -1096,6 +1107,21 @@ static pg_data_t __ref *hotadd_new_pgdat(int nid, u64 start) | |||
1096 | build_all_zonelists(pgdat, NULL); | 1107 | build_all_zonelists(pgdat, NULL); |
1097 | mutex_unlock(&zonelists_mutex); | 1108 | mutex_unlock(&zonelists_mutex); |
1098 | 1109 | ||
1110 | /* | ||
1111 | * zone->managed_pages is set to an approximate value in | ||
1112 | * free_area_init_core(), which will cause | ||
1113 | * /sys/device/system/node/nodeX/meminfo has wrong data. | ||
1114 | * So reset it to 0 before any memory is onlined. | ||
1115 | */ | ||
1116 | reset_node_managed_pages(pgdat); | ||
1117 | |||
1118 | /* | ||
1119 | * When memory is hot-added, all the memory is in offline state. So | ||
1120 | * clear all zones' present_pages because they will be updated in | ||
1121 | * online_pages() and offline_pages(). | ||
1122 | */ | ||
1123 | reset_node_present_pages(pgdat); | ||
1124 | |||
1099 | return pgdat; | 1125 | return pgdat; |
1100 | } | 1126 | } |
1101 | 1127 | ||
@@ -1912,7 +1938,6 @@ void try_offline_node(int nid) | |||
1912 | unsigned long start_pfn = pgdat->node_start_pfn; | 1938 | unsigned long start_pfn = pgdat->node_start_pfn; |
1913 | unsigned long end_pfn = start_pfn + pgdat->node_spanned_pages; | 1939 | unsigned long end_pfn = start_pfn + pgdat->node_spanned_pages; |
1914 | unsigned long pfn; | 1940 | unsigned long pfn; |
1915 | struct page *pgdat_page = virt_to_page(pgdat); | ||
1916 | int i; | 1941 | int i; |
1917 | 1942 | ||
1918 | for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) { | 1943 | for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) { |
@@ -1941,10 +1966,6 @@ void try_offline_node(int nid) | |||
1941 | node_set_offline(nid); | 1966 | node_set_offline(nid); |
1942 | unregister_one_node(nid); | 1967 | unregister_one_node(nid); |
1943 | 1968 | ||
1944 | if (!PageSlab(pgdat_page) && !PageCompound(pgdat_page)) | ||
1945 | /* node data is allocated from boot memory */ | ||
1946 | return; | ||
1947 | |||
1948 | /* free waittable in each zone */ | 1969 | /* free waittable in each zone */ |
1949 | for (i = 0; i < MAX_NR_ZONES; i++) { | 1970 | for (i = 0; i < MAX_NR_ZONES; i++) { |
1950 | struct zone *zone = pgdat->node_zones + i; | 1971 | struct zone *zone = pgdat->node_zones + i; |
diff --git a/mm/mmap.c b/mm/mmap.c --- a/mm/mmap.c +++ b/mm/mmap.c | |||
@@ -1080,7 +1080,7 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm, | |||
1080 | end, prev->vm_pgoff, NULL); | 1080 | end, prev->vm_pgoff, NULL); |
1081 | if (err) | 1081 | if (err) |
1082 | return NULL; | 1082 | return NULL; |
1083 | khugepaged_enter_vma_merge(prev); | 1083 | khugepaged_enter_vma_merge(prev, vm_flags); |
1084 | return prev; | 1084 | return prev; |
1085 | } | 1085 | } |
1086 | 1086 | ||
@@ -1099,7 +1099,7 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm, | |||
1099 | next->vm_pgoff - pglen, NULL); | 1099 | next->vm_pgoff - pglen, NULL); |
1100 | if (err) | 1100 | if (err) |
1101 | return NULL; | 1101 | return NULL; |
1102 | khugepaged_enter_vma_merge(area); | 1102 | khugepaged_enter_vma_merge(area, vm_flags); |
1103 | return area; | 1103 | return area; |
1104 | } | 1104 | } |
1105 | 1105 | ||
@@ -2208,7 +2208,7 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address) | |||
2208 | } | 2208 | } |
2209 | } | 2209 | } |
2210 | vma_unlock_anon_vma(vma); | 2210 | vma_unlock_anon_vma(vma); |
2211 | khugepaged_enter_vma_merge(vma); | 2211 | khugepaged_enter_vma_merge(vma, vma->vm_flags); |
2212 | validate_mm(vma->vm_mm); | 2212 | validate_mm(vma->vm_mm); |
2213 | return error; | 2213 | return error; |
2214 | } | 2214 | } |
@@ -2277,7 +2277,7 @@ int expand_downwards(struct vm_area_struct *vma, | |||
2277 | } | 2277 | } |
2278 | } | 2278 | } |
2279 | vma_unlock_anon_vma(vma); | 2279 | vma_unlock_anon_vma(vma); |
2280 | khugepaged_enter_vma_merge(vma); | 2280 | khugepaged_enter_vma_merge(vma, vma->vm_flags); |
2281 | validate_mm(vma->vm_mm); | 2281 | validate_mm(vma->vm_mm); |
2282 | return error; | 2282 | return error; |
2283 | } | 2283 | } |
diff --git a/mm/nobootmem.c b/mm/nobootmem.c index 7c7ab32ee503..90b50468333e 100644 --- a/mm/nobootmem.c +++ b/mm/nobootmem.c | |||
@@ -145,12 +145,10 @@ static unsigned long __init free_low_memory_core_early(void) | |||
145 | 145 | ||
146 | static int reset_managed_pages_done __initdata; | 146 | static int reset_managed_pages_done __initdata; |
147 | 147 | ||
148 | static inline void __init reset_node_managed_pages(pg_data_t *pgdat) | 148 | void reset_node_managed_pages(pg_data_t *pgdat) |
149 | { | 149 | { |
150 | struct zone *z; | 150 | struct zone *z; |
151 | 151 | ||
152 | if (reset_managed_pages_done) | ||
153 | return; | ||
154 | for (z = pgdat->node_zones; z < pgdat->node_zones + MAX_NR_ZONES; z++) | 152 | for (z = pgdat->node_zones; z < pgdat->node_zones + MAX_NR_ZONES; z++) |
155 | z->managed_pages = 0; | 153 | z->managed_pages = 0; |
156 | } | 154 | } |
@@ -159,8 +157,12 @@ void __init reset_all_zones_managed_pages(void) | |||
159 | { | 157 | { |
160 | struct pglist_data *pgdat; | 158 | struct pglist_data *pgdat; |
161 | 159 | ||
160 | if (reset_managed_pages_done) | ||
161 | return; | ||
162 | |||
162 | for_each_online_pgdat(pgdat) | 163 | for_each_online_pgdat(pgdat) |
163 | reset_node_managed_pages(pgdat); | 164 | reset_node_managed_pages(pgdat); |
165 | |||
164 | reset_managed_pages_done = 1; | 166 | reset_managed_pages_done = 1; |
165 | } | 167 | } |
166 | 168 | ||
diff --git a/mm/page-writeback.c b/mm/page-writeback.c index ff24c9d83112..19ceae87522d 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c | |||
@@ -2116,23 +2116,6 @@ void account_page_dirtied(struct page *page, struct address_space *mapping) | |||
2116 | EXPORT_SYMBOL(account_page_dirtied); | 2116 | EXPORT_SYMBOL(account_page_dirtied); |
2117 | 2117 | ||
2118 | /* | 2118 | /* |
2119 | * Helper function for set_page_writeback family. | ||
2120 | * | ||
2121 | * The caller must hold mem_cgroup_begin/end_update_page_stat() lock | ||
2122 | * while calling this function. | ||
2123 | * See test_set_page_writeback for example. | ||
2124 | * | ||
2125 | * NOTE: Unlike account_page_dirtied this does not rely on being atomic | ||
2126 | * wrt interrupts. | ||
2127 | */ | ||
2128 | void account_page_writeback(struct page *page) | ||
2129 | { | ||
2130 | mem_cgroup_inc_page_stat(page, MEM_CGROUP_STAT_WRITEBACK); | ||
2131 | inc_zone_page_state(page, NR_WRITEBACK); | ||
2132 | } | ||
2133 | EXPORT_SYMBOL(account_page_writeback); | ||
2134 | |||
2135 | /* | ||
2136 | * For address_spaces which do not use buffers. Just tag the page as dirty in | 2119 | * For address_spaces which do not use buffers. Just tag the page as dirty in |
2137 | * its radix tree. | 2120 | * its radix tree. |
2138 | * | 2121 | * |
@@ -2344,11 +2327,12 @@ EXPORT_SYMBOL(clear_page_dirty_for_io); | |||
2344 | int test_clear_page_writeback(struct page *page) | 2327 | int test_clear_page_writeback(struct page *page) |
2345 | { | 2328 | { |
2346 | struct address_space *mapping = page_mapping(page); | 2329 | struct address_space *mapping = page_mapping(page); |
2347 | int ret; | ||
2348 | bool locked; | ||
2349 | unsigned long memcg_flags; | 2330 | unsigned long memcg_flags; |
2331 | struct mem_cgroup *memcg; | ||
2332 | bool locked; | ||
2333 | int ret; | ||
2350 | 2334 | ||
2351 | mem_cgroup_begin_update_page_stat(page, &locked, &memcg_flags); | 2335 | memcg = mem_cgroup_begin_page_stat(page, &locked, &memcg_flags); |
2352 | if (mapping) { | 2336 | if (mapping) { |
2353 | struct backing_dev_info *bdi = mapping->backing_dev_info; | 2337 | struct backing_dev_info *bdi = mapping->backing_dev_info; |
2354 | unsigned long flags; | 2338 | unsigned long flags; |
@@ -2369,22 +2353,23 @@ int test_clear_page_writeback(struct page *page) | |||
2369 | ret = TestClearPageWriteback(page); | 2353 | ret = TestClearPageWriteback(page); |
2370 | } | 2354 | } |
2371 | if (ret) { | 2355 | if (ret) { |
2372 | mem_cgroup_dec_page_stat(page, MEM_CGROUP_STAT_WRITEBACK); | 2356 | mem_cgroup_dec_page_stat(memcg, MEM_CGROUP_STAT_WRITEBACK); |
2373 | dec_zone_page_state(page, NR_WRITEBACK); | 2357 | dec_zone_page_state(page, NR_WRITEBACK); |
2374 | inc_zone_page_state(page, NR_WRITTEN); | 2358 | inc_zone_page_state(page, NR_WRITTEN); |
2375 | } | 2359 | } |
2376 | mem_cgroup_end_update_page_stat(page, &locked, &memcg_flags); | 2360 | mem_cgroup_end_page_stat(memcg, locked, memcg_flags); |
2377 | return ret; | 2361 | return ret; |
2378 | } | 2362 | } |
2379 | 2363 | ||
2380 | int __test_set_page_writeback(struct page *page, bool keep_write) | 2364 | int __test_set_page_writeback(struct page *page, bool keep_write) |
2381 | { | 2365 | { |
2382 | struct address_space *mapping = page_mapping(page); | 2366 | struct address_space *mapping = page_mapping(page); |
2383 | int ret; | ||
2384 | bool locked; | ||
2385 | unsigned long memcg_flags; | 2367 | unsigned long memcg_flags; |
2368 | struct mem_cgroup *memcg; | ||
2369 | bool locked; | ||
2370 | int ret; | ||
2386 | 2371 | ||
2387 | mem_cgroup_begin_update_page_stat(page, &locked, &memcg_flags); | 2372 | memcg = mem_cgroup_begin_page_stat(page, &locked, &memcg_flags); |
2388 | if (mapping) { | 2373 | if (mapping) { |
2389 | struct backing_dev_info *bdi = mapping->backing_dev_info; | 2374 | struct backing_dev_info *bdi = mapping->backing_dev_info; |
2390 | unsigned long flags; | 2375 | unsigned long flags; |
@@ -2410,9 +2395,11 @@ int __test_set_page_writeback(struct page *page, bool keep_write) | |||
2410 | } else { | 2395 | } else { |
2411 | ret = TestSetPageWriteback(page); | 2396 | ret = TestSetPageWriteback(page); |
2412 | } | 2397 | } |
2413 | if (!ret) | 2398 | if (!ret) { |
2414 | account_page_writeback(page); | 2399 | mem_cgroup_inc_page_stat(memcg, MEM_CGROUP_STAT_WRITEBACK); |
2415 | mem_cgroup_end_update_page_stat(page, &locked, &memcg_flags); | 2400 | inc_zone_page_state(page, NR_WRITEBACK); |
2401 | } | ||
2402 | mem_cgroup_end_page_stat(memcg, locked, memcg_flags); | ||
2416 | return ret; | 2403 | return ret; |
2417 | 2404 | ||
2418 | } | 2405 | } |
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 9cd36b822444..616a2c956b4b 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c | |||
@@ -467,29 +467,6 @@ static inline void rmv_page_order(struct page *page) | |||
467 | } | 467 | } |
468 | 468 | ||
469 | /* | 469 | /* |
470 | * Locate the struct page for both the matching buddy in our | ||
471 | * pair (buddy1) and the combined O(n+1) page they form (page). | ||
472 | * | ||
473 | * 1) Any buddy B1 will have an order O twin B2 which satisfies | ||
474 | * the following equation: | ||
475 | * B2 = B1 ^ (1 << O) | ||
476 | * For example, if the starting buddy (buddy2) is #8 its order | ||
477 | * 1 buddy is #10: | ||
478 | * B2 = 8 ^ (1 << 1) = 8 ^ 2 = 10 | ||
479 | * | ||
480 | * 2) Any buddy B will have an order O+1 parent P which | ||
481 | * satisfies the following equation: | ||
482 | * P = B & ~(1 << O) | ||
483 | * | ||
484 | * Assumption: *_mem_map is contiguous at least up to MAX_ORDER | ||
485 | */ | ||
486 | static inline unsigned long | ||
487 | __find_buddy_index(unsigned long page_idx, unsigned int order) | ||
488 | { | ||
489 | return page_idx ^ (1 << order); | ||
490 | } | ||
491 | |||
492 | /* | ||
493 | * This function checks whether a page is free && is the buddy | 470 | * This function checks whether a page is free && is the buddy |
494 | * we can do coalesce a page and its buddy if | 471 | * we can do coalesce a page and its buddy if |
495 | * (a) the buddy is not in a hole && | 472 | * (a) the buddy is not in a hole && |
@@ -569,6 +546,7 @@ static inline void __free_one_page(struct page *page, | |||
569 | unsigned long combined_idx; | 546 | unsigned long combined_idx; |
570 | unsigned long uninitialized_var(buddy_idx); | 547 | unsigned long uninitialized_var(buddy_idx); |
571 | struct page *buddy; | 548 | struct page *buddy; |
549 | int max_order = MAX_ORDER; | ||
572 | 550 | ||
573 | VM_BUG_ON(!zone_is_initialized(zone)); | 551 | VM_BUG_ON(!zone_is_initialized(zone)); |
574 | 552 | ||
@@ -577,13 +555,24 @@ static inline void __free_one_page(struct page *page, | |||
577 | return; | 555 | return; |
578 | 556 | ||
579 | VM_BUG_ON(migratetype == -1); | 557 | VM_BUG_ON(migratetype == -1); |
558 | if (is_migrate_isolate(migratetype)) { | ||
559 | /* | ||
560 | * We restrict max order of merging to prevent merge | ||
561 | * between freepages on isolate pageblock and normal | ||
562 | * pageblock. Without this, pageblock isolation | ||
563 | * could cause incorrect freepage accounting. | ||
564 | */ | ||
565 | max_order = min(MAX_ORDER, pageblock_order + 1); | ||
566 | } else { | ||
567 | __mod_zone_freepage_state(zone, 1 << order, migratetype); | ||
568 | } | ||
580 | 569 | ||
581 | page_idx = pfn & ((1 << MAX_ORDER) - 1); | 570 | page_idx = pfn & ((1 << max_order) - 1); |
582 | 571 | ||
583 | VM_BUG_ON_PAGE(page_idx & ((1 << order) - 1), page); | 572 | VM_BUG_ON_PAGE(page_idx & ((1 << order) - 1), page); |
584 | VM_BUG_ON_PAGE(bad_range(zone, page), page); | 573 | VM_BUG_ON_PAGE(bad_range(zone, page), page); |
585 | 574 | ||
586 | while (order < MAX_ORDER-1) { | 575 | while (order < max_order - 1) { |
587 | buddy_idx = __find_buddy_index(page_idx, order); | 576 | buddy_idx = __find_buddy_index(page_idx, order); |
588 | buddy = page + (buddy_idx - page_idx); | 577 | buddy = page + (buddy_idx - page_idx); |
589 | if (!page_is_buddy(page, buddy, order)) | 578 | if (!page_is_buddy(page, buddy, order)) |
@@ -594,9 +583,11 @@ static inline void __free_one_page(struct page *page, | |||
594 | */ | 583 | */ |
595 | if (page_is_guard(buddy)) { | 584 | if (page_is_guard(buddy)) { |
596 | clear_page_guard_flag(buddy); | 585 | clear_page_guard_flag(buddy); |
597 | set_page_private(page, 0); | 586 | set_page_private(buddy, 0); |
598 | __mod_zone_freepage_state(zone, 1 << order, | 587 | if (!is_migrate_isolate(migratetype)) { |
599 | migratetype); | 588 | __mod_zone_freepage_state(zone, 1 << order, |
589 | migratetype); | ||
590 | } | ||
600 | } else { | 591 | } else { |
601 | list_del(&buddy->lru); | 592 | list_del(&buddy->lru); |
602 | zone->free_area[order].nr_free--; | 593 | zone->free_area[order].nr_free--; |
@@ -715,14 +706,12 @@ static void free_pcppages_bulk(struct zone *zone, int count, | |||
715 | /* must delete as __free_one_page list manipulates */ | 706 | /* must delete as __free_one_page list manipulates */ |
716 | list_del(&page->lru); | 707 | list_del(&page->lru); |
717 | mt = get_freepage_migratetype(page); | 708 | mt = get_freepage_migratetype(page); |
709 | if (unlikely(has_isolate_pageblock(zone))) | ||
710 | mt = get_pageblock_migratetype(page); | ||
711 | |||
718 | /* MIGRATE_MOVABLE list may include MIGRATE_RESERVEs */ | 712 | /* MIGRATE_MOVABLE list may include MIGRATE_RESERVEs */ |
719 | __free_one_page(page, page_to_pfn(page), zone, 0, mt); | 713 | __free_one_page(page, page_to_pfn(page), zone, 0, mt); |
720 | trace_mm_page_pcpu_drain(page, 0, mt); | 714 | trace_mm_page_pcpu_drain(page, 0, mt); |
721 | if (likely(!is_migrate_isolate_page(page))) { | ||
722 | __mod_zone_page_state(zone, NR_FREE_PAGES, 1); | ||
723 | if (is_migrate_cma(mt)) | ||
724 | __mod_zone_page_state(zone, NR_FREE_CMA_PAGES, 1); | ||
725 | } | ||
726 | } while (--to_free && --batch_free && !list_empty(list)); | 715 | } while (--to_free && --batch_free && !list_empty(list)); |
727 | } | 716 | } |
728 | spin_unlock(&zone->lock); | 717 | spin_unlock(&zone->lock); |
@@ -739,9 +728,11 @@ static void free_one_page(struct zone *zone, | |||
739 | if (nr_scanned) | 728 | if (nr_scanned) |
740 | __mod_zone_page_state(zone, NR_PAGES_SCANNED, -nr_scanned); | 729 | __mod_zone_page_state(zone, NR_PAGES_SCANNED, -nr_scanned); |
741 | 730 | ||
731 | if (unlikely(has_isolate_pageblock(zone) || | ||
732 | is_migrate_isolate(migratetype))) { | ||
733 | migratetype = get_pfnblock_migratetype(page, pfn); | ||
734 | } | ||
742 | __free_one_page(page, pfn, zone, order, migratetype); | 735 | __free_one_page(page, pfn, zone, order, migratetype); |
743 | if (unlikely(!is_migrate_isolate(migratetype))) | ||
744 | __mod_zone_freepage_state(zone, 1 << order, migratetype); | ||
745 | spin_unlock(&zone->lock); | 736 | spin_unlock(&zone->lock); |
746 | } | 737 | } |
747 | 738 | ||
@@ -1484,7 +1475,7 @@ void split_page(struct page *page, unsigned int order) | |||
1484 | } | 1475 | } |
1485 | EXPORT_SYMBOL_GPL(split_page); | 1476 | EXPORT_SYMBOL_GPL(split_page); |
1486 | 1477 | ||
1487 | static int __isolate_free_page(struct page *page, unsigned int order) | 1478 | int __isolate_free_page(struct page *page, unsigned int order) |
1488 | { | 1479 | { |
1489 | unsigned long watermark; | 1480 | unsigned long watermark; |
1490 | struct zone *zone; | 1481 | struct zone *zone; |
@@ -6408,13 +6399,12 @@ int alloc_contig_range(unsigned long start, unsigned long end, | |||
6408 | 6399 | ||
6409 | /* Make sure the range is really isolated. */ | 6400 | /* Make sure the range is really isolated. */ |
6410 | if (test_pages_isolated(outer_start, end, false)) { | 6401 | if (test_pages_isolated(outer_start, end, false)) { |
6411 | pr_warn("alloc_contig_range test_pages_isolated(%lx, %lx) failed\n", | 6402 | pr_info("%s: [%lx, %lx) PFNs busy\n", |
6412 | outer_start, end); | 6403 | __func__, outer_start, end); |
6413 | ret = -EBUSY; | 6404 | ret = -EBUSY; |
6414 | goto done; | 6405 | goto done; |
6415 | } | 6406 | } |
6416 | 6407 | ||
6417 | |||
6418 | /* Grab isolated pages from freelists. */ | 6408 | /* Grab isolated pages from freelists. */ |
6419 | outer_end = isolate_freepages_range(&cc, outer_start, end); | 6409 | outer_end = isolate_freepages_range(&cc, outer_start, end); |
6420 | if (!outer_end) { | 6410 | if (!outer_end) { |
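The comment removed at the top of this file documents the buddy arithmetic that __find_buddy_index() implements; the helper itself presumably moves into mm/internal.h, which also changes in this series, since unset_migratetype_isolate() below still uses it. The formula restated as a worked example:

	/*
	 * The order-O buddy of a page differs from it only in bit O of its
	 * index within the MAX_ORDER-aligned block, e.g. 8 ^ (1 << 1) == 10;
	 * clearing that bit gives the combined order-(O+1) parent.
	 */
	static inline unsigned long buddy_index(unsigned long page_idx,
						unsigned int order)
	{
		return page_idx ^ (1UL << order);
	}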
diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c index 3708264d2833..5331c2bd85a2 100644 --- a/mm/page_cgroup.c +++ b/mm/page_cgroup.c | |||
@@ -171,6 +171,7 @@ static void free_page_cgroup(void *addr) | |||
171 | sizeof(struct page_cgroup) * PAGES_PER_SECTION; | 171 | sizeof(struct page_cgroup) * PAGES_PER_SECTION; |
172 | 172 | ||
173 | BUG_ON(PageReserved(page)); | 173 | BUG_ON(PageReserved(page)); |
174 | kmemleak_free(addr); | ||
174 | free_pages_exact(addr, table_size); | 175 | free_pages_exact(addr, table_size); |
175 | } | 176 | } |
176 | } | 177 | } |
diff --git a/mm/page_isolation.c b/mm/page_isolation.c index d1473b2e9481..c8778f7e208e 100644 --- a/mm/page_isolation.c +++ b/mm/page_isolation.c | |||
@@ -60,6 +60,7 @@ out: | |||
60 | int migratetype = get_pageblock_migratetype(page); | 60 | int migratetype = get_pageblock_migratetype(page); |
61 | 61 | ||
62 | set_pageblock_migratetype(page, MIGRATE_ISOLATE); | 62 | set_pageblock_migratetype(page, MIGRATE_ISOLATE); |
63 | zone->nr_isolate_pageblock++; | ||
63 | nr_pages = move_freepages_block(zone, page, MIGRATE_ISOLATE); | 64 | nr_pages = move_freepages_block(zone, page, MIGRATE_ISOLATE); |
64 | 65 | ||
65 | __mod_zone_freepage_state(zone, -nr_pages, migratetype); | 66 | __mod_zone_freepage_state(zone, -nr_pages, migratetype); |
@@ -75,16 +76,54 @@ void unset_migratetype_isolate(struct page *page, unsigned migratetype) | |||
75 | { | 76 | { |
76 | struct zone *zone; | 77 | struct zone *zone; |
77 | unsigned long flags, nr_pages; | 78 | unsigned long flags, nr_pages; |
79 | struct page *isolated_page = NULL; | ||
80 | unsigned int order; | ||
81 | unsigned long page_idx, buddy_idx; | ||
82 | struct page *buddy; | ||
78 | 83 | ||
79 | zone = page_zone(page); | 84 | zone = page_zone(page); |
80 | spin_lock_irqsave(&zone->lock, flags); | 85 | spin_lock_irqsave(&zone->lock, flags); |
81 | if (get_pageblock_migratetype(page) != MIGRATE_ISOLATE) | 86 | if (get_pageblock_migratetype(page) != MIGRATE_ISOLATE) |
82 | goto out; | 87 | goto out; |
83 | nr_pages = move_freepages_block(zone, page, migratetype); | 88 | |
84 | __mod_zone_freepage_state(zone, nr_pages, migratetype); | 89 | /* |
90 | * Because freepage with more than pageblock_order on isolated | ||
91 | * pageblock is restricted to merge due to freepage counting problem, | ||
92 | * it is possible that there is free buddy page. | ||
93 | * move_freepages_block() doesn't care of merge so we need other | ||
94 | * approach in order to merge them. Isolation and free will make | ||
95 | * these pages to be merged. | ||
96 | */ | ||
97 | if (PageBuddy(page)) { | ||
98 | order = page_order(page); | ||
99 | if (order >= pageblock_order) { | ||
100 | page_idx = page_to_pfn(page) & ((1 << MAX_ORDER) - 1); | ||
101 | buddy_idx = __find_buddy_index(page_idx, order); | ||
102 | buddy = page + (buddy_idx - page_idx); | ||
103 | |||
104 | if (!is_migrate_isolate_page(buddy)) { | ||
105 | __isolate_free_page(page, order); | ||
106 | set_page_refcounted(page); | ||
107 | isolated_page = page; | ||
108 | } | ||
109 | } | ||
110 | } | ||
111 | |||
112 | /* | ||
113 | * If we isolate freepage with more than pageblock_order, there | ||
114 | * should be no freepage in the range, so we could avoid costly | ||
115 | * pageblock scanning for freepage moving. | ||
116 | */ | ||
117 | if (!isolated_page) { | ||
118 | nr_pages = move_freepages_block(zone, page, migratetype); | ||
119 | __mod_zone_freepage_state(zone, nr_pages, migratetype); | ||
120 | } | ||
85 | set_pageblock_migratetype(page, migratetype); | 121 | set_pageblock_migratetype(page, migratetype); |
122 | zone->nr_isolate_pageblock--; | ||
86 | out: | 123 | out: |
87 | spin_unlock_irqrestore(&zone->lock, flags); | 124 | spin_unlock_irqrestore(&zone->lock, flags); |
125 | if (isolated_page) | ||
126 | __free_pages(isolated_page, order); | ||
88 | } | 127 | } |
89 | 128 | ||
90 | static inline struct page * | 129 | static inline struct page * |
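set_migratetype_isolate()/unset_migratetype_isolate() now keep zone->nr_isolate_pageblock up to date so the free paths in page_alloc.c only pay for a pageblock-migratetype recheck when isolation is actually in use. The predicate they test is defined in a header that is not part of this excerpt; it is presumably along these lines:

	/*
	 * Presumed shape of the fast-path test used by free_pcppages_bulk()
	 * and free_one_page() above.
	 */
	static inline bool has_isolate_pageblock(struct zone *zone)
	{
		return zone->nr_isolate_pageblock;
	}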
diff --git a/mm/rmap.c b/mm/rmap.c --- a/mm/rmap.c +++ b/mm/rmap.c | |||
@@ -1042,15 +1042,46 @@ void page_add_new_anon_rmap(struct page *page, | |||
1042 | */ | 1042 | */ |
1043 | void page_add_file_rmap(struct page *page) | 1043 | void page_add_file_rmap(struct page *page) |
1044 | { | 1044 | { |
1045 | bool locked; | 1045 | struct mem_cgroup *memcg; |
1046 | unsigned long flags; | 1046 | unsigned long flags; |
1047 | bool locked; | ||
1047 | 1048 | ||
1048 | mem_cgroup_begin_update_page_stat(page, &locked, &flags); | 1049 | memcg = mem_cgroup_begin_page_stat(page, &locked, &flags); |
1049 | if (atomic_inc_and_test(&page->_mapcount)) { | 1050 | if (atomic_inc_and_test(&page->_mapcount)) { |
1050 | __inc_zone_page_state(page, NR_FILE_MAPPED); | 1051 | __inc_zone_page_state(page, NR_FILE_MAPPED); |
1051 | mem_cgroup_inc_page_stat(page, MEM_CGROUP_STAT_FILE_MAPPED); | 1052 | mem_cgroup_inc_page_stat(memcg, MEM_CGROUP_STAT_FILE_MAPPED); |
1052 | } | 1053 | } |
1053 | mem_cgroup_end_update_page_stat(page, &locked, &flags); | 1054 | mem_cgroup_end_page_stat(memcg, locked, flags); |
1055 | } | ||
1056 | |||
1057 | static void page_remove_file_rmap(struct page *page) | ||
1058 | { | ||
1059 | struct mem_cgroup *memcg; | ||
1060 | unsigned long flags; | ||
1061 | bool locked; | ||
1062 | |||
1063 | memcg = mem_cgroup_begin_page_stat(page, &locked, &flags); | ||
1064 | |||
1065 | /* page still mapped by someone else? */ | ||
1066 | if (!atomic_add_negative(-1, &page->_mapcount)) | ||
1067 | goto out; | ||
1068 | |||
1069 | /* Hugepages are not counted in NR_FILE_MAPPED for now. */ | ||
1070 | if (unlikely(PageHuge(page))) | ||
1071 | goto out; | ||
1072 | |||
1073 | /* | ||
1074 | * We use the irq-unsafe __{inc|mod}_zone_page_stat because | ||
1075 | * these counters are not modified in interrupt context, and | ||
1076 | * pte lock(a spinlock) is held, which implies preemption disabled. | ||
1077 | */ | ||
1078 | __dec_zone_page_state(page, NR_FILE_MAPPED); | ||
1079 | mem_cgroup_dec_page_stat(memcg, MEM_CGROUP_STAT_FILE_MAPPED); | ||
1080 | |||
1081 | if (unlikely(PageMlocked(page))) | ||
1082 | clear_page_mlock(page); | ||
1083 | out: | ||
1084 | mem_cgroup_end_page_stat(memcg, locked, flags); | ||
1054 | } | 1085 | } |
1055 | 1086 | ||
1056 | /** | 1087 | /** |
@@ -1061,46 +1092,33 @@ void page_add_file_rmap(struct page *page) | |||
1061 | */ | 1092 | */ |
1062 | void page_remove_rmap(struct page *page) | 1093 | void page_remove_rmap(struct page *page) |
1063 | { | 1094 | { |
1064 | bool anon = PageAnon(page); | 1095 | if (!PageAnon(page)) { |
1065 | bool locked; | 1096 | page_remove_file_rmap(page); |
1066 | unsigned long flags; | 1097 | return; |
1067 | 1098 | } | |
1068 | /* | ||
1069 | * The anon case has no mem_cgroup page_stat to update; but may | ||
1070 | * uncharge_page() below, where the lock ordering can deadlock if | ||
1071 | * we hold the lock against page_stat move: so avoid it on anon. | ||
1072 | */ | ||
1073 | if (!anon) | ||
1074 | mem_cgroup_begin_update_page_stat(page, &locked, &flags); | ||
1075 | 1099 | ||
1076 | /* page still mapped by someone else? */ | 1100 | /* page still mapped by someone else? */ |
1077 | if (!atomic_add_negative(-1, &page->_mapcount)) | 1101 | if (!atomic_add_negative(-1, &page->_mapcount)) |
1078 | goto out; | 1102 | return; |
1103 | |||
1104 | /* Hugepages are not counted in NR_ANON_PAGES for now. */ | ||
1105 | if (unlikely(PageHuge(page))) | ||
1106 | return; | ||
1079 | 1107 | ||
1080 | /* | 1108 | /* |
1081 | * Hugepages are not counted in NR_ANON_PAGES nor NR_FILE_MAPPED | ||
1082 | * and not charged by memcg for now. | ||
1083 | * | ||
1084 | * We use the irq-unsafe __{inc|mod}_zone_page_stat because | 1109 | * We use the irq-unsafe __{inc|mod}_zone_page_stat because |
1085 | * these counters are not modified in interrupt context, and | 1110 | * these counters are not modified in interrupt context, and |
1086 | * these counters are not modified in interrupt context, and | ||
1087 | * pte lock(a spinlock) is held, which implies preemption disabled. | 1111 | * pte lock(a spinlock) is held, which implies preemption disabled. |
1088 | */ | 1112 | */ |
1089 | if (unlikely(PageHuge(page))) | 1113 | if (PageTransHuge(page)) |
1090 | goto out; | 1114 | __dec_zone_page_state(page, NR_ANON_TRANSPARENT_HUGEPAGES); |
1091 | if (anon) { | 1115 | |
1092 | if (PageTransHuge(page)) | 1116 | __mod_zone_page_state(page_zone(page), NR_ANON_PAGES, |
1093 | __dec_zone_page_state(page, | 1117 | -hpage_nr_pages(page)); |
1094 | NR_ANON_TRANSPARENT_HUGEPAGES); | 1118 | |
1095 | __mod_zone_page_state(page_zone(page), NR_ANON_PAGES, | ||
1096 | -hpage_nr_pages(page)); | ||
1097 | } else { | ||
1098 | __dec_zone_page_state(page, NR_FILE_MAPPED); | ||
1099 | mem_cgroup_dec_page_stat(page, MEM_CGROUP_STAT_FILE_MAPPED); | ||
1100 | mem_cgroup_end_update_page_stat(page, &locked, &flags); | ||
1101 | } | ||
1102 | if (unlikely(PageMlocked(page))) | 1119 | if (unlikely(PageMlocked(page))) |
1103 | clear_page_mlock(page); | 1120 | clear_page_mlock(page); |
1121 | |||
1104 | /* | 1122 | /* |
1105 | * It would be tidy to reset the PageAnon mapping here, | 1123 | * It would be tidy to reset the PageAnon mapping here, |
1106 | * but that might overwrite a racing page_add_anon_rmap | 1124 | * but that might overwrite a racing page_add_anon_rmap |
@@ -1110,10 +1128,6 @@ void page_remove_rmap(struct page *page) | |||
1110 | * Leaving it set also helps swapoff to reinstate ptes | 1128 | * Leaving it set also helps swapoff to reinstate ptes |
1111 | * faster for those pages still in swapcache. | 1129 | * faster for those pages still in swapcache. |
1112 | */ | 1130 | */ |
1113 | return; | ||
1114 | out: | ||
1115 | if (!anon) | ||
1116 | mem_cgroup_end_update_page_stat(page, &locked, &flags); | ||
1117 | } | 1131 | } |
1118 | 1132 | ||
1119 | /* | 1133 | /* |
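The NR_ANON_PAGES update above subtracts hpage_nr_pages(page), which removes an entire transparent huge page in one step. Its presumed definition, from <linux/huge_mm.h> and not shown in this excerpt, is roughly:

	/* Presumed helper: whole THP counts as HPAGE_PMD_NR pages. */
	static inline int hpage_nr_pages(struct page *page)
	{
		if (unlikely(PageTransHuge(page)))
			return HPAGE_PMD_NR;
		return 1;
	}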
diff --git a/mm/slab_common.c b/mm/slab_common.c index 3a6e0cfdf03a..dcdab81bd240 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c | |||
@@ -93,16 +93,6 @@ static int kmem_cache_sanity_check(const char *name, size_t size) | |||
93 | s->object_size); | 93 | s->object_size); |
94 | continue; | 94 | continue; |
95 | } | 95 | } |
96 | |||
97 | #if !defined(CONFIG_SLUB) | ||
98 | if (!strcmp(s->name, name)) { | ||
99 | pr_err("%s (%s): Cache name already exists.\n", | ||
100 | __func__, name); | ||
101 | dump_stack(); | ||
102 | s = NULL; | ||
103 | return -EINVAL; | ||
104 | } | ||
105 | #endif | ||
106 | } | 96 | } |
107 | 97 | ||
108 | WARN_ON(strchr(name, ' ')); /* It confuses parsers */ | 98 | WARN_ON(strchr(name, ' ')); /* It confuses parsers */ |
@@ -269,6 +259,10 @@ struct kmem_cache *find_mergeable(size_t size, size_t align, | |||
269 | if (s->size - size >= sizeof(void *)) | 259 | if (s->size - size >= sizeof(void *)) |
270 | continue; | 260 | continue; |
271 | 261 | ||
262 | if (IS_ENABLED(CONFIG_SLAB) && align && | ||
263 | (align > s->align || s->align % align)) | ||
264 | continue; | ||
265 | |||
272 | return s; | 266 | return s; |
273 | } | 267 | } |
274 | return NULL; | 268 | return NULL; |
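The check added to find_mergeable() above refuses to reuse an existing cache whose alignment cannot satisfy an explicitly requested one. Written out as a standalone predicate (hypothetical helper, for illustration only):

	/*
	 * An existing cache is only reusable if it is at least as strictly
	 * aligned as requested and its alignment is a multiple of the
	 * requested alignment.
	 */
	static bool slab_align_compatible(unsigned int requested,
					  unsigned int existing)
	{
		if (!requested)
			return true;
		return existing >= requested && (existing % requested) == 0;
	}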
diff --git a/mm/truncate.c b/mm/truncate.c index 261eaf6e5a19..f1e4d6052369 100644 --- a/mm/truncate.c +++ b/mm/truncate.c | |||
@@ -715,8 +715,9 @@ EXPORT_SYMBOL(truncate_pagecache); | |||
715 | * necessary) to @newsize. It will be typically be called from the filesystem's | 715 | * necessary) to @newsize. It will be typically be called from the filesystem's |
716 | * setattr function when ATTR_SIZE is passed in. | 716 | * setattr function when ATTR_SIZE is passed in. |
717 | * | 717 | * |
718 | * Must be called with inode_mutex held and before all filesystem specific | 718 | * Must be called with a lock serializing truncates and writes (generally |
719 | * block truncation has been performed. | 719 | * i_mutex but e.g. xfs uses a different lock) and before all filesystem |
720 | * specific block truncation has been performed. | ||
720 | */ | 721 | */ |
721 | void truncate_setsize(struct inode *inode, loff_t newsize) | 722 | void truncate_setsize(struct inode *inode, loff_t newsize) |
722 | { | 723 | { |
@@ -755,7 +756,6 @@ void pagecache_isize_extended(struct inode *inode, loff_t from, loff_t to) | |||
755 | struct page *page; | 756 | struct page *page; |
756 | pgoff_t index; | 757 | pgoff_t index; |
757 | 758 | ||
758 | WARN_ON(!mutex_is_locked(&inode->i_mutex)); | ||
759 | WARN_ON(to > inode->i_size); | 759 | WARN_ON(to > inode->i_size); |
760 | 760 | ||
761 | if (from >= to || bsize == PAGE_CACHE_SIZE) | 761 | if (from >= to || bsize == PAGE_CACHE_SIZE) |
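The updated comment says truncate_setsize() is typically called from a filesystem's ->setattr() for ATTR_SIZE, under whatever lock serializes truncates and writes, and before any filesystem-specific block truncation. A minimal sketch of such a ->setattr(), assuming a simple filesystem where i_mutex is that lock (as for most filesystems; xfs uses its own) and using hypothetical myfs_* names:

	/* Hypothetical ->setattr(); the VFS caller already holds i_mutex. */
	static int myfs_setattr(struct dentry *dentry, struct iattr *attr)
	{
		struct inode *inode = dentry->d_inode;
		int error;

		error = inode_change_ok(inode, attr);
		if (error)
			return error;

		if (attr->ia_valid & ATTR_SIZE) {
			/* update i_size and truncate the page cache first */
			truncate_setsize(inode, attr->ia_size);
			/* filesystem-specific block truncation would follow */
		}

		setattr_copy(inode, attr);
		mark_inode_dirty(inode);
		return 0;
	}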