author     Peter Zijlstra <a.p.zijlstra@chello.nl>          2011-05-24 20:12:04 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>   2011-05-25 11:39:17 -0400
commit     97a894136f29802da19a15541de3c019e1ca147e (patch)
tree       1fd3f92ba92a37d5d8527a1f41458091d0a944dc
parent     e4c70a6629f9c74c4b0de258a3951890e9047c82 (diff)
mm: Remove i_mmap_lock lockbreak
Hugh says:
"The only significant loser, I think, would be page reclaim (when
concurrent with truncation): could spin for a long time waiting for
the i_mmap_mutex it expects would soon be dropped? "
Counter points:
- cpu contention makes the spin stop (need_resched())
- zap pages should be freeing pages at a higher rate than reclaim
ever can
I think the simplification of the truncate code is definitely worth it.
Effectively reverts: 2aa15890f3c ("mm: prevent concurrent
unmap_mapping_range() on the same inode") and takes out the code that
caused its problem.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: David Miller <davem@davemloft.net>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Russell King <rmk@arm.linux.org.uk>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: Jeff Dike <jdike@addtoit.com>
Cc: Richard Weinberger <richard@nod.at>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Nick Piggin <npiggin@kernel.dk>
Cc: Namhyung Kim <namhyung@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
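
Aside for readers skimming the diff below: the heart of the patch is the shape of the zap_*_range() loops. The stand-alone C sketch that follows is illustrative only (zap_range_lockbreak(), zap_range_simple(), fake_cond_resched(), NPAGES and ZAP_BUDGET are made-up stand-ins, not kernel symbols). It models the old budgeted loop, which kept bailing out so the caller could drop i_mmap_lock and restart from a saved address, against the simplified loop that walks the whole range once and only yields periodically, which the patch itself does via cond_resched() in zap_pmd_range().

#include <stdio.h>

#define NPAGES     1000
#define ZAP_BUDGET 64          /* plays the role of ZAP_BLOCK_SIZE */

static void fake_cond_resched(void)
{
        /* stand-in for cond_resched(): the kernel would yield the CPU here */
}

/* Old shape: consume a work budget, then return so the caller can drop
 * the lock and later restart from the address where we stopped. */
static long zap_range_lockbreak(long start, long end)
{
        long budget = ZAP_BUDGET;

        while (start < end && budget > 0) {
                /* "zap" one page */
                start++;
                budget--;
        }
        return start;   /* restart address for the next pass */
}

/* New shape: walk the whole range in one go, rescheduling as we go. */
static void zap_range_simple(long start, long end)
{
        while (start < end) {
                /* "zap" one page */
                start++;
                if (start % ZAP_BUDGET == 0)
                        fake_cond_resched();
        }
}

int main(void)
{
        long restart = 0;
        int passes = 0;

        /* old style: repeated drop-lock/retake-lock/restart cycles */
        while (restart < NPAGES) {
                restart = zap_range_lockbreak(restart, NPAGES);
                passes++;
        }
        printf("lockbreak style: %d passes over the range\n", passes);

        /* new style: one pass, no per-vma restart bookkeeping */
        zap_range_simple(0, NPAGES);
        printf("simple style: 1 pass over the range\n");
        return 0;
}
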
-rw-r--r--  fs/inode.c                |   1
-rw-r--r--  include/linux/fs.h        |   2
-rw-r--r--  include/linux/mm.h        |   2
-rw-r--r--  include/linux/mm_types.h  |   1
-rw-r--r--  kernel/fork.c             |   1
-rw-r--r--  mm/memory.c               | 195
-rw-r--r--  mm/mmap.c                 |  13
-rw-r--r--  mm/mremap.c               |   1
8 files changed, 28 insertions(+), 188 deletions(-)
diff --git a/fs/inode.c b/fs/inode.c
index 05f4fa521325..7a7284c71abd 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -331,7 +331,6 @@ void address_space_init_once(struct address_space *mapping)
 	spin_lock_init(&mapping->private_lock);
 	INIT_RAW_PRIO_TREE_ROOT(&mapping->i_mmap);
 	INIT_LIST_HEAD(&mapping->i_mmap_nonlinear);
-	mutex_init(&mapping->unmap_mutex);
 }
 EXPORT_SYMBOL(address_space_init_once);
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index cdf9495df204..5d2c86bdf5ba 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -635,7 +635,6 @@ struct address_space {
 	struct prio_tree_root	i_mmap;		/* tree of private and shared mappings */
 	struct list_head	i_mmap_nonlinear;/*list VM_NONLINEAR mappings */
 	spinlock_t		i_mmap_lock;	/* protect tree, count, list */
-	unsigned int		truncate_count;	/* Cover race condition with truncate */
 	unsigned long		nrpages;	/* number of total pages */
 	pgoff_t			writeback_index;/* writeback starts here */
 	const struct address_space_operations *a_ops;	/* methods */
@@ -644,7 +643,6 @@ struct address_space {
 	spinlock_t		private_lock;	/* for use by the address_space */
 	struct list_head	private_list;	/* ditto */
 	struct address_space	*assoc_mapping;	/* ditto */
-	struct mutex		unmap_mutex;	/* to protect unmapping */
 } __attribute__((aligned(sizeof(long))));
 	/*
	 * On most architectures that alignment is already the case; but
diff --git a/include/linux/mm.h b/include/linux/mm.h
index ffcce9bf2b54..2ad0ac8c3f32 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -895,8 +895,6 @@ struct zap_details {
 	struct address_space *check_mapping;	/* Check page->mapping if set */
 	pgoff_t	first_index;			/* Lowest page->index to unmap */
 	pgoff_t last_index;			/* Highest page->index to unmap */
-	spinlock_t *i_mmap_lock;		/* For unmap_mapping_range: */
-	unsigned long truncate_count;		/* Compare vm_truncate_count */
 };
 
 struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 02aa5619709b..201998e5b530 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -175,7 +175,6 @@ struct vm_area_struct {
 					   units, *not* PAGE_CACHE_SIZE */
 	struct file * vm_file;		/* File we map to (can be NULL). */
 	void * vm_private_data;		/* was vm_pte (shared mem) */
-	unsigned long vm_truncate_count;/* truncate_count or restart_addr */
 
 #ifndef CONFIG_MMU
 	struct vm_region *vm_region;	/* NOMMU mapping region */
diff --git a/kernel/fork.c b/kernel/fork.c
index 2b44d82b8237..4eef925477fc 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -386,7 +386,6 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
 			spin_lock(&mapping->i_mmap_lock);
 			if (tmp->vm_flags & VM_SHARED)
 				mapping->i_mmap_writable++;
-			tmp->vm_truncate_count = mpnt->vm_truncate_count;
 			flush_dcache_mmap_lock(mapping);
 			/* insert tmp into the share list, just after mpnt */
 			vma_prio_tree_add(tmp, mpnt);
diff --git a/mm/memory.c b/mm/memory.c
index 17193d74f302..18655878b9f8 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -986,13 +986,13 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 static unsigned long zap_pte_range(struct mmu_gather *tlb,
 				struct vm_area_struct *vma, pmd_t *pmd,
 				unsigned long addr, unsigned long end,
-				long *zap_work, struct zap_details *details)
+				struct zap_details *details)
 {
 	struct mm_struct *mm = tlb->mm;
 	int force_flush = 0;
-	pte_t *pte;
-	spinlock_t *ptl;
 	int rss[NR_MM_COUNTERS];
+	spinlock_t *ptl;
+	pte_t *pte;
 
 again:
 	init_rss_vec(rss);
@@ -1001,12 +1001,9 @@ again:
 	do {
 		pte_t ptent = *pte;
 		if (pte_none(ptent)) {
-			(*zap_work)--;
 			continue;
 		}
 
-		(*zap_work) -= PAGE_SIZE;
-
 		if (pte_present(ptent)) {
 			struct page *page;
 
@@ -1075,7 +1072,7 @@ again:
 			print_bad_pte(vma, addr, ptent, NULL);
 		}
 		pte_clear_not_present_full(mm, addr, pte, tlb->fullmm);
-	} while (pte++, addr += PAGE_SIZE, (addr != end && *zap_work > 0));
+	} while (pte++, addr += PAGE_SIZE, addr != end);
 
 	add_mm_rss_vec(mm, rss);
 	arch_leave_lazy_mmu_mode();
@@ -1099,7 +1096,7 @@ again:
 static inline unsigned long zap_pmd_range(struct mmu_gather *tlb,
 				struct vm_area_struct *vma, pud_t *pud,
 				unsigned long addr, unsigned long end,
-				long *zap_work, struct zap_details *details)
+				struct zap_details *details)
 {
 	pmd_t *pmd;
 	unsigned long next;
@@ -1111,19 +1108,15 @@ static inline unsigned long zap_pmd_range(struct mmu_gather *tlb,
 			if (next-addr != HPAGE_PMD_SIZE) {
 				VM_BUG_ON(!rwsem_is_locked(&tlb->mm->mmap_sem));
 				split_huge_page_pmd(vma->vm_mm, pmd);
-			} else if (zap_huge_pmd(tlb, vma, pmd)) {
-				(*zap_work)--;
+			} else if (zap_huge_pmd(tlb, vma, pmd))
 				continue;
-			}
 			/* fall through */
 		}
-		if (pmd_none_or_clear_bad(pmd)) {
-			(*zap_work)--;
+		if (pmd_none_or_clear_bad(pmd))
 			continue;
-		}
-		next = zap_pte_range(tlb, vma, pmd, addr, next,
-						zap_work, details);
-	} while (pmd++, addr = next, (addr != end && *zap_work > 0));
+		next = zap_pte_range(tlb, vma, pmd, addr, next, details);
+		cond_resched();
+	} while (pmd++, addr = next, addr != end);
 
 	return addr;
 }
@@ -1131,7 +1124,7 @@ static inline unsigned long zap_pmd_range(struct mmu_gather *tlb,
 static inline unsigned long zap_pud_range(struct mmu_gather *tlb,
 				struct vm_area_struct *vma, pgd_t *pgd,
 				unsigned long addr, unsigned long end,
-				long *zap_work, struct zap_details *details)
+				struct zap_details *details)
 {
 	pud_t *pud;
 	unsigned long next;
@@ -1139,13 +1132,10 @@ static inline unsigned long zap_pud_range(struct mmu_gather *tlb,
 	pud = pud_offset(pgd, addr);
 	do {
 		next = pud_addr_end(addr, end);
-		if (pud_none_or_clear_bad(pud)) {
-			(*zap_work)--;
+		if (pud_none_or_clear_bad(pud))
 			continue;
-		}
-		next = zap_pmd_range(tlb, vma, pud, addr, next,
-						zap_work, details);
-	} while (pud++, addr = next, (addr != end && *zap_work > 0));
+		next = zap_pmd_range(tlb, vma, pud, addr, next, details);
+	} while (pud++, addr = next, addr != end);
 
 	return addr;
 }
@@ -1153,7 +1143,7 @@ static inline unsigned long zap_pud_range(struct mmu_gather *tlb,
 static unsigned long unmap_page_range(struct mmu_gather *tlb,
 				struct vm_area_struct *vma,
 				unsigned long addr, unsigned long end,
-				long *zap_work, struct zap_details *details)
+				struct zap_details *details)
 {
 	pgd_t *pgd;
 	unsigned long next;
@@ -1167,13 +1157,10 @@ static unsigned long unmap_page_range(struct mmu_gather *tlb,
 	pgd = pgd_offset(vma->vm_mm, addr);
 	do {
 		next = pgd_addr_end(addr, end);
-		if (pgd_none_or_clear_bad(pgd)) {
-			(*zap_work)--;
+		if (pgd_none_or_clear_bad(pgd))
 			continue;
-		}
-		next = zap_pud_range(tlb, vma, pgd, addr, next,
-						zap_work, details);
-	} while (pgd++, addr = next, (addr != end && *zap_work > 0));
+		next = zap_pud_range(tlb, vma, pgd, addr, next, details);
+	} while (pgd++, addr = next, addr != end);
 	tlb_end_vma(tlb, vma);
 	mem_cgroup_uncharge_end();
 
@@ -1218,9 +1205,7 @@ unsigned long unmap_vmas(struct mmu_gather *tlb,
 		unsigned long end_addr, unsigned long *nr_accounted,
 		struct zap_details *details)
 {
-	long zap_work = ZAP_BLOCK_SIZE;
 	unsigned long start = start_addr;
-	spinlock_t *i_mmap_lock = details? details->i_mmap_lock: NULL;
 	struct mm_struct *mm = vma->vm_mm;
 
 	mmu_notifier_invalidate_range_start(mm, start_addr, end_addr);
@@ -1253,33 +1238,15 @@ unsigned long unmap_vmas(struct mmu_gather *tlb,
 			 * Since no pte has actually been setup, it is
 			 * safe to do nothing in this case.
 			 */
-			if (vma->vm_file) {
+			if (vma->vm_file)
 				unmap_hugepage_range(vma, start, end, NULL);
-				zap_work -= (end - start) /
-					pages_per_huge_page(hstate_vma(vma));
-			}
 
 			start = end;
 		} else
-			start = unmap_page_range(tlb, vma,
-						start, end, &zap_work, details);
-
-		if (zap_work > 0) {
-			BUG_ON(start != end);
-			break;
-		}
-
-		if (need_resched() ||
-			(i_mmap_lock && spin_needbreak(i_mmap_lock))) {
-			if (i_mmap_lock)
-				goto out;
-			cond_resched();
-		}
-
-		zap_work = ZAP_BLOCK_SIZE;
+			start = unmap_page_range(tlb, vma, start, end, details);
 		}
 	}
-out:
+
 	mmu_notifier_invalidate_range_end(mm, start_addr, end_addr);
 	return start;	/* which is now the end (or restart) address */
 }
@@ -2612,96 +2579,11 @@ unwritable_page:
 	return ret;
 }
 
-/*
- * Helper functions for unmap_mapping_range().
- *
- * __ Notes on dropping i_mmap_lock to reduce latency while unmapping __
- *
- * We have to restart searching the prio_tree whenever we drop the lock,
- * since the iterator is only valid while the lock is held, and anyway
- * a later vma might be split and reinserted earlier while lock dropped.
- *
- * The list of nonlinear vmas could be handled more efficiently, using
- * a placeholder, but handle it in the same way until a need is shown.
- * It is important to search the prio_tree before nonlinear list: a vma
- * may become nonlinear and be shifted from prio_tree to nonlinear list
- * while the lock is dropped; but never shifted from list to prio_tree.
- *
- * In order to make forward progress despite restarting the search,
- * vm_truncate_count is used to mark a vma as now dealt with, so we can
- * quickly skip it next time around. Since the prio_tree search only
- * shows us those vmas affected by unmapping the range in question, we
- * can't efficiently keep all vmas in step with mapping->truncate_count:
- * so instead reset them all whenever it wraps back to 0 (then go to 1).
- * mapping->truncate_count and vma->vm_truncate_count are protected by
- * i_mmap_lock.
- *
- * In order to make forward progress despite repeatedly restarting some
- * large vma, note the restart_addr from unmap_vmas when it breaks out:
- * and restart from that address when we reach that vma again. It might
- * have been split or merged, shrunk or extended, but never shifted: so
- * restart_addr remains valid so long as it remains in the vma's range.
- * unmap_mapping_range forces truncate_count to leap over page-aligned
- * values so we can save vma's restart_addr in its truncate_count field.
- */
-#define is_restart_addr(truncate_count) (!((truncate_count) & ~PAGE_MASK))
-
-static void reset_vma_truncate_counts(struct address_space *mapping)
-{
-	struct vm_area_struct *vma;
-	struct prio_tree_iter iter;
-
-	vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, 0, ULONG_MAX)
-		vma->vm_truncate_count = 0;
-	list_for_each_entry(vma, &mapping->i_mmap_nonlinear, shared.vm_set.list)
-		vma->vm_truncate_count = 0;
-}
-
-static int unmap_mapping_range_vma(struct vm_area_struct *vma,
+static void unmap_mapping_range_vma(struct vm_area_struct *vma,
 		unsigned long start_addr, unsigned long end_addr,
 		struct zap_details *details)
 {
-	unsigned long restart_addr;
-	int need_break;
-
-	/*
-	 * files that support invalidating or truncating portions of the
-	 * file from under mmaped areas must have their ->fault function
-	 * return a locked page (and set VM_FAULT_LOCKED in the return).
-	 * This provides synchronisation against concurrent unmapping here.
-	 */
-
-again:
-	restart_addr = vma->vm_truncate_count;
-	if (is_restart_addr(restart_addr) && start_addr < restart_addr) {
-		start_addr = restart_addr;
-		if (start_addr >= end_addr) {
-			/* Top of vma has been split off since last time */
-			vma->vm_truncate_count = details->truncate_count;
-			return 0;
-		}
-	}
-
-	restart_addr = zap_page_range(vma, start_addr,
-					end_addr - start_addr, details);
-	need_break = need_resched() || spin_needbreak(details->i_mmap_lock);
-
-	if (restart_addr >= end_addr) {
-		/* We have now completed this vma: mark it so */
-		vma->vm_truncate_count = details->truncate_count;
-		if (!need_break)
-			return 0;
-	} else {
-		/* Note restart_addr in vma's truncate_count field */
-		vma->vm_truncate_count = restart_addr;
-		if (!need_break)
-			goto again;
-	}
-
-	spin_unlock(details->i_mmap_lock);
-	cond_resched();
-	spin_lock(details->i_mmap_lock);
-	return -EINTR;
+	zap_page_range(vma, start_addr, end_addr - start_addr, details);
 }
 
 static inline void unmap_mapping_range_tree(struct prio_tree_root *root,
@@ -2711,12 +2593,8 @@ static inline void unmap_mapping_range_tree(struct prio_tree_root *root,
 	struct prio_tree_iter iter;
 	pgoff_t vba, vea, zba, zea;
 
-restart:
 	vma_prio_tree_foreach(vma, &iter, root,
 			details->first_index, details->last_index) {
-		/* Skip quickly over those we have already dealt with */
-		if (vma->vm_truncate_count == details->truncate_count)
-			continue;
 
 		vba = vma->vm_pgoff;
 		vea = vba + ((vma->vm_end - vma->vm_start) >> PAGE_SHIFT) - 1;
@@ -2728,11 +2606,10 @@ restart:
 		if (zea > vea)
 			zea = vea;
 
-		if (unmap_mapping_range_vma(vma,
+		unmap_mapping_range_vma(vma,
 			((zba - vba) << PAGE_SHIFT) + vma->vm_start,
 			((zea - vba + 1) << PAGE_SHIFT) + vma->vm_start,
-				details) < 0)
-			goto restart;
+				details);
 	}
 }
 
@@ -2747,15 +2624,9 @@ static inline void unmap_mapping_range_list(struct list_head *head,
 	 * across *all* the pages in each nonlinear VMA, not just the pages
 	 * whose virtual address lies outside the file truncation point.
 	 */
-restart:
 	list_for_each_entry(vma, head, shared.vm_set.list) {
-		/* Skip quickly over those we have already dealt with */
-		if (vma->vm_truncate_count == details->truncate_count)
-			continue;
 		details->nonlinear_vma = vma;
-		if (unmap_mapping_range_vma(vma, vma->vm_start,
-					vma->vm_end, details) < 0)
-			goto restart;
+		unmap_mapping_range_vma(vma, vma->vm_start, vma->vm_end, details);
 	}
 }
 
@@ -2794,26 +2665,14 @@ void unmap_mapping_range(struct address_space *mapping,
 	details.last_index = hba + hlen - 1;
 	if (details.last_index < details.first_index)
 		details.last_index = ULONG_MAX;
-	details.i_mmap_lock = &mapping->i_mmap_lock;
 
-	mutex_lock(&mapping->unmap_mutex);
-	spin_lock(&mapping->i_mmap_lock);
-
-	/* Protect against endless unmapping loops */
-	mapping->truncate_count++;
-	if (unlikely(is_restart_addr(mapping->truncate_count))) {
-		if (mapping->truncate_count == 0)
-			reset_vma_truncate_counts(mapping);
-		mapping->truncate_count++;
-	}
-	details.truncate_count = mapping->truncate_count;
 
+	spin_lock(&mapping->i_mmap_lock);
 	if (unlikely(!prio_tree_empty(&mapping->i_mmap)))
 		unmap_mapping_range_tree(&mapping->i_mmap, &details);
 	if (unlikely(!list_empty(&mapping->i_mmap_nonlinear)))
 		unmap_mapping_range_list(&mapping->i_mmap_nonlinear, &details);
 	spin_unlock(&mapping->i_mmap_lock);
-	mutex_unlock(&mapping->unmap_mutex);
 }
 EXPORT_SYMBOL(unmap_mapping_range);
 
diff --git a/mm/mmap.c b/mm/mmap.c
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -445,10 +445,8 @@ static void vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
 	if (vma->vm_file)
 		mapping = vma->vm_file->f_mapping;
 
-	if (mapping) {
+	if (mapping)
 		spin_lock(&mapping->i_mmap_lock);
-		vma->vm_truncate_count = mapping->truncate_count;
-	}
 
 	__vma_link(mm, vma, prev, rb_link, rb_parent);
 	__vma_link_file(vma);
@@ -558,16 +556,7 @@ again: remove_next = 1 + (end > next->vm_end);
 		if (!(vma->vm_flags & VM_NONLINEAR))
 			root = &mapping->i_mmap;
 		spin_lock(&mapping->i_mmap_lock);
-		if (importer &&
-		    vma->vm_truncate_count != next->vm_truncate_count) {
-			/*
-			 * unmap_mapping_range might be in progress:
-			 * ensure that the expanding vma is rescanned.
-			 */
-			importer->vm_truncate_count = 0;
-		}
 		if (insert) {
-			insert->vm_truncate_count = vma->vm_truncate_count;
 			/*
 			 * Put into prio_tree now, so instantiated pages
 			 * are visible to arm/parisc __flush_dcache_page
diff --git a/mm/mremap.c b/mm/mremap.c
index a7c1f9f9b941..909e1e1e99b1 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -94,7 +94,6 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
 		 */
 		mapping = vma->vm_file->f_mapping;
 		spin_lock(&mapping->i_mmap_lock);
-		new_vma->vm_truncate_count = 0;
 	}
 
 	/*