aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorHugh Dickins <hugh@veritas.com>2005-10-29 21:15:57 -0400
committerLinus Torvalds <torvalds@g5.osdl.org>2005-10-30 00:40:37 -0400
commita8fb5618dab7e45c8990f3155628d772a9ed45f9 (patch)
tree77977b8fb2f57c855da9e3168977521e8393776a
parent2c0b381467bc2997be9d741a152f3fc75785eedc (diff)
[PATCH] mm: unlink_file_vma, remove_vma
Divide remove_vm_struct into two parts: first anon_vma_unlink plus unlink_file_vma, to unlink the vma from the list and tree by which rmap or vmtruncate might find it; then remove_vma to close, fput and free.

The intention here is to do the anon_vma_unlink and unlink_file_vma earlier, in free_pgtables before freeing any page tables: so we can be sure that any page tables traversed by rmap and vmtruncate are stable (and other, ordinary cases are stabilized by holding mmap_sem).

This will be crucial to traversing pgd,pud,pmd without page_table_lock. But testing the split-out patch showed that lifting the page_table_lock is symbiotically necessary to make this change - the lock ordering is wrong to move those unlinks into free_pgtables while it's under ptlock.

Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r--include/linux/mm.h1
-rw-r--r--mm/mmap.c41
2 files changed, 28 insertions(+), 14 deletions(-)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 376a466743bc..0c64484d8ae0 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -834,6 +834,7 @@ extern int split_vma(struct mm_struct *,
834extern int insert_vm_struct(struct mm_struct *, struct vm_area_struct *); 834extern int insert_vm_struct(struct mm_struct *, struct vm_area_struct *);
835extern void __vma_link_rb(struct mm_struct *, struct vm_area_struct *, 835extern void __vma_link_rb(struct mm_struct *, struct vm_area_struct *,
836 struct rb_node **, struct rb_node *); 836 struct rb_node **, struct rb_node *);
837extern void unlink_file_vma(struct vm_area_struct *);
837extern struct vm_area_struct *copy_vma(struct vm_area_struct **, 838extern struct vm_area_struct *copy_vma(struct vm_area_struct **,
838 unsigned long addr, unsigned long len, pgoff_t pgoff); 839 unsigned long addr, unsigned long len, pgoff_t pgoff);
839extern void exit_mmap(struct mm_struct *); 840extern void exit_mmap(struct mm_struct *);
diff --git a/mm/mmap.c b/mm/mmap.c
index eeefe19a0fac..a3984fad3fc2 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -181,26 +181,44 @@ static void __remove_shared_vm_struct(struct vm_area_struct *vma,
181} 181}
182 182
183/* 183/*
184 * Remove one vm structure and free it. 184 * Unlink a file-based vm structure from its prio_tree, to hide
185 * vma from rmap and vmtruncate before freeing its page tables.
185 */ 186 */
186static void remove_vm_struct(struct vm_area_struct *vma) 187void unlink_file_vma(struct vm_area_struct *vma)
187{ 188{
188 struct file *file = vma->vm_file; 189 struct file *file = vma->vm_file;
189 190
190 might_sleep();
191 if (file) { 191 if (file) {
192 struct address_space *mapping = file->f_mapping; 192 struct address_space *mapping = file->f_mapping;
193 spin_lock(&mapping->i_mmap_lock); 193 spin_lock(&mapping->i_mmap_lock);
194 __remove_shared_vm_struct(vma, file, mapping); 194 __remove_shared_vm_struct(vma, file, mapping);
195 spin_unlock(&mapping->i_mmap_lock); 195 spin_unlock(&mapping->i_mmap_lock);
196 } 196 }
197}
198
199/*
200 * Close a vm structure and free it, returning the next.
201 */
202static struct vm_area_struct *remove_vma(struct vm_area_struct *vma)
203{
204 struct vm_area_struct *next = vma->vm_next;
205
206 /*
207 * Hide vma from rmap and vmtruncate before freeing page tables:
208 * to be moved into free_pgtables once page_table_lock is lifted
209 * from it, but until then lock ordering forbids that move.
210 */
211 anon_vma_unlink(vma);
212 unlink_file_vma(vma);
213
214 might_sleep();
197 if (vma->vm_ops && vma->vm_ops->close) 215 if (vma->vm_ops && vma->vm_ops->close)
198 vma->vm_ops->close(vma); 216 vma->vm_ops->close(vma);
199 if (file) 217 if (vma->vm_file)
200 fput(file); 218 fput(vma->vm_file);
201 anon_vma_unlink(vma);
202 mpol_free(vma_policy(vma)); 219 mpol_free(vma_policy(vma));
203 kmem_cache_free(vm_area_cachep, vma); 220 kmem_cache_free(vm_area_cachep, vma);
221 return next;
204} 222}
205 223
206asmlinkage unsigned long sys_brk(unsigned long brk) 224asmlinkage unsigned long sys_brk(unsigned long brk)
@@ -1612,15 +1630,13 @@ find_extend_vma(struct mm_struct * mm, unsigned long addr)
1612static void remove_vma_list(struct mm_struct *mm, struct vm_area_struct *vma) 1630static void remove_vma_list(struct mm_struct *mm, struct vm_area_struct *vma)
1613{ 1631{
1614 do { 1632 do {
1615 struct vm_area_struct *next = vma->vm_next;
1616 long nrpages = vma_pages(vma); 1633 long nrpages = vma_pages(vma);
1617 1634
1618 mm->total_vm -= nrpages; 1635 mm->total_vm -= nrpages;
1619 if (vma->vm_flags & VM_LOCKED) 1636 if (vma->vm_flags & VM_LOCKED)
1620 mm->locked_vm -= nrpages; 1637 mm->locked_vm -= nrpages;
1621 vm_stat_account(mm, vma->vm_flags, vma->vm_file, -nrpages); 1638 vm_stat_account(mm, vma->vm_flags, vma->vm_file, -nrpages);
1622 remove_vm_struct(vma); 1639 vma = remove_vma(vma);
1623 vma = next;
1624 } while (vma); 1640 } while (vma);
1625 validate_mm(mm); 1641 validate_mm(mm);
1626} 1642}
@@ -1944,11 +1960,8 @@ void exit_mmap(struct mm_struct *mm)
1944 * Walk the list again, actually closing and freeing it 1960 * Walk the list again, actually closing and freeing it
1945 * without holding any MM locks. 1961 * without holding any MM locks.
1946 */ 1962 */
1947 while (vma) { 1963 while (vma)
1948 struct vm_area_struct *next = vma->vm_next; 1964 vma = remove_vma(vma);
1949 remove_vm_struct(vma);
1950 vma = next;
1951 }
1952 1965
1953 BUG_ON(mm->nr_ptes > (FIRST_USER_ADDRESS+PMD_SIZE-1)>>PMD_SHIFT); 1966 BUG_ON(mm->nr_ptes > (FIRST_USER_ADDRESS+PMD_SIZE-1)>>PMD_SHIFT);
1954} 1967}