diff options
author | Hugh Dickins <hugh@veritas.com> | 2005-10-29 21:15:57 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@g5.osdl.org> | 2005-10-30 00:40:37 -0400 |
commit | a8fb5618dab7e45c8990f3155628d772a9ed45f9 (patch) | |
tree | 77977b8fb2f57c855da9e3168977521e8393776a | |
parent | 2c0b381467bc2997be9d741a152f3fc75785eedc (diff) |
[PATCH] mm: unlink_file_vma, remove_vma
Divide remove_vm_struct into two parts: first anon_vma_unlink plus
unlink_file_vma, to unlink the vma from the list and tree by which rmap or
vmtruncate might find it; then remove_vma to close, fput and free.
The intention here is to do the anon_vma_unlink and unlink_file_vma earlier,
in free_pgtables before freeing any page tables: so we can be sure that any
page tables traversed by rmap and vmtruncate are stable (and other, ordinary
cases are stabilized by holding mmap_sem).
This will be crucial to traversing pgd, pud, pmd without page_table_lock. But
testing the split-out patch showed that lifting the page_table_lock is
symbiotically necessary to make this change: moving those unlinks into
free_pgtables while it is still under ptlock would violate the lock ordering.
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r-- | include/linux/mm.h | 1 | ||||
-rw-r--r-- | mm/mmap.c | 41 |
2 files changed, 28 insertions, 14 deletions
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 376a466743bc..0c64484d8ae0 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h | |||
@@ -834,6 +834,7 @@ extern int split_vma(struct mm_struct *, | |||
834 | extern int insert_vm_struct(struct mm_struct *, struct vm_area_struct *); | 834 | extern int insert_vm_struct(struct mm_struct *, struct vm_area_struct *); |
835 | extern void __vma_link_rb(struct mm_struct *, struct vm_area_struct *, | 835 | extern void __vma_link_rb(struct mm_struct *, struct vm_area_struct *, |
836 | struct rb_node **, struct rb_node *); | 836 | struct rb_node **, struct rb_node *); |
837 | extern void unlink_file_vma(struct vm_area_struct *); | ||
837 | extern struct vm_area_struct *copy_vma(struct vm_area_struct **, | 838 | extern struct vm_area_struct *copy_vma(struct vm_area_struct **, |
838 | unsigned long addr, unsigned long len, pgoff_t pgoff); | 839 | unsigned long addr, unsigned long len, pgoff_t pgoff); |
839 | extern void exit_mmap(struct mm_struct *); | 840 | extern void exit_mmap(struct mm_struct *); |
@@ -181,26 +181,44 @@ static void __remove_shared_vm_struct(struct vm_area_struct *vma, | |||
181 | } | 181 | } |
182 | 182 | ||
183 | /* | 183 | /* |
184 | * Remove one vm structure and free it. | 184 | * Unlink a file-based vm structure from its prio_tree, to hide |
185 | * vma from rmap and vmtruncate before freeing its page tables. | ||
185 | */ | 186 | */ |
186 | static void remove_vm_struct(struct vm_area_struct *vma) | 187 | void unlink_file_vma(struct vm_area_struct *vma) |
187 | { | 188 | { |
188 | struct file *file = vma->vm_file; | 189 | struct file *file = vma->vm_file; |
189 | 190 | ||
190 | might_sleep(); | ||
191 | if (file) { | 191 | if (file) { |
192 | struct address_space *mapping = file->f_mapping; | 192 | struct address_space *mapping = file->f_mapping; |
193 | spin_lock(&mapping->i_mmap_lock); | 193 | spin_lock(&mapping->i_mmap_lock); |
194 | __remove_shared_vm_struct(vma, file, mapping); | 194 | __remove_shared_vm_struct(vma, file, mapping); |
195 | spin_unlock(&mapping->i_mmap_lock); | 195 | spin_unlock(&mapping->i_mmap_lock); |
196 | } | 196 | } |
197 | } | ||
198 | |||
199 | /* | ||
200 | * Close a vm structure and free it, returning the next. | ||
201 | */ | ||
202 | static struct vm_area_struct *remove_vma(struct vm_area_struct *vma) | ||
203 | { | ||
204 | struct vm_area_struct *next = vma->vm_next; | ||
205 | |||
206 | /* | ||
207 | * Hide vma from rmap and vmtruncate before freeing page tables: | ||
208 | * to be moved into free_pgtables once page_table_lock is lifted | ||
209 | * from it, but until then lock ordering forbids that move. | ||
210 | */ | ||
211 | anon_vma_unlink(vma); | ||
212 | unlink_file_vma(vma); | ||
213 | |||
214 | might_sleep(); | ||
197 | if (vma->vm_ops && vma->vm_ops->close) | 215 | if (vma->vm_ops && vma->vm_ops->close) |
198 | vma->vm_ops->close(vma); | 216 | vma->vm_ops->close(vma); |
199 | if (file) | 217 | if (vma->vm_file) |
200 | fput(file); | 218 | fput(vma->vm_file); |
201 | anon_vma_unlink(vma); | ||
202 | mpol_free(vma_policy(vma)); | 219 | mpol_free(vma_policy(vma)); |
203 | kmem_cache_free(vm_area_cachep, vma); | 220 | kmem_cache_free(vm_area_cachep, vma); |
221 | return next; | ||
204 | } | 222 | } |
205 | 223 | ||
206 | asmlinkage unsigned long sys_brk(unsigned long brk) | 224 | asmlinkage unsigned long sys_brk(unsigned long brk) |
@@ -1612,15 +1630,13 @@ find_extend_vma(struct mm_struct * mm, unsigned long addr) | |||
1612 | static void remove_vma_list(struct mm_struct *mm, struct vm_area_struct *vma) | 1630 | static void remove_vma_list(struct mm_struct *mm, struct vm_area_struct *vma) |
1613 | { | 1631 | { |
1614 | do { | 1632 | do { |
1615 | struct vm_area_struct *next = vma->vm_next; | ||
1616 | long nrpages = vma_pages(vma); | 1633 | long nrpages = vma_pages(vma); |
1617 | 1634 | ||
1618 | mm->total_vm -= nrpages; | 1635 | mm->total_vm -= nrpages; |
1619 | if (vma->vm_flags & VM_LOCKED) | 1636 | if (vma->vm_flags & VM_LOCKED) |
1620 | mm->locked_vm -= nrpages; | 1637 | mm->locked_vm -= nrpages; |
1621 | vm_stat_account(mm, vma->vm_flags, vma->vm_file, -nrpages); | 1638 | vm_stat_account(mm, vma->vm_flags, vma->vm_file, -nrpages); |
1622 | remove_vm_struct(vma); | 1639 | vma = remove_vma(vma); |
1623 | vma = next; | ||
1624 | } while (vma); | 1640 | } while (vma); |
1625 | validate_mm(mm); | 1641 | validate_mm(mm); |
1626 | } | 1642 | } |
@@ -1944,11 +1960,8 @@ void exit_mmap(struct mm_struct *mm) | |||
1944 | * Walk the list again, actually closing and freeing it | 1960 | * Walk the list again, actually closing and freeing it |
1945 | * without holding any MM locks. | 1961 | * without holding any MM locks. |
1946 | */ | 1962 | */ |
1947 | while (vma) { | 1963 | while (vma) |
1948 | struct vm_area_struct *next = vma->vm_next; | 1964 | vma = remove_vma(vma); |
1949 | remove_vm_struct(vma); | ||
1950 | vma = next; | ||
1951 | } | ||
1952 | 1965 | ||
1953 | BUG_ON(mm->nr_ptes > (FIRST_USER_ADDRESS+PMD_SIZE-1)>>PMD_SHIFT); | 1966 | BUG_ON(mm->nr_ptes > (FIRST_USER_ADDRESS+PMD_SIZE-1)>>PMD_SHIFT); |
1954 | } | 1967 | } |