author     Hugh Dickins <hugh@veritas.com>        2005-10-29 21:16:41 -0400
committer  Linus Torvalds <torvalds@g5.osdl.org>  2005-10-30 00:40:42 -0400
commit     f412ac08c9861b4791af0145934c22f1458686da
tree       5e515efa116f3968c2caa75bc691a197199313a8
parent     4c21e2f2441dc5fbb957b030333f5a3f2d02dea7
[PATCH] mm: fix rss and mmlist locking
A couple of oddities were guarded by page_table_lock, and are no longer
properly guarded when that lock is split.
The mm_counters of file_rss and anon_rss: make those an atomic_t, or an
atomic64_t where the architecture supports it. Definitions courtesy of
Christoph Lameter, who spent considerable effort on more scalable ways of
counting, but found insufficient benefit in practice.
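To see why atomics are needed once the lock is split: two page faults in the
same mm can then run under different page-table locks, so both may update the
shared rss counters at once. The following is a minimal userspace sketch of
that lost-update race, illustrative only (not kernel code; the thread bodies,
names, and iteration counts are invented for the demonstration):

	/*
	 * Models the lost-update race on a per-mm rss counter once
	 * page_table_lock no longer covers it.  Build with: cc -pthread
	 */
	#include <pthread.h>
	#include <stdatomic.h>
	#include <stdio.h>

	static unsigned long plain_rss;		/* like the old (mm)->_anon_rss */
	static atomic_ulong atomic_rss;		/* like the new atomic counter  */

	static void *fault_path(void *arg)
	{
		/* stand-in for many faults each doing inc_mm_counter() */
		for (int i = 0; i < 1000000; i++) {
			plain_rss++;				/* racy load/add/store */
			atomic_fetch_add(&atomic_rss, 1);	/* safe */
		}
		return NULL;
	}

	int main(void)
	{
		pthread_t a, b;

		pthread_create(&a, NULL, fault_path, NULL);
		pthread_create(&b, NULL, fault_path, NULL);
		pthread_join(a, NULL);
		pthread_join(b, NULL);

		/* plain_rss typically falls short of 2000000; atomic_rss never does */
		printf("plain:  %lu\natomic: %lu\n",
		       plain_rss, atomic_load(&atomic_rss));
		return 0;
	}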
And adding an mm with swap to the mmlist for swapoff: the list is well-guarded
by its own lock, but the list_empty check now has to be repeated inside it.
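The pattern, distilled from the mm/memory.c and mm/rmap.c hunks below (a
sketch of the shape, not a new API):

	if (unlikely(list_empty(&mm->mmlist))) {	/* unlocked: a hint only */
		spin_lock(&mmlist_lock);
		if (list_empty(&mm->mmlist))		/* recheck under the lock */
			list_add(&mm->mmlist, &init_mm.mmlist);
		spin_unlock(&mmlist_lock);
	}

Without the inner check, two CPUs could both see the list empty before either
takes mmlist_lock, and both would list_add() the same mm, corrupting the list.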
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r--  include/linux/sched.h | 42
-rw-r--r--  mm/memory.c           |  4
-rw-r--r--  mm/rmap.c             |  3
3 files changed, 43 insertions, 6 deletions
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 292cb57ce38f..1c30bc308ef1 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -249,13 +249,47 @@ arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr,
 extern void arch_unmap_area(struct mm_struct *, unsigned long);
 extern void arch_unmap_area_topdown(struct mm_struct *, unsigned long);
 
+#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
+/*
+ * The mm counters are not protected by its page_table_lock,
+ * so must be incremented atomically.
+ */
+#ifdef ATOMIC64_INIT
+#define set_mm_counter(mm, member, value) atomic64_set(&(mm)->_##member, value)
+#define get_mm_counter(mm, member) ((unsigned long)atomic64_read(&(mm)->_##member))
+#define add_mm_counter(mm, member, value) atomic64_add(value, &(mm)->_##member)
+#define inc_mm_counter(mm, member) atomic64_inc(&(mm)->_##member)
+#define dec_mm_counter(mm, member) atomic64_dec(&(mm)->_##member)
+typedef atomic64_t mm_counter_t;
+#else /* !ATOMIC64_INIT */
+/*
+ * The counters wrap back to 0 at 2^32 * PAGE_SIZE,
+ * that is, at 16TB if using 4kB page size.
+ */
+#define set_mm_counter(mm, member, value) atomic_set(&(mm)->_##member, value)
+#define get_mm_counter(mm, member) ((unsigned long)atomic_read(&(mm)->_##member))
+#define add_mm_counter(mm, member, value) atomic_add(value, &(mm)->_##member)
+#define inc_mm_counter(mm, member) atomic_inc(&(mm)->_##member)
+#define dec_mm_counter(mm, member) atomic_dec(&(mm)->_##member)
+typedef atomic_t mm_counter_t;
+#endif /* !ATOMIC64_INIT */
+
+#else /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */
+/*
+ * The mm counters are protected by its page_table_lock,
+ * so can be incremented directly.
+ */
 #define set_mm_counter(mm, member, value) (mm)->_##member = (value)
 #define get_mm_counter(mm, member) ((mm)->_##member)
 #define add_mm_counter(mm, member, value) (mm)->_##member += (value)
 #define inc_mm_counter(mm, member) (mm)->_##member++
 #define dec_mm_counter(mm, member) (mm)->_##member--
-#define get_mm_rss(mm) ((mm)->_file_rss + (mm)->_anon_rss)
+typedef unsigned long mm_counter_t;
+
+#endif /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */
 
+#define get_mm_rss(mm) \
+	(get_mm_counter(mm, file_rss) + get_mm_counter(mm, anon_rss))
 #define update_hiwater_rss(mm) do {			\
 	unsigned long _rss = get_mm_rss(mm);		\
 	if ((mm)->hiwater_rss < _rss)			\
@@ -266,8 +300,6 @@ extern void arch_unmap_area_topdown(struct mm_struct *, unsigned long);
 	(mm)->hiwater_vm = (mm)->total_vm;	\
 } while (0)
 
-typedef unsigned long mm_counter_t;
-
 struct mm_struct {
 	struct vm_area_struct * mmap;		/* list of VMAs */
 	struct rb_root mm_rb;
@@ -291,7 +323,9 @@ struct mm_struct {
 					 * by mmlist_lock
 					 */
 
-	/* Special counters protected by the page_table_lock */
+	/* Special counters, in some configurations protected by the
+	 * page_table_lock, in other configurations by being atomic.
+	 */
 	mm_counter_t _file_rss;
 	mm_counter_t _anon_rss;
 
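For reference, the 16TB figure in the atomic_t comment is just the counter
width times the page size: an atomic_t counts 32 bits of pages, and 2^32
pages x 4kB/page = 2^44 bytes = 16TB, after which the count wraps. And
ATOMIC64_INIT serves as the feature test: an architecture that provides
atomic64_t also defines ATOMIC64_INIT, so the #ifdef selects the 64-bit,
non-wrapping counters wherever they exist.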
diff --git a/mm/memory.c b/mm/memory.c
index e9ef599498b5..d68421dd64ef 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -372,7 +372,9 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 		/* make sure dst_mm is on swapoff's mmlist. */
 		if (unlikely(list_empty(&dst_mm->mmlist))) {
 			spin_lock(&mmlist_lock);
-			list_add(&dst_mm->mmlist, &src_mm->mmlist);
+			if (list_empty(&dst_mm->mmlist))
+				list_add(&dst_mm->mmlist,
+					 &src_mm->mmlist);
 			spin_unlock(&mmlist_lock);
 		}
 	}
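Note that the insertion point differs between the two sites: copy_one_pte()
links dst_mm right after src_mm, which must already be on the mmlist since it
holds swap entries, while try_to_unmap_one() below links the mm after
init_mm, the head of the list.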
diff --git a/mm/rmap.c b/mm/rmap.c
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -559,7 +559,8 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma)
 		swap_duplicate(entry);
 		if (list_empty(&mm->mmlist)) {
 			spin_lock(&mmlist_lock);
-			list_add(&mm->mmlist, &init_mm.mmlist);
+			if (list_empty(&mm->mmlist))
+				list_add(&mm->mmlist, &init_mm.mmlist);
 			spin_unlock(&mmlist_lock);
 		}
 		set_pte_at(mm, address, pte, swp_entry_to_pte(entry));
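Both call sites end up with the same shape: the outer, unlocked list_empty()
keeps the global mmlist_lock off the common path where the mm is already on
the list, and the repeated check under the lock closes the window in which
two CPUs could otherwise both decide to add the same mm.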