diff options
author | Hugh Dickins <hugh@veritas.com> | 2005-10-29 21:16:18 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@g5.osdl.org> | 2005-10-30 00:40:39 -0400 |
commit | 365e9c87a982c03d0af3886e29d877f581b59611 (patch) | |
tree | d06c1918ca9fe6677d7e4e869555e095004274f7 /include/linux | |
parent | 861f2fb8e796022b4928cab9c74fca6681a1c557 (diff) |
[PATCH] mm: update_hiwaters just in time
update_mem_hiwater has attracted various criticisms, in particular from those
concerned with mm scalability. Originally it was called whenever rss or
total_vm got raised. Then many of those callsites were replaced by a timer
tick call from account_system_time. Now Frank van Maarseveen reports that this,
too, has been found inadequate. How about this instead? It works for Frank.
Replace update_mem_hiwater, a poor combination of two unrelated ops, by macros
update_hiwater_rss and update_hiwater_vm. Don't attempt to keep
mm->hiwater_rss up to date at timer tick, nor every time we raise rss (usually
by 1): those are hot paths. Do the opposite, update only when about to lower
rss (usually by many), or just before final accounting in do_exit. Handle
mm->hiwater_vm in the same way, though it's much less of an issue. Demand
that whoever collects these hiwater statistics do the work of taking the
maximum with rss or total_vm.
And there has been no collector of these hiwater statistics in the tree. The
new convention needs an example, so match Frank's usage by adding a VmPeak
line above VmSize to /proc/<pid>/status, and also a VmHWM line above VmRSS
(High-Water-Mark or High-Water-Memory).
There was a particular anomaly during mremap move, that hiwater_vm might be
captured too high. A fleeting such anomaly remains, but it's quickly
corrected now, whereas before it would stick.
What locking? None: if the app is racy then these statistics will be racy,
it's not worth any overhead to make them exact. But whenever it suits,
hiwater_vm is updated under exclusive mmap_sem, and hiwater_rss under
page_table_lock (for now) or with preemption disabled (later on): without
going to any trouble, minimize the time between reading current values and
updating, to minimize those occasions when a racing thread bumps a count up
and back down in between.
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'include/linux')
-rw-r--r-- | include/linux/mm.h | 3 | ||||
-rw-r--r-- | include/linux/sched.h | 10 |
2 files changed, 10 insertions, 3 deletions
diff --git a/include/linux/mm.h b/include/linux/mm.h
index da42093250c3..7d4552fe0864 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -938,9 +938,6 @@ static inline void vm_stat_account(struct mm_struct *mm, | |||
938 | } | 938 | } |
939 | #endif /* CONFIG_PROC_FS */ | 939 | #endif /* CONFIG_PROC_FS */ |
940 | 940 | ||
941 | /* update per process rss and vm hiwater data */ | ||
942 | extern void update_mem_hiwater(struct task_struct *tsk); | ||
943 | |||
944 | #ifndef CONFIG_DEBUG_PAGEALLOC | 941 | #ifndef CONFIG_DEBUG_PAGEALLOC |
945 | static inline void | 942 | static inline void |
946 | kernel_map_pages(struct page *page, int numpages, int enable) | 943 | kernel_map_pages(struct page *page, int numpages, int enable) |
diff --git a/include/linux/sched.h b/include/linux/sched.h
index afcaac66cbd5..a9c0b7d26303 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -256,6 +256,16 @@ extern void arch_unmap_area_topdown(struct mm_struct *, unsigned long); | |||
256 | #define dec_mm_counter(mm, member) (mm)->_##member-- | 256 | #define dec_mm_counter(mm, member) (mm)->_##member-- |
257 | #define get_mm_rss(mm) ((mm)->_file_rss + (mm)->_anon_rss) | 257 | #define get_mm_rss(mm) ((mm)->_file_rss + (mm)->_anon_rss) |
258 | 258 | ||
259 | #define update_hiwater_rss(mm) do { \ | ||
260 | unsigned long _rss = get_mm_rss(mm); \ | ||
261 | if ((mm)->hiwater_rss < _rss) \ | ||
262 | (mm)->hiwater_rss = _rss; \ | ||
263 | } while (0) | ||
264 | #define update_hiwater_vm(mm) do { \ | ||
265 | if ((mm)->hiwater_vm < (mm)->total_vm) \ | ||
266 | (mm)->hiwater_vm = (mm)->total_vm; \ | ||
267 | } while (0) | ||
268 | |||
259 | typedef unsigned long mm_counter_t; | 269 | typedef unsigned long mm_counter_t; |
260 | 270 | ||
261 | struct mm_struct { | 271 | struct mm_struct { |