diff options
author | David Rientjes <rientjes@google.com> | 2009-01-06 17:39:31 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2009-01-06 18:59:03 -0500 |
commit | 2da02997e08d3efe8174c7a47696e6f7cbe69ba9 (patch) | |
tree | 2e52d9346c52dda83dc8cc9626cbe302e026ad9a | |
parent | 364aeb2849789b51bf4b9af2ddd02fee7285c54e (diff) |
mm: add dirty_background_bytes and dirty_bytes sysctls
This change introduces two new sysctls to /proc/sys/vm:
dirty_background_bytes and dirty_bytes.
dirty_background_bytes is the counterpart to dirty_background_ratio and
dirty_bytes is the counterpart to dirty_ratio.
With growing memory capacities of individual machines, it's no longer
sufficient to specify dirty thresholds as a percentage of the amount of
dirtyable memory over the entire system.
dirty_background_bytes and dirty_bytes specify quantities of memory, in
bytes, that represent the dirty limits for the entire system. If either
of these values is set, its value represents the amount of dirty memory
that is needed to commence either background or direct writeback.
When a `bytes' or `ratio' file is written, its counterpart becomes a
function of the written value. For example, if dirty_bytes is written to
be 8096, 8K of memory is required to commence direct writeback.
dirty_ratio is then functionally equivalent to 8K / the amount of
dirtyable memory:
dirtyable_memory = free pages + mapped pages + file cache
dirty_background_bytes = dirty_background_ratio * dirtyable_memory
-or-
dirty_background_ratio = dirty_background_bytes / dirtyable_memory
AND
dirty_bytes = dirty_ratio * dirtyable_memory
-or-
dirty_ratio = dirty_bytes / dirtyable_memory
Only one of dirty_background_bytes and dirty_background_ratio may be
specified at a time, and only one of dirty_bytes and dirty_ratio may be
specified. When one sysctl is written, the other appears as 0 when read.
The `bytes' files operate on a page size granularity since dirty limits
are compared with ZVC values, which are in page units.
Prior to this change, the minimum dirty_ratio was 5 as implemented by
get_dirty_limits() although /proc/sys/vm/dirty_ratio would show any user
written value between 0 and 100. This restriction is maintained, but
dirty_bytes has a lower limit of only one page.
Also prior to this change, the dirty_background_ratio could not equal or
exceed dirty_ratio. This restriction is maintained in addition to
restricting dirty_background_bytes. If either background threshold equals
or exceeds that of the dirty threshold, it is implicitly set to half the
dirty threshold.
Acked-by: Peter Zijlstra <peterz@infradead.org>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Christoph Lameter <cl@linux-foundation.org>
Signed-off-by: David Rientjes <rientjes@google.com>
Cc: Andrea Righi <righi.andrea@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | Documentation/filesystems/proc.txt | 26 | ||||
-rw-r--r-- | Documentation/sysctl/vm.txt | 3 | ||||
-rw-r--r-- | include/linux/writeback.h | 11 | ||||
-rw-r--r-- | kernel/sysctl.c | 27 | ||||
-rw-r--r-- | mm/page-writeback.c | 102 |
5 files changed, 146 insertions, 23 deletions
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index 71df353e367c..32e94635484f 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt | |||
@@ -1385,6 +1385,15 @@ swapcache reclaim. Decreasing vfs_cache_pressure causes the kernel to prefer | |||
1385 | to retain dentry and inode caches. Increasing vfs_cache_pressure beyond 100 | 1385 | to retain dentry and inode caches. Increasing vfs_cache_pressure beyond 100 |
1386 | causes the kernel to prefer to reclaim dentries and inodes. | 1386 | causes the kernel to prefer to reclaim dentries and inodes. |
1387 | 1387 | ||
1388 | dirty_background_bytes | ||
1389 | ---------------------- | ||
1390 | |||
1391 | Contains the amount of dirty memory at which the pdflush background writeback | ||
1392 | daemon will start writeback. | ||
1393 | |||
1394 | If dirty_background_bytes is written, dirty_background_ratio becomes a function | ||
1395 | of its value (dirty_background_bytes / the amount of dirtyable system memory). | ||
1396 | |||
1388 | dirty_background_ratio | 1397 | dirty_background_ratio |
1389 | ---------------------- | 1398 | ---------------------- |
1390 | 1399 | ||
@@ -1393,14 +1402,29 @@ pages + file cache, not including locked pages and HugePages), the number of | |||
1393 | pages at which the pdflush background writeback daemon will start writing out | 1402 | pages at which the pdflush background writeback daemon will start writing out |
1394 | dirty data. | 1403 | dirty data. |
1395 | 1404 | ||
1405 | If dirty_background_ratio is written, dirty_background_bytes becomes a function | ||
1406 | of its value (dirty_background_ratio * the amount of dirtyable system memory). | ||
1407 | |||
1408 | dirty_bytes | ||
1409 | ----------- | ||
1410 | |||
1411 | Contains the amount of dirty memory at which a process generating disk writes | ||
1412 | will itself start writeback. | ||
1413 | |||
1414 | If dirty_bytes is written, dirty_ratio becomes a function of its value | ||
1415 | (dirty_bytes / the amount of dirtyable system memory). | ||
1416 | |||
1396 | dirty_ratio | 1417 | dirty_ratio |
1397 | ----------------- | 1418 | ----------- |
1398 | 1419 | ||
1399 | Contains, as a percentage of the dirtyable system memory (free pages + mapped | 1420 | Contains, as a percentage of the dirtyable system memory (free pages + mapped |
1400 | pages + file cache, not including locked pages and HugePages), the number of | 1421 | pages + file cache, not including locked pages and HugePages), the number of |
1401 | pages at which a process which is generating disk writes will itself start | 1422 | pages at which a process which is generating disk writes will itself start |
1402 | writing out dirty data. | 1423 | writing out dirty data. |
1403 | 1424 | ||
1425 | If dirty_ratio is written, dirty_bytes becomes a function of its value | ||
1426 | (dirty_ratio * the amount of dirtyable system memory). | ||
1427 | |||
1404 | dirty_writeback_centisecs | 1428 | dirty_writeback_centisecs |
1405 | ------------------------- | 1429 | ------------------------- |
1406 | 1430 | ||
diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt index d79eeda7a699..cd05994a49e6 100644 --- a/Documentation/sysctl/vm.txt +++ b/Documentation/sysctl/vm.txt | |||
@@ -41,7 +41,8 @@ Currently, these files are in /proc/sys/vm: | |||
41 | 41 | ||
42 | ============================================================== | 42 | ============================================================== |
43 | 43 | ||
44 | dirty_ratio, dirty_background_ratio, dirty_expire_centisecs, | 44 | dirty_bytes, dirty_ratio, dirty_background_bytes, |
45 | dirty_background_ratio, dirty_expire_centisecs, | ||
45 | dirty_writeback_centisecs, highmem_is_dirtyable, | 46 | dirty_writeback_centisecs, highmem_is_dirtyable, |
46 | vfs_cache_pressure, laptop_mode, block_dump, swap_token_timeout, | 47 | vfs_cache_pressure, laptop_mode, block_dump, swap_token_timeout, |
47 | drop-caches, hugepages_treat_as_movable: | 48 | drop-caches, hugepages_treat_as_movable: |
diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 259e9ea58cab..bb28c975c1d7 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h | |||
@@ -107,7 +107,9 @@ void throttle_vm_writeout(gfp_t gfp_mask); | |||
107 | 107 | ||
108 | /* These are exported to sysctl. */ | 108 | /* These are exported to sysctl. */ |
109 | extern int dirty_background_ratio; | 109 | extern int dirty_background_ratio; |
110 | extern unsigned long dirty_background_bytes; | ||
110 | extern int vm_dirty_ratio; | 111 | extern int vm_dirty_ratio; |
112 | extern unsigned long vm_dirty_bytes; | ||
111 | extern int dirty_writeback_interval; | 113 | extern int dirty_writeback_interval; |
112 | extern int dirty_expire_interval; | 114 | extern int dirty_expire_interval; |
113 | extern int vm_highmem_is_dirtyable; | 115 | extern int vm_highmem_is_dirtyable; |
@@ -116,9 +118,18 @@ extern int laptop_mode; | |||
116 | 118 | ||
117 | extern unsigned long determine_dirtyable_memory(void); | 119 | extern unsigned long determine_dirtyable_memory(void); |
118 | 120 | ||
121 | extern int dirty_background_ratio_handler(struct ctl_table *table, int write, | ||
122 | struct file *filp, void __user *buffer, size_t *lenp, | ||
123 | loff_t *ppos); | ||
124 | extern int dirty_background_bytes_handler(struct ctl_table *table, int write, | ||
125 | struct file *filp, void __user *buffer, size_t *lenp, | ||
126 | loff_t *ppos); | ||
119 | extern int dirty_ratio_handler(struct ctl_table *table, int write, | 127 | extern int dirty_ratio_handler(struct ctl_table *table, int write, |
120 | struct file *filp, void __user *buffer, size_t *lenp, | 128 | struct file *filp, void __user *buffer, size_t *lenp, |
121 | loff_t *ppos); | 129 | loff_t *ppos); |
130 | extern int dirty_bytes_handler(struct ctl_table *table, int write, | ||
131 | struct file *filp, void __user *buffer, size_t *lenp, | ||
132 | loff_t *ppos); | ||
122 | 133 | ||
123 | struct ctl_table; | 134 | struct ctl_table; |
124 | struct file; | 135 | struct file; |
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index ff6d45c7626f..92f6e5bc3c24 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
@@ -87,10 +87,6 @@ extern int rcutorture_runnable; | |||
87 | #endif /* #ifdef CONFIG_RCU_TORTURE_TEST */ | 87 | #endif /* #ifdef CONFIG_RCU_TORTURE_TEST */ |
88 | 88 | ||
89 | /* Constants used for minimum and maximum */ | 89 | /* Constants used for minimum and maximum */ |
90 | #if defined(CONFIG_HIGHMEM) || defined(CONFIG_DETECT_SOFTLOCKUP) | ||
91 | static int one = 1; | ||
92 | #endif | ||
93 | |||
94 | #ifdef CONFIG_DETECT_SOFTLOCKUP | 90 | #ifdef CONFIG_DETECT_SOFTLOCKUP |
95 | static int sixty = 60; | 91 | static int sixty = 60; |
96 | static int neg_one = -1; | 92 | static int neg_one = -1; |
@@ -101,6 +97,7 @@ static int two = 2; | |||
101 | #endif | 97 | #endif |
102 | 98 | ||
103 | static int zero; | 99 | static int zero; |
100 | static int one = 1; | ||
104 | static int one_hundred = 100; | 101 | static int one_hundred = 100; |
105 | 102 | ||
106 | /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */ | 103 | /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */ |
@@ -952,12 +949,22 @@ static struct ctl_table vm_table[] = { | |||
952 | .data = &dirty_background_ratio, | 949 | .data = &dirty_background_ratio, |
953 | .maxlen = sizeof(dirty_background_ratio), | 950 | .maxlen = sizeof(dirty_background_ratio), |
954 | .mode = 0644, | 951 | .mode = 0644, |
955 | .proc_handler = &proc_dointvec_minmax, | 952 | .proc_handler = &dirty_background_ratio_handler, |
956 | .strategy = &sysctl_intvec, | 953 | .strategy = &sysctl_intvec, |
957 | .extra1 = &zero, | 954 | .extra1 = &zero, |
958 | .extra2 = &one_hundred, | 955 | .extra2 = &one_hundred, |
959 | }, | 956 | }, |
960 | { | 957 | { |
958 | .ctl_name = CTL_UNNUMBERED, | ||
959 | .procname = "dirty_background_bytes", | ||
960 | .data = &dirty_background_bytes, | ||
961 | .maxlen = sizeof(dirty_background_bytes), | ||
962 | .mode = 0644, | ||
963 | .proc_handler = &dirty_background_bytes_handler, | ||
964 | .strategy = &sysctl_intvec, | ||
965 | .extra1 = &one, | ||
966 | }, | ||
967 | { | ||
961 | .ctl_name = VM_DIRTY_RATIO, | 968 | .ctl_name = VM_DIRTY_RATIO, |
962 | .procname = "dirty_ratio", | 969 | .procname = "dirty_ratio", |
963 | .data = &vm_dirty_ratio, | 970 | .data = &vm_dirty_ratio, |
@@ -969,6 +976,16 @@ static struct ctl_table vm_table[] = { | |||
969 | .extra2 = &one_hundred, | 976 | .extra2 = &one_hundred, |
970 | }, | 977 | }, |
971 | { | 978 | { |
979 | .ctl_name = CTL_UNNUMBERED, | ||
980 | .procname = "dirty_bytes", | ||
981 | .data = &vm_dirty_bytes, | ||
982 | .maxlen = sizeof(vm_dirty_bytes), | ||
983 | .mode = 0644, | ||
984 | .proc_handler = &dirty_bytes_handler, | ||
985 | .strategy = &sysctl_intvec, | ||
986 | .extra1 = &one, | ||
987 | }, | ||
988 | { | ||
972 | .procname = "dirty_writeback_centisecs", | 989 | .procname = "dirty_writeback_centisecs", |
973 | .data = &dirty_writeback_interval, | 990 | .data = &dirty_writeback_interval, |
974 | .maxlen = sizeof(dirty_writeback_interval), | 991 | .maxlen = sizeof(dirty_writeback_interval), |
diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 4d4074cff300..b493db7841dc 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c | |||
@@ -69,6 +69,12 @@ static inline long sync_writeback_pages(void) | |||
69 | int dirty_background_ratio = 5; | 69 | int dirty_background_ratio = 5; |
70 | 70 | ||
71 | /* | 71 | /* |
72 | * dirty_background_bytes starts at 0 (disabled) so that it is a function of | ||
73 | * dirty_background_ratio * the amount of dirtyable memory | ||
74 | */ | ||
75 | unsigned long dirty_background_bytes; | ||
76 | |||
77 | /* | ||
72 | * free highmem will not be subtracted from the total free memory | 78 | * free highmem will not be subtracted from the total free memory |
73 | * for calculating free ratios if vm_highmem_is_dirtyable is true | 79 | * for calculating free ratios if vm_highmem_is_dirtyable is true |
74 | */ | 80 | */ |
@@ -80,6 +86,12 @@ int vm_highmem_is_dirtyable; | |||
80 | int vm_dirty_ratio = 10; | 86 | int vm_dirty_ratio = 10; |
81 | 87 | ||
82 | /* | 88 | /* |
89 | * vm_dirty_bytes starts at 0 (disabled) so that it is a function of | ||
90 | * vm_dirty_ratio * the amount of dirtyable memory | ||
91 | */ | ||
92 | unsigned long vm_dirty_bytes; | ||
93 | |||
94 | /* | ||
83 | * The interval between `kupdate'-style writebacks, in jiffies | 95 | * The interval between `kupdate'-style writebacks, in jiffies |
84 | */ | 96 | */ |
85 | int dirty_writeback_interval = 5 * HZ; | 97 | int dirty_writeback_interval = 5 * HZ; |
@@ -135,23 +147,75 @@ static int calc_period_shift(void) | |||
135 | { | 147 | { |
136 | unsigned long dirty_total; | 148 | unsigned long dirty_total; |
137 | 149 | ||
138 | dirty_total = (vm_dirty_ratio * determine_dirtyable_memory()) / 100; | 150 | if (vm_dirty_bytes) |
151 | dirty_total = vm_dirty_bytes / PAGE_SIZE; | ||
152 | else | ||
153 | dirty_total = (vm_dirty_ratio * determine_dirtyable_memory()) / | ||
154 | 100; | ||
139 | return 2 + ilog2(dirty_total - 1); | 155 | return 2 + ilog2(dirty_total - 1); |
140 | } | 156 | } |
141 | 157 | ||
142 | /* | 158 | /* |
143 | * update the period when the dirty ratio changes. | 159 | * update the period when the dirty threshold changes. |
144 | */ | 160 | */ |
161 | static void update_completion_period(void) | ||
162 | { | ||
163 | int shift = calc_period_shift(); | ||
164 | prop_change_shift(&vm_completions, shift); | ||
165 | prop_change_shift(&vm_dirties, shift); | ||
166 | } | ||
167 | |||
168 | int dirty_background_ratio_handler(struct ctl_table *table, int write, | ||
169 | struct file *filp, void __user *buffer, size_t *lenp, | ||
170 | loff_t *ppos) | ||
171 | { | ||
172 | int ret; | ||
173 | |||
174 | ret = proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos); | ||
175 | if (ret == 0 && write) | ||
176 | dirty_background_bytes = 0; | ||
177 | return ret; | ||
178 | } | ||
179 | |||
180 | int dirty_background_bytes_handler(struct ctl_table *table, int write, | ||
181 | struct file *filp, void __user *buffer, size_t *lenp, | ||
182 | loff_t *ppos) | ||
183 | { | ||
184 | int ret; | ||
185 | |||
186 | ret = proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos); | ||
187 | if (ret == 0 && write) | ||
188 | dirty_background_ratio = 0; | ||
189 | return ret; | ||
190 | } | ||
191 | |||
145 | int dirty_ratio_handler(struct ctl_table *table, int write, | 192 | int dirty_ratio_handler(struct ctl_table *table, int write, |
146 | struct file *filp, void __user *buffer, size_t *lenp, | 193 | struct file *filp, void __user *buffer, size_t *lenp, |
147 | loff_t *ppos) | 194 | loff_t *ppos) |
148 | { | 195 | { |
149 | int old_ratio = vm_dirty_ratio; | 196 | int old_ratio = vm_dirty_ratio; |
150 | int ret = proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos); | 197 | int ret; |
198 | |||
199 | ret = proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos); | ||
151 | if (ret == 0 && write && vm_dirty_ratio != old_ratio) { | 200 | if (ret == 0 && write && vm_dirty_ratio != old_ratio) { |
152 | int shift = calc_period_shift(); | 201 | update_completion_period(); |
153 | prop_change_shift(&vm_completions, shift); | 202 | vm_dirty_bytes = 0; |
154 | prop_change_shift(&vm_dirties, shift); | 203 | } |
204 | return ret; | ||
205 | } | ||
206 | |||
207 | |||
208 | int dirty_bytes_handler(struct ctl_table *table, int write, | ||
209 | struct file *filp, void __user *buffer, size_t *lenp, | ||
210 | loff_t *ppos) | ||
211 | { | ||
212 | int old_bytes = vm_dirty_bytes; | ||
213 | int ret; | ||
214 | |||
215 | ret = proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos); | ||
216 | if (ret == 0 && write && vm_dirty_bytes != old_bytes) { | ||
217 | update_completion_period(); | ||
218 | vm_dirty_ratio = 0; | ||
155 | } | 219 | } |
156 | return ret; | 220 | return ret; |
157 | } | 221 | } |
@@ -365,23 +429,29 @@ void | |||
365 | get_dirty_limits(unsigned long *pbackground, unsigned long *pdirty, | 429 | get_dirty_limits(unsigned long *pbackground, unsigned long *pdirty, |
366 | unsigned long *pbdi_dirty, struct backing_dev_info *bdi) | 430 | unsigned long *pbdi_dirty, struct backing_dev_info *bdi) |
367 | { | 431 | { |
368 | int background_ratio; /* Percentages */ | ||
369 | int dirty_ratio; | ||
370 | unsigned long background; | 432 | unsigned long background; |
371 | unsigned long dirty; | 433 | unsigned long dirty; |
372 | unsigned long available_memory = determine_dirtyable_memory(); | 434 | unsigned long available_memory = determine_dirtyable_memory(); |
373 | struct task_struct *tsk; | 435 | struct task_struct *tsk; |
374 | 436 | ||
375 | dirty_ratio = vm_dirty_ratio; | 437 | if (vm_dirty_bytes) |
376 | if (dirty_ratio < 5) | 438 | dirty = DIV_ROUND_UP(vm_dirty_bytes, PAGE_SIZE); |
377 | dirty_ratio = 5; | 439 | else { |
440 | int dirty_ratio; | ||
378 | 441 | ||
379 | background_ratio = dirty_background_ratio; | 442 | dirty_ratio = vm_dirty_ratio; |
380 | if (background_ratio >= dirty_ratio) | 443 | if (dirty_ratio < 5) |
381 | background_ratio = dirty_ratio / 2; | 444 | dirty_ratio = 5; |
445 | dirty = (dirty_ratio * available_memory) / 100; | ||
446 | } | ||
447 | |||
448 | if (dirty_background_bytes) | ||
449 | background = DIV_ROUND_UP(dirty_background_bytes, PAGE_SIZE); | ||
450 | else | ||
451 | background = (dirty_background_ratio * available_memory) / 100; | ||
382 | 452 | ||
383 | background = (background_ratio * available_memory) / 100; | 453 | if (background >= dirty) |
384 | dirty = (dirty_ratio * available_memory) / 100; | 454 | background = dirty / 2; |
385 | tsk = current; | 455 | tsk = current; |
386 | if (tsk->flags & PF_LESS_THROTTLE || rt_task(tsk)) { | 456 | if (tsk->flags & PF_LESS_THROTTLE || rt_task(tsk)) { |
387 | background += background / 4; | 457 | background += background / 4; |