aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid Rientjes <rientjes@google.com>2009-01-06 17:39:31 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2009-01-06 18:59:03 -0500
commit2da02997e08d3efe8174c7a47696e6f7cbe69ba9 (patch)
tree2e52d9346c52dda83dc8cc9626cbe302e026ad9a
parent364aeb2849789b51bf4b9af2ddd02fee7285c54e (diff)
mm: add dirty_background_bytes and dirty_bytes sysctls
This change introduces two new sysctls to /proc/sys/vm: dirty_background_bytes and dirty_bytes. dirty_background_bytes is the counterpart to dirty_background_ratio and dirty_bytes is the counterpart to dirty_ratio. With growing memory capacities of individual machines, it's no longer sufficient to specify dirty thresholds as a percentage of the amount of dirtyable memory over the entire system. dirty_background_bytes and dirty_bytes specify quantities of memory, in bytes, that represent the dirty limits for the entire system. If either of these values is set, its value represents the amount of dirty memory that is needed to commence either background or direct writeback. When a `bytes' or `ratio' file is written, its counterpart becomes a function of the written value. For example, if dirty_bytes is written to be 8096, 8K of memory is required to commence direct writeback. dirty_ratio is then functionally equivalent to 8K / the amount of dirtyable memory: dirtyable_memory = free pages + mapped pages + file cache dirty_background_bytes = dirty_background_ratio * dirtyable_memory -or- dirty_background_ratio = dirty_background_bytes / dirtyable_memory AND dirty_bytes = dirty_ratio * dirtyable_memory -or- dirty_ratio = dirty_bytes / dirtyable_memory Only one of dirty_background_bytes and dirty_background_ratio may be specified at a time, and only one of dirty_bytes and dirty_ratio may be specified. When one sysctl is written, the other appears as 0 when read. The `bytes' files operate on a page size granularity since dirty limits are compared with ZVC values, which are in page units. Prior to this change, the minimum dirty_ratio was 5 as implemented by get_dirty_limits() although /proc/sys/vm/dirty_ratio would show any user written value between 0 and 100. This restriction is maintained, but dirty_bytes has a lower limit of only one page. Also prior to this change, the dirty_background_ratio could not equal or exceed dirty_ratio. This restriction is maintained in addition to restricting dirty_background_bytes. If either background threshold equals or exceeds that of the dirty threshold, it is implicitly set to half the dirty threshold. Acked-by: Peter Zijlstra <peterz@infradead.org> Cc: Dave Chinner <david@fromorbit.com> Cc: Christoph Lameter <cl@linux-foundation.org> Signed-off-by: David Rientjes <rientjes@google.com> Cc: Andrea Righi <righi.andrea@gmail.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--Documentation/filesystems/proc.txt26
-rw-r--r--Documentation/sysctl/vm.txt3
-rw-r--r--include/linux/writeback.h11
-rw-r--r--kernel/sysctl.c27
-rw-r--r--mm/page-writeback.c102
5 files changed, 146 insertions, 23 deletions
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index 71df353e367c..32e94635484f 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -1385,6 +1385,15 @@ swapcache reclaim. Decreasing vfs_cache_pressure causes the kernel to prefer
1385to retain dentry and inode caches. Increasing vfs_cache_pressure beyond 100 1385to retain dentry and inode caches. Increasing vfs_cache_pressure beyond 100
1386causes the kernel to prefer to reclaim dentries and inodes. 1386causes the kernel to prefer to reclaim dentries and inodes.
1387 1387
1388dirty_background_bytes
1389----------------------
1390
1391Contains the amount of dirty memory at which the pdflush background writeback
1392daemon will start writeback.
1393
1394If dirty_background_bytes is written, dirty_background_ratio becomes a function
1395of its value (dirty_background_bytes / the amount of dirtyable system memory).
1396
1388dirty_background_ratio 1397dirty_background_ratio
1389---------------------- 1398----------------------
1390 1399
@@ -1393,14 +1402,29 @@ pages + file cache, not including locked pages and HugePages), the number of
1393pages at which the pdflush background writeback daemon will start writing out 1402pages at which the pdflush background writeback daemon will start writing out
1394dirty data. 1403dirty data.
1395 1404
1405If dirty_background_ratio is written, dirty_background_bytes becomes a function
1406of its value (dirty_background_ratio * the amount of dirtyable system memory).
1407
1408dirty_bytes
1409-----------
1410
1411Contains the amount of dirty memory at which a process generating disk writes
1412will itself start writeback.
1413
1414If dirty_bytes is written, dirty_ratio becomes a function of its value
1415(dirty_bytes / the amount of dirtyable system memory).
1416
1396dirty_ratio 1417dirty_ratio
1397----------------- 1418-----------
1398 1419
1399Contains, as a percentage of the dirtyable system memory (free pages + mapped 1420Contains, as a percentage of the dirtyable system memory (free pages + mapped
1400pages + file cache, not including locked pages and HugePages), the number of 1421pages + file cache, not including locked pages and HugePages), the number of
1401pages at which a process which is generating disk writes will itself start 1422pages at which a process which is generating disk writes will itself start
1402writing out dirty data. 1423writing out dirty data.
1403 1424
1425If dirty_ratio is written, dirty_bytes becomes a function of its value
1426(dirty_ratio * the amount of dirtyable system memory).
1427
1404dirty_writeback_centisecs 1428dirty_writeback_centisecs
1405------------------------- 1429-------------------------
1406 1430
diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt
index d79eeda7a699..cd05994a49e6 100644
--- a/Documentation/sysctl/vm.txt
+++ b/Documentation/sysctl/vm.txt
@@ -41,7 +41,8 @@ Currently, these files are in /proc/sys/vm:
41 41
42============================================================== 42==============================================================
43 43
44dirty_ratio, dirty_background_ratio, dirty_expire_centisecs, 44dirty_bytes, dirty_ratio, dirty_background_bytes,
45dirty_background_ratio, dirty_expire_centisecs,
45dirty_writeback_centisecs, highmem_is_dirtyable, 46dirty_writeback_centisecs, highmem_is_dirtyable,
46vfs_cache_pressure, laptop_mode, block_dump, swap_token_timeout, 47vfs_cache_pressure, laptop_mode, block_dump, swap_token_timeout,
47drop-caches, hugepages_treat_as_movable: 48drop-caches, hugepages_treat_as_movable:
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 259e9ea58cab..bb28c975c1d7 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -107,7 +107,9 @@ void throttle_vm_writeout(gfp_t gfp_mask);
107 107
108/* These are exported to sysctl. */ 108/* These are exported to sysctl. */
109extern int dirty_background_ratio; 109extern int dirty_background_ratio;
110extern unsigned long dirty_background_bytes;
110extern int vm_dirty_ratio; 111extern int vm_dirty_ratio;
112extern unsigned long vm_dirty_bytes;
111extern int dirty_writeback_interval; 113extern int dirty_writeback_interval;
112extern int dirty_expire_interval; 114extern int dirty_expire_interval;
113extern int vm_highmem_is_dirtyable; 115extern int vm_highmem_is_dirtyable;
@@ -116,9 +118,18 @@ extern int laptop_mode;
116 118
117extern unsigned long determine_dirtyable_memory(void); 119extern unsigned long determine_dirtyable_memory(void);
118 120
121extern int dirty_background_ratio_handler(struct ctl_table *table, int write,
122 struct file *filp, void __user *buffer, size_t *lenp,
123 loff_t *ppos);
124extern int dirty_background_bytes_handler(struct ctl_table *table, int write,
125 struct file *filp, void __user *buffer, size_t *lenp,
126 loff_t *ppos);
119extern int dirty_ratio_handler(struct ctl_table *table, int write, 127extern int dirty_ratio_handler(struct ctl_table *table, int write,
120 struct file *filp, void __user *buffer, size_t *lenp, 128 struct file *filp, void __user *buffer, size_t *lenp,
121 loff_t *ppos); 129 loff_t *ppos);
130extern int dirty_bytes_handler(struct ctl_table *table, int write,
131 struct file *filp, void __user *buffer, size_t *lenp,
132 loff_t *ppos);
122 133
123struct ctl_table; 134struct ctl_table;
124struct file; 135struct file;
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index ff6d45c7626f..92f6e5bc3c24 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -87,10 +87,6 @@ extern int rcutorture_runnable;
87#endif /* #ifdef CONFIG_RCU_TORTURE_TEST */ 87#endif /* #ifdef CONFIG_RCU_TORTURE_TEST */
88 88
89/* Constants used for minimum and maximum */ 89/* Constants used for minimum and maximum */
90#if defined(CONFIG_HIGHMEM) || defined(CONFIG_DETECT_SOFTLOCKUP)
91static int one = 1;
92#endif
93
94#ifdef CONFIG_DETECT_SOFTLOCKUP 90#ifdef CONFIG_DETECT_SOFTLOCKUP
95static int sixty = 60; 91static int sixty = 60;
96static int neg_one = -1; 92static int neg_one = -1;
@@ -101,6 +97,7 @@ static int two = 2;
101#endif 97#endif
102 98
103static int zero; 99static int zero;
100static int one = 1;
104static int one_hundred = 100; 101static int one_hundred = 100;
105 102
106/* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */ 103/* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
@@ -952,12 +949,22 @@ static struct ctl_table vm_table[] = {
952 .data = &dirty_background_ratio, 949 .data = &dirty_background_ratio,
953 .maxlen = sizeof(dirty_background_ratio), 950 .maxlen = sizeof(dirty_background_ratio),
954 .mode = 0644, 951 .mode = 0644,
955 .proc_handler = &proc_dointvec_minmax, 952 .proc_handler = &dirty_background_ratio_handler,
956 .strategy = &sysctl_intvec, 953 .strategy = &sysctl_intvec,
957 .extra1 = &zero, 954 .extra1 = &zero,
958 .extra2 = &one_hundred, 955 .extra2 = &one_hundred,
959 }, 956 },
960 { 957 {
958 .ctl_name = CTL_UNNUMBERED,
959 .procname = "dirty_background_bytes",
960 .data = &dirty_background_bytes,
961 .maxlen = sizeof(dirty_background_bytes),
962 .mode = 0644,
963 .proc_handler = &dirty_background_bytes_handler,
964 .strategy = &sysctl_intvec,
965 .extra1 = &one,
966 },
967 {
961 .ctl_name = VM_DIRTY_RATIO, 968 .ctl_name = VM_DIRTY_RATIO,
962 .procname = "dirty_ratio", 969 .procname = "dirty_ratio",
963 .data = &vm_dirty_ratio, 970 .data = &vm_dirty_ratio,
@@ -969,6 +976,16 @@ static struct ctl_table vm_table[] = {
969 .extra2 = &one_hundred, 976 .extra2 = &one_hundred,
970 }, 977 },
971 { 978 {
979 .ctl_name = CTL_UNNUMBERED,
980 .procname = "dirty_bytes",
981 .data = &vm_dirty_bytes,
982 .maxlen = sizeof(vm_dirty_bytes),
983 .mode = 0644,
984 .proc_handler = &dirty_bytes_handler,
985 .strategy = &sysctl_intvec,
986 .extra1 = &one,
987 },
988 {
972 .procname = "dirty_writeback_centisecs", 989 .procname = "dirty_writeback_centisecs",
973 .data = &dirty_writeback_interval, 990 .data = &dirty_writeback_interval,
974 .maxlen = sizeof(dirty_writeback_interval), 991 .maxlen = sizeof(dirty_writeback_interval),
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 4d4074cff300..b493db7841dc 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -69,6 +69,12 @@ static inline long sync_writeback_pages(void)
69int dirty_background_ratio = 5; 69int dirty_background_ratio = 5;
70 70
71/* 71/*
72 * dirty_background_bytes starts at 0 (disabled) so that it is a function of
73 * dirty_background_ratio * the amount of dirtyable memory
74 */
75unsigned long dirty_background_bytes;
76
77/*
72 * free highmem will not be subtracted from the total free memory 78 * free highmem will not be subtracted from the total free memory
73 * for calculating free ratios if vm_highmem_is_dirtyable is true 79 * for calculating free ratios if vm_highmem_is_dirtyable is true
74 */ 80 */
@@ -80,6 +86,12 @@ int vm_highmem_is_dirtyable;
80int vm_dirty_ratio = 10; 86int vm_dirty_ratio = 10;
81 87
82/* 88/*
89 * vm_dirty_bytes starts at 0 (disabled) so that it is a function of
90 * vm_dirty_ratio * the amount of dirtyable memory
91 */
92unsigned long vm_dirty_bytes;
93
94/*
83 * The interval between `kupdate'-style writebacks, in jiffies 95 * The interval between `kupdate'-style writebacks, in jiffies
84 */ 96 */
85int dirty_writeback_interval = 5 * HZ; 97int dirty_writeback_interval = 5 * HZ;
@@ -135,23 +147,75 @@ static int calc_period_shift(void)
135{ 147{
136 unsigned long dirty_total; 148 unsigned long dirty_total;
137 149
138 dirty_total = (vm_dirty_ratio * determine_dirtyable_memory()) / 100; 150 if (vm_dirty_bytes)
151 dirty_total = vm_dirty_bytes / PAGE_SIZE;
152 else
153 dirty_total = (vm_dirty_ratio * determine_dirtyable_memory()) /
154 100;
139 return 2 + ilog2(dirty_total - 1); 155 return 2 + ilog2(dirty_total - 1);
140} 156}
141 157
142/* 158/*
143 * update the period when the dirty ratio changes. 159 * update the period when the dirty threshold changes.
144 */ 160 */
161static void update_completion_period(void)
162{
163 int shift = calc_period_shift();
164 prop_change_shift(&vm_completions, shift);
165 prop_change_shift(&vm_dirties, shift);
166}
167
168int dirty_background_ratio_handler(struct ctl_table *table, int write,
169 struct file *filp, void __user *buffer, size_t *lenp,
170 loff_t *ppos)
171{
172 int ret;
173
174 ret = proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos);
175 if (ret == 0 && write)
176 dirty_background_bytes = 0;
177 return ret;
178}
179
180int dirty_background_bytes_handler(struct ctl_table *table, int write,
181 struct file *filp, void __user *buffer, size_t *lenp,
182 loff_t *ppos)
183{
184 int ret;
185
186 ret = proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos);
187 if (ret == 0 && write)
188 dirty_background_ratio = 0;
189 return ret;
190}
191
145int dirty_ratio_handler(struct ctl_table *table, int write, 192int dirty_ratio_handler(struct ctl_table *table, int write,
146 struct file *filp, void __user *buffer, size_t *lenp, 193 struct file *filp, void __user *buffer, size_t *lenp,
147 loff_t *ppos) 194 loff_t *ppos)
148{ 195{
149 int old_ratio = vm_dirty_ratio; 196 int old_ratio = vm_dirty_ratio;
150 int ret = proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos); 197 int ret;
198
199 ret = proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos);
151 if (ret == 0 && write && vm_dirty_ratio != old_ratio) { 200 if (ret == 0 && write && vm_dirty_ratio != old_ratio) {
152 int shift = calc_period_shift(); 201 update_completion_period();
153 prop_change_shift(&vm_completions, shift); 202 vm_dirty_bytes = 0;
154 prop_change_shift(&vm_dirties, shift); 203 }
204 return ret;
205}
206
207
208int dirty_bytes_handler(struct ctl_table *table, int write,
209 struct file *filp, void __user *buffer, size_t *lenp,
210 loff_t *ppos)
211{
212 int old_bytes = vm_dirty_bytes;
213 int ret;
214
215 ret = proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos);
216 if (ret == 0 && write && vm_dirty_bytes != old_bytes) {
217 update_completion_period();
218 vm_dirty_ratio = 0;
155 } 219 }
156 return ret; 220 return ret;
157} 221}
@@ -365,23 +429,29 @@ void
365get_dirty_limits(unsigned long *pbackground, unsigned long *pdirty, 429get_dirty_limits(unsigned long *pbackground, unsigned long *pdirty,
366 unsigned long *pbdi_dirty, struct backing_dev_info *bdi) 430 unsigned long *pbdi_dirty, struct backing_dev_info *bdi)
367{ 431{
368 int background_ratio; /* Percentages */
369 int dirty_ratio;
370 unsigned long background; 432 unsigned long background;
371 unsigned long dirty; 433 unsigned long dirty;
372 unsigned long available_memory = determine_dirtyable_memory(); 434 unsigned long available_memory = determine_dirtyable_memory();
373 struct task_struct *tsk; 435 struct task_struct *tsk;
374 436
375 dirty_ratio = vm_dirty_ratio; 437 if (vm_dirty_bytes)
376 if (dirty_ratio < 5) 438 dirty = DIV_ROUND_UP(vm_dirty_bytes, PAGE_SIZE);
377 dirty_ratio = 5; 439 else {
440 int dirty_ratio;
378 441
379 background_ratio = dirty_background_ratio; 442 dirty_ratio = vm_dirty_ratio;
380 if (background_ratio >= dirty_ratio) 443 if (dirty_ratio < 5)
381 background_ratio = dirty_ratio / 2; 444 dirty_ratio = 5;
445 dirty = (dirty_ratio * available_memory) / 100;
446 }
447
448 if (dirty_background_bytes)
449 background = DIV_ROUND_UP(dirty_background_bytes, PAGE_SIZE);
450 else
451 background = (dirty_background_ratio * available_memory) / 100;
382 452
383 background = (background_ratio * available_memory) / 100; 453 if (background >= dirty)
384 dirty = (dirty_ratio * available_memory) / 100; 454 background = dirty / 2;
385 tsk = current; 455 tsk = current;
386 if (tsk->flags & PF_LESS_THROTTLE || rt_task(tsk)) { 456 if (tsk->flags & PF_LESS_THROTTLE || rt_task(tsk)) {
387 background += background / 4; 457 background += background / 4;