aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Peter Zijlstra <a.p.zijlstra@chello.nl> 2008-04-30 03:54:36 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2008-04-30 11:29:50 -0400
commit: a42dde04152750426cc620fd277e80fffae2f65a (patch)
tree: 98cdf246280c59e0bf044ae6d2c229fc27b73203
parent: 189d3c4a94ef19fca2a71a6a336e9fda900e25e7 (diff)
mm: bdi: allow setting a maximum for the bdi dirty limit
Add "max_ratio" to /sys/class/bdi. This indicates the maximum percentage of the global dirty threshold allocated to this bdi.

[mszeredi@suse.cz]
 - fix parsing in max_ratio_store().
 - export bdi_set_max_ratio() to modules
 - limit bdi_dirty with bdi->max_ratio
 - document new sysfs attribute

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- Documentation/ABI/testing/sysfs-class-bdi 9
-rw-r--r-- include/linux/backing-dev.h 2
-rw-r--r-- include/linux/proportions.h 13
-rw-r--r-- lib/proportions.c 38
-rw-r--r-- mm/backing-dev.c 21
-rw-r--r-- mm/page-writeback.c 41
6 files changed, 111 insertions, 13 deletions
diff --git a/Documentation/ABI/testing/sysfs-class-bdi b/Documentation/ABI/testing/sysfs-class-bdi
index b9e8a9368dc6..c55e811ca180 100644
--- a/Documentation/ABI/testing/sysfs-class-bdi
+++ b/Documentation/ABI/testing/sysfs-class-bdi
@@ -49,4 +49,11 @@ min_ratio (read-write)
49 Minimal percentage of global dirty threshold allocated to this 49 Minimal percentage of global dirty threshold allocated to this
50 bdi. If the value written to this file would make the sum 50 bdi. If the value written to this file would make the sum
51 of all min_ratio values exceed 100, then EINVAL is returned. 51 of all min_ratio values exceed 100, then EINVAL is returned.
52 The default is zero 52 If min_ratio would become larger than the current max_ratio,
53 then also EINVAL is returned. The default is zero
54
55max_ratio (read-write)
56
57 Maximal percentage of global dirty threshold allocated to this
58 bdi. If max_ratio would become smaller than the current
59 min_ratio, then EINVAL is returned. The default is 100
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 9a8965518d1d..ad3271d1e90a 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -52,6 +52,7 @@ struct backing_dev_info {
52 int dirty_exceeded; 52 int dirty_exceeded;
53 53
54 unsigned int min_ratio; 54 unsigned int min_ratio;
55 unsigned int max_ratio, max_prop_frac;
55 56
56 struct device *dev; 57 struct device *dev;
57}; 58};
@@ -140,6 +141,7 @@ static inline unsigned long bdi_stat_error(struct backing_dev_info *bdi)
140} 141}
141 142
142int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio); 143int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio);
144int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio);
143 145
144/* 146/*
145 * Flags in backing_dev_info::capability 147 * Flags in backing_dev_info::capability
diff --git a/include/linux/proportions.h b/include/linux/proportions.h
index 2c3b3cad92be..5afc1b23346d 100644
--- a/include/linux/proportions.h
+++ b/include/linux/proportions.h
@@ -78,6 +78,19 @@ void prop_inc_percpu(struct prop_descriptor *pd, struct prop_local_percpu *pl)
78} 78}
79 79
80/* 80/*
81 * Limit the time part in order to ensure there are some bits left for the
82 * cycle counter and fraction multiply.
83 */
84#define PROP_MAX_SHIFT (3*BITS_PER_LONG/4)
85
86#define PROP_FRAC_SHIFT (BITS_PER_LONG - PROP_MAX_SHIFT - 1)
87#define PROP_FRAC_BASE (1UL << PROP_FRAC_SHIFT)
88
89void __prop_inc_percpu_max(struct prop_descriptor *pd,
90 struct prop_local_percpu *pl, long frac);
91
92
93/*
81 * ----- SINGLE ------ 94 * ----- SINGLE ------
82 */ 95 */
83 96
diff --git a/lib/proportions.c b/lib/proportions.c
index 9508d9a7af3e..4f387a643d72 100644
--- a/lib/proportions.c
+++ b/lib/proportions.c
@@ -73,12 +73,6 @@
73#include <linux/proportions.h> 73#include <linux/proportions.h>
74#include <linux/rcupdate.h> 74#include <linux/rcupdate.h>
75 75
76/*
77 * Limit the time part in order to ensure there are some bits left for the
78 * cycle counter.
79 */
80#define PROP_MAX_SHIFT (3*BITS_PER_LONG/4)
81
82int prop_descriptor_init(struct prop_descriptor *pd, int shift) 76int prop_descriptor_init(struct prop_descriptor *pd, int shift)
83{ 77{
84 int err; 78 int err;
@@ -268,6 +262,38 @@ void __prop_inc_percpu(struct prop_descriptor *pd, struct prop_local_percpu *pl)
268} 262}
269 263
270/* 264/*
265 * identical to __prop_inc_percpu, except that it limits this pl's fraction to
266 * @frac/PROP_FRAC_BASE by ignoring events when this limit has been exceeded.
267 */
268void __prop_inc_percpu_max(struct prop_descriptor *pd,
269 struct prop_local_percpu *pl, long frac)
270{
271 struct prop_global *pg = prop_get_global(pd);
272
273 prop_norm_percpu(pg, pl);
274
275 if (unlikely(frac != PROP_FRAC_BASE)) {
276 unsigned long period_2 = 1UL << (pg->shift - 1);
277 unsigned long counter_mask = period_2 - 1;
278 unsigned long global_count;
279 long numerator, denominator;
280
281 numerator = percpu_counter_read_positive(&pl->events);
282 global_count = percpu_counter_read(&pg->events);
283 denominator = period_2 + (global_count & counter_mask);
284
285 if (numerator > ((denominator * frac) >> PROP_FRAC_SHIFT))
286 goto out_put;
287 }
288
289 percpu_counter_add(&pl->events, 1);
290 percpu_counter_add(&pg->events, 1);
291
292out_put:
293 prop_put_global(pd, pg);
294}
295
296/*
271 * Obtain a fraction of this proportion 297 * Obtain a fraction of this proportion
272 * 298 *
273 * p_{j} = x_{j} / (period/2 + t % period/2) 299 * p_{j} = x_{j} / (period/2 + t % period/2)
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 4967fb176e53..08361b6aad50 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -73,6 +73,24 @@ static ssize_t min_ratio_store(struct device *dev,
73} 73}
74BDI_SHOW(min_ratio, bdi->min_ratio) 74BDI_SHOW(min_ratio, bdi->min_ratio)
75 75
76static ssize_t max_ratio_store(struct device *dev,
77 struct device_attribute *attr, const char *buf, size_t count)
78{
79 struct backing_dev_info *bdi = dev_get_drvdata(dev);
80 char *end;
81 unsigned int ratio;
82 ssize_t ret = -EINVAL;
83
84 ratio = simple_strtoul(buf, &end, 10);
85 if (*buf && (end[0] == '\0' || (end[0] == '\n' && end[1] == '\0'))) {
86 ret = bdi_set_max_ratio(bdi, ratio);
87 if (!ret)
88 ret = count;
89 }
90 return ret;
91}
92BDI_SHOW(max_ratio, bdi->max_ratio)
93
76#define __ATTR_RW(attr) __ATTR(attr, 0644, attr##_show, attr##_store) 94#define __ATTR_RW(attr) __ATTR(attr, 0644, attr##_show, attr##_store)
77 95
78static struct device_attribute bdi_dev_attrs[] = { 96static struct device_attribute bdi_dev_attrs[] = {
@@ -82,6 +100,7 @@ static struct device_attribute bdi_dev_attrs[] = {
82 __ATTR_RO(dirty_kb), 100 __ATTR_RO(dirty_kb),
83 __ATTR_RO(bdi_dirty_kb), 101 __ATTR_RO(bdi_dirty_kb),
84 __ATTR_RW(min_ratio), 102 __ATTR_RW(min_ratio),
103 __ATTR_RW(max_ratio),
85 __ATTR_NULL, 104 __ATTR_NULL,
86}; 105};
87 106
@@ -147,6 +166,8 @@ int bdi_init(struct backing_dev_info *bdi)
147 bdi->dev = NULL; 166 bdi->dev = NULL;
148 167
149 bdi->min_ratio = 0; 168 bdi->min_ratio = 0;
169 bdi->max_ratio = 100;
170 bdi->max_prop_frac = PROP_FRAC_BASE;
150 171
151 for (i = 0; i < NR_BDI_STAT_ITEMS; i++) { 172 for (i = 0; i < NR_BDI_STAT_ITEMS; i++) {
152 err = percpu_counter_init_irq(&bdi->bdi_stat[i], 0); 173 err = percpu_counter_init_irq(&bdi->bdi_stat[i], 0);
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 4ac077f4269c..2a9942f5387c 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -164,7 +164,8 @@ int dirty_ratio_handler(struct ctl_table *table, int write,
164 */ 164 */
165static inline void __bdi_writeout_inc(struct backing_dev_info *bdi) 165static inline void __bdi_writeout_inc(struct backing_dev_info *bdi)
166{ 166{
167 __prop_inc_percpu(&vm_completions, &bdi->completions); 167 __prop_inc_percpu_max(&vm_completions, &bdi->completions,
168 bdi->max_prop_frac);
168} 169}
169 170
170static inline void task_dirty_inc(struct task_struct *tsk) 171static inline void task_dirty_inc(struct task_struct *tsk)
@@ -254,16 +255,42 @@ int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio)
254 unsigned long flags; 255 unsigned long flags;
255 256
256 spin_lock_irqsave(&bdi_lock, flags); 257 spin_lock_irqsave(&bdi_lock, flags);
257 min_ratio -= bdi->min_ratio; 258 if (min_ratio > bdi->max_ratio) {
258 if (bdi_min_ratio + min_ratio < 100) {
259 bdi_min_ratio += min_ratio;
260 bdi->min_ratio += min_ratio;
261 } else
262 ret = -EINVAL; 259 ret = -EINVAL;
260 } else {
261 min_ratio -= bdi->min_ratio;
262 if (bdi_min_ratio + min_ratio < 100) {
263 bdi_min_ratio += min_ratio;
264 bdi->min_ratio += min_ratio;
265 } else {
266 ret = -EINVAL;
267 }
268 }
269 spin_unlock_irqrestore(&bdi_lock, flags);
270
271 return ret;
272}
273
274int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned max_ratio)
275{
276 unsigned long flags;
277 int ret = 0;
278
279 if (max_ratio > 100)
280 return -EINVAL;
281
282 spin_lock_irqsave(&bdi_lock, flags);
283 if (bdi->min_ratio > max_ratio) {
284 ret = -EINVAL;
285 } else {
286 bdi->max_ratio = max_ratio;
287 bdi->max_prop_frac = (PROP_FRAC_BASE * max_ratio) / 100;
288 }
263 spin_unlock_irqrestore(&bdi_lock, flags); 289 spin_unlock_irqrestore(&bdi_lock, flags);
264 290
265 return ret; 291 return ret;
266} 292}
293EXPORT_SYMBOL(bdi_set_max_ratio);
267 294
268/* 295/*
269 * Work out the current dirty-memory clamping and background writeout 296 * Work out the current dirty-memory clamping and background writeout
@@ -365,6 +392,8 @@ get_dirty_limits(long *pbackground, long *pdirty, long *pbdi_dirty,
365 bdi_dirty *= numerator; 392 bdi_dirty *= numerator;
366 do_div(bdi_dirty, denominator); 393 do_div(bdi_dirty, denominator);
367 bdi_dirty += (dirty * bdi->min_ratio) / 100; 394 bdi_dirty += (dirty * bdi->min_ratio) / 100;
395 if (bdi_dirty > (dirty * bdi->max_ratio) / 100)
396 bdi_dirty = dirty * bdi->max_ratio / 100;
368 397
369 *pbdi_dirty = bdi_dirty; 398 *pbdi_dirty = bdi_dirty;
370 clip_bdi_dirty_limit(bdi, dirty, pbdi_dirty); 399 clip_bdi_dirty_limit(bdi, dirty, pbdi_dirty);