diff options
author | Peter Zijlstra <a.p.zijlstra@chello.nl> | 2008-04-30 03:54:36 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2008-04-30 11:29:50 -0400 |
commit | a42dde04152750426cc620fd277e80fffae2f65a (patch) | |
tree | 98cdf246280c59e0bf044ae6d2c229fc27b73203 | |
parent | 189d3c4a94ef19fca2a71a6a336e9fda900e25e7 (diff) |
mm: bdi: allow setting a maximum for the bdi dirty limit
Add "max_ratio" to /sys/class/bdi. This indicates the maximum percentage of
the global dirty threshold allocated to this bdi.
[mszeredi@suse.cz]
- fix parsing in max_ratio_store().
- export bdi_set_max_ratio() to modules
- limit bdi_dirty with bdi->max_ratio
- document new sysfs attribute
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | Documentation/ABI/testing/sysfs-class-bdi | 9 | ||||
-rw-r--r-- | include/linux/backing-dev.h | 2 | ||||
-rw-r--r-- | include/linux/proportions.h | 13 | ||||
-rw-r--r-- | lib/proportions.c | 38 | ||||
-rw-r--r-- | mm/backing-dev.c | 21 | ||||
-rw-r--r-- | mm/page-writeback.c | 41 |
6 files changed, 111 insertions, 13 deletions
diff --git a/Documentation/ABI/testing/sysfs-class-bdi b/Documentation/ABI/testing/sysfs-class-bdi index b9e8a9368dc6..c55e811ca180 100644 --- a/Documentation/ABI/testing/sysfs-class-bdi +++ b/Documentation/ABI/testing/sysfs-class-bdi | |||
@@ -49,4 +49,11 @@ min_ratio (read-write) | |||
49 | Minimal percentage of global dirty threshold allocated to this | 49 | Minimal percentage of global dirty threshold allocated to this |
50 | bdi. If the value written to this file would make the sum | 50 | bdi. If the value written to this file would make the sum |
51 | of all min_ratio values exceed 100, then EINVAL is returned. | 51 | of all min_ratio values exceed 100, then EINVAL is returned. |
52 | The default is zero | 52 | If min_ratio would become larger than the current max_ratio, |
53 | then also EINVAL is returned. The default is zero | ||
54 | |||
55 | max_ratio (read-write) | ||
56 | |||
57 | Maximal percentage of global dirty threshold allocated to this | ||
58 | bdi. If max_ratio would become smaller than the current | ||
59 | min_ratio, then EINVAL is returned. The default is 100 | ||
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 9a8965518d1d..ad3271d1e90a 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h | |||
@@ -52,6 +52,7 @@ struct backing_dev_info { | |||
52 | int dirty_exceeded; | 52 | int dirty_exceeded; |
53 | 53 | ||
54 | unsigned int min_ratio; | 54 | unsigned int min_ratio; |
55 | unsigned int max_ratio, max_prop_frac; | ||
55 | 56 | ||
56 | struct device *dev; | 57 | struct device *dev; |
57 | }; | 58 | }; |
@@ -140,6 +141,7 @@ static inline unsigned long bdi_stat_error(struct backing_dev_info *bdi) | |||
140 | } | 141 | } |
141 | 142 | ||
142 | int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio); | 143 | int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio); |
144 | int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio); | ||
143 | 145 | ||
144 | /* | 146 | /* |
145 | * Flags in backing_dev_info::capability | 147 | * Flags in backing_dev_info::capability |
diff --git a/include/linux/proportions.h b/include/linux/proportions.h index 2c3b3cad92be..5afc1b23346d 100644 --- a/include/linux/proportions.h +++ b/include/linux/proportions.h | |||
@@ -78,6 +78,19 @@ void prop_inc_percpu(struct prop_descriptor *pd, struct prop_local_percpu *pl) | |||
78 | } | 78 | } |
79 | 79 | ||
80 | /* | 80 | /* |
81 | * Limit the time part in order to ensure there are some bits left for the | ||
82 | * cycle counter and fraction multiply. | ||
83 | */ | ||
84 | #define PROP_MAX_SHIFT (3*BITS_PER_LONG/4) | ||
85 | |||
86 | #define PROP_FRAC_SHIFT (BITS_PER_LONG - PROP_MAX_SHIFT - 1) | ||
87 | #define PROP_FRAC_BASE (1UL << PROP_FRAC_SHIFT) | ||
88 | |||
89 | void __prop_inc_percpu_max(struct prop_descriptor *pd, | ||
90 | struct prop_local_percpu *pl, long frac); | ||
91 | |||
92 | |||
93 | /* | ||
81 | * ----- SINGLE ------ | 94 | * ----- SINGLE ------ |
82 | */ | 95 | */ |
83 | 96 | ||
diff --git a/lib/proportions.c b/lib/proportions.c index 9508d9a7af3e..4f387a643d72 100644 --- a/lib/proportions.c +++ b/lib/proportions.c | |||
@@ -73,12 +73,6 @@ | |||
73 | #include <linux/proportions.h> | 73 | #include <linux/proportions.h> |
74 | #include <linux/rcupdate.h> | 74 | #include <linux/rcupdate.h> |
75 | 75 | ||
76 | /* | ||
77 | * Limit the time part in order to ensure there are some bits left for the | ||
78 | * cycle counter. | ||
79 | */ | ||
80 | #define PROP_MAX_SHIFT (3*BITS_PER_LONG/4) | ||
81 | |||
82 | int prop_descriptor_init(struct prop_descriptor *pd, int shift) | 76 | int prop_descriptor_init(struct prop_descriptor *pd, int shift) |
83 | { | 77 | { |
84 | int err; | 78 | int err; |
@@ -268,6 +262,38 @@ void __prop_inc_percpu(struct prop_descriptor *pd, struct prop_local_percpu *pl) | |||
268 | } | 262 | } |
269 | 263 | ||
270 | /* | 264 | /* |
265 | * identical to __prop_inc_percpu, except that it limits this pl's fraction to | ||
266 | * @frac/PROP_FRAC_BASE by ignoring events when this limit has been exceeded. | ||
267 | */ | ||
268 | void __prop_inc_percpu_max(struct prop_descriptor *pd, | ||
269 | struct prop_local_percpu *pl, long frac) | ||
270 | { | ||
271 | struct prop_global *pg = prop_get_global(pd); | ||
272 | |||
273 | prop_norm_percpu(pg, pl); | ||
274 | |||
275 | if (unlikely(frac != PROP_FRAC_BASE)) { | ||
276 | unsigned long period_2 = 1UL << (pg->shift - 1); | ||
277 | unsigned long counter_mask = period_2 - 1; | ||
278 | unsigned long global_count; | ||
279 | long numerator, denominator; | ||
280 | |||
281 | numerator = percpu_counter_read_positive(&pl->events); | ||
282 | global_count = percpu_counter_read(&pg->events); | ||
283 | denominator = period_2 + (global_count & counter_mask); | ||
284 | |||
285 | if (numerator > ((denominator * frac) >> PROP_FRAC_SHIFT)) | ||
286 | goto out_put; | ||
287 | } | ||
288 | |||
289 | percpu_counter_add(&pl->events, 1); | ||
290 | percpu_counter_add(&pg->events, 1); | ||
291 | |||
292 | out_put: | ||
293 | prop_put_global(pd, pg); | ||
294 | } | ||
295 | |||
296 | /* | ||
271 | * Obtain a fraction of this proportion | 297 | * Obtain a fraction of this proportion |
272 | * | 298 | * |
273 | * p_{j} = x_{j} / (period/2 + t % period/2) | 299 | * p_{j} = x_{j} / (period/2 + t % period/2) |
diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 4967fb176e53..08361b6aad50 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c | |||
@@ -73,6 +73,24 @@ static ssize_t min_ratio_store(struct device *dev, | |||
73 | } | 73 | } |
74 | BDI_SHOW(min_ratio, bdi->min_ratio) | 74 | BDI_SHOW(min_ratio, bdi->min_ratio) |
75 | 75 | ||
76 | static ssize_t max_ratio_store(struct device *dev, | ||
77 | struct device_attribute *attr, const char *buf, size_t count) | ||
78 | { | ||
79 | struct backing_dev_info *bdi = dev_get_drvdata(dev); | ||
80 | char *end; | ||
81 | unsigned int ratio; | ||
82 | ssize_t ret = -EINVAL; | ||
83 | |||
84 | ratio = simple_strtoul(buf, &end, 10); | ||
85 | if (*buf && (end[0] == '\0' || (end[0] == '\n' && end[1] == '\0'))) { | ||
86 | ret = bdi_set_max_ratio(bdi, ratio); | ||
87 | if (!ret) | ||
88 | ret = count; | ||
89 | } | ||
90 | return ret; | ||
91 | } | ||
92 | BDI_SHOW(max_ratio, bdi->max_ratio) | ||
93 | |||
76 | #define __ATTR_RW(attr) __ATTR(attr, 0644, attr##_show, attr##_store) | 94 | #define __ATTR_RW(attr) __ATTR(attr, 0644, attr##_show, attr##_store) |
77 | 95 | ||
78 | static struct device_attribute bdi_dev_attrs[] = { | 96 | static struct device_attribute bdi_dev_attrs[] = { |
@@ -82,6 +100,7 @@ static struct device_attribute bdi_dev_attrs[] = { | |||
82 | __ATTR_RO(dirty_kb), | 100 | __ATTR_RO(dirty_kb), |
83 | __ATTR_RO(bdi_dirty_kb), | 101 | __ATTR_RO(bdi_dirty_kb), |
84 | __ATTR_RW(min_ratio), | 102 | __ATTR_RW(min_ratio), |
103 | __ATTR_RW(max_ratio), | ||
85 | __ATTR_NULL, | 104 | __ATTR_NULL, |
86 | }; | 105 | }; |
87 | 106 | ||
@@ -147,6 +166,8 @@ int bdi_init(struct backing_dev_info *bdi) | |||
147 | bdi->dev = NULL; | 166 | bdi->dev = NULL; |
148 | 167 | ||
149 | bdi->min_ratio = 0; | 168 | bdi->min_ratio = 0; |
169 | bdi->max_ratio = 100; | ||
170 | bdi->max_prop_frac = PROP_FRAC_BASE; | ||
150 | 171 | ||
151 | for (i = 0; i < NR_BDI_STAT_ITEMS; i++) { | 172 | for (i = 0; i < NR_BDI_STAT_ITEMS; i++) { |
152 | err = percpu_counter_init_irq(&bdi->bdi_stat[i], 0); | 173 | err = percpu_counter_init_irq(&bdi->bdi_stat[i], 0); |
diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 4ac077f4269c..2a9942f5387c 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c | |||
@@ -164,7 +164,8 @@ int dirty_ratio_handler(struct ctl_table *table, int write, | |||
164 | */ | 164 | */ |
165 | static inline void __bdi_writeout_inc(struct backing_dev_info *bdi) | 165 | static inline void __bdi_writeout_inc(struct backing_dev_info *bdi) |
166 | { | 166 | { |
167 | __prop_inc_percpu(&vm_completions, &bdi->completions); | 167 | __prop_inc_percpu_max(&vm_completions, &bdi->completions, |
168 | bdi->max_prop_frac); | ||
168 | } | 169 | } |
169 | 170 | ||
170 | static inline void task_dirty_inc(struct task_struct *tsk) | 171 | static inline void task_dirty_inc(struct task_struct *tsk) |
@@ -254,16 +255,42 @@ int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio) | |||
254 | unsigned long flags; | 255 | unsigned long flags; |
255 | 256 | ||
256 | spin_lock_irqsave(&bdi_lock, flags); | 257 | spin_lock_irqsave(&bdi_lock, flags); |
257 | min_ratio -= bdi->min_ratio; | 258 | if (min_ratio > bdi->max_ratio) { |
258 | if (bdi_min_ratio + min_ratio < 100) { | ||
259 | bdi_min_ratio += min_ratio; | ||
260 | bdi->min_ratio += min_ratio; | ||
261 | } else | ||
262 | ret = -EINVAL; | 259 | ret = -EINVAL; |
260 | } else { | ||
261 | min_ratio -= bdi->min_ratio; | ||
262 | if (bdi_min_ratio + min_ratio < 100) { | ||
263 | bdi_min_ratio += min_ratio; | ||
264 | bdi->min_ratio += min_ratio; | ||
265 | } else { | ||
266 | ret = -EINVAL; | ||
267 | } | ||
268 | } | ||
269 | spin_unlock_irqrestore(&bdi_lock, flags); | ||
270 | |||
271 | return ret; | ||
272 | } | ||
273 | |||
274 | int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned max_ratio) | ||
275 | { | ||
276 | unsigned long flags; | ||
277 | int ret = 0; | ||
278 | |||
279 | if (max_ratio > 100) | ||
280 | return -EINVAL; | ||
281 | |||
282 | spin_lock_irqsave(&bdi_lock, flags); | ||
283 | if (bdi->min_ratio > max_ratio) { | ||
284 | ret = -EINVAL; | ||
285 | } else { | ||
286 | bdi->max_ratio = max_ratio; | ||
287 | bdi->max_prop_frac = (PROP_FRAC_BASE * max_ratio) / 100; | ||
288 | } | ||
263 | spin_unlock_irqrestore(&bdi_lock, flags); | 289 | spin_unlock_irqrestore(&bdi_lock, flags); |
264 | 290 | ||
265 | return ret; | 291 | return ret; |
266 | } | 292 | } |
293 | EXPORT_SYMBOL(bdi_set_max_ratio); | ||
267 | 294 | ||
268 | /* | 295 | /* |
269 | * Work out the current dirty-memory clamping and background writeout | 296 | * Work out the current dirty-memory clamping and background writeout |
@@ -365,6 +392,8 @@ get_dirty_limits(long *pbackground, long *pdirty, long *pbdi_dirty, | |||
365 | bdi_dirty *= numerator; | 392 | bdi_dirty *= numerator; |
366 | do_div(bdi_dirty, denominator); | 393 | do_div(bdi_dirty, denominator); |
367 | bdi_dirty += (dirty * bdi->min_ratio) / 100; | 394 | bdi_dirty += (dirty * bdi->min_ratio) / 100; |
395 | if (bdi_dirty > (dirty * bdi->max_ratio) / 100) | ||
396 | bdi_dirty = dirty * bdi->max_ratio / 100; | ||
368 | 397 | ||
369 | *pbdi_dirty = bdi_dirty; | 398 | *pbdi_dirty = bdi_dirty; |
370 | clip_bdi_dirty_limit(bdi, dirty, pbdi_dirty); | 399 | clip_bdi_dirty_limit(bdi, dirty, pbdi_dirty); |