diff options
| author | Peter Zijlstra <a.p.zijlstra@chello.nl> | 2008-04-30 03:54:36 -0400 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2008-04-30 11:29:50 -0400 |
| commit | a42dde04152750426cc620fd277e80fffae2f65a (patch) | |
| tree | 98cdf246280c59e0bf044ae6d2c229fc27b73203 | |
| parent | 189d3c4a94ef19fca2a71a6a336e9fda900e25e7 (diff) | |
mm: bdi: allow setting a maximum for the bdi dirty limit
Add "max_ratio" to /sys/class/bdi. This indicates the maximum percentage of
the global dirty threshold allocated to this bdi.
[mszeredi@suse.cz]
- fix parsing in max_ratio_store().
- export bdi_set_max_ratio() to modules
- limit bdi_dirty with bdi->max_ratio
- document new sysfs attribute
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
| -rw-r--r-- | Documentation/ABI/testing/sysfs-class-bdi | 9 | ||||
| -rw-r--r-- | include/linux/backing-dev.h | 2 | ||||
| -rw-r--r-- | include/linux/proportions.h | 13 | ||||
| -rw-r--r-- | lib/proportions.c | 38 | ||||
| -rw-r--r-- | mm/backing-dev.c | 21 | ||||
| -rw-r--r-- | mm/page-writeback.c | 41 |
6 files changed, 111 insertions, 13 deletions
diff --git a/Documentation/ABI/testing/sysfs-class-bdi b/Documentation/ABI/testing/sysfs-class-bdi index b9e8a9368dc6..c55e811ca180 100644 --- a/Documentation/ABI/testing/sysfs-class-bdi +++ b/Documentation/ABI/testing/sysfs-class-bdi | |||
| @@ -49,4 +49,11 @@ min_ratio (read-write) | |||
| 49 | Minimal percentage of global dirty threshold allocated to this | 49 | Minimal percentage of global dirty threshold allocated to this |
| 50 | bdi. If the value written to this file would make the sum | 50 | bdi. If the value written to this file would make the sum |
| 51 | of all min_ratio values exceed 100, then EINVAL is returned. | 51 | of all min_ratio values exceed 100, then EINVAL is returned. |
| 52 | The default is zero | 52 | If min_ratio would become larger than the current max_ratio, |
| 53 | then also EINVAL is returned. The default is zero | ||
| 54 | |||
| 55 | max_ratio (read-write) | ||
| 56 | |||
| 57 | Maximal percentage of global dirty threshold allocated to this | ||
| 58 | bdi. If max_ratio would become smaller than the current | ||
| 59 | min_ratio, then EINVAL is returned. The default is 100 | ||
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 9a8965518d1d..ad3271d1e90a 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h | |||
| @@ -52,6 +52,7 @@ struct backing_dev_info { | |||
| 52 | int dirty_exceeded; | 52 | int dirty_exceeded; |
| 53 | 53 | ||
| 54 | unsigned int min_ratio; | 54 | unsigned int min_ratio; |
| 55 | unsigned int max_ratio, max_prop_frac; | ||
| 55 | 56 | ||
| 56 | struct device *dev; | 57 | struct device *dev; |
| 57 | }; | 58 | }; |
| @@ -140,6 +141,7 @@ static inline unsigned long bdi_stat_error(struct backing_dev_info *bdi) | |||
| 140 | } | 141 | } |
| 141 | 142 | ||
| 142 | int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio); | 143 | int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio); |
| 144 | int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio); | ||
| 143 | 145 | ||
| 144 | /* | 146 | /* |
| 145 | * Flags in backing_dev_info::capability | 147 | * Flags in backing_dev_info::capability |
diff --git a/include/linux/proportions.h b/include/linux/proportions.h index 2c3b3cad92be..5afc1b23346d 100644 --- a/include/linux/proportions.h +++ b/include/linux/proportions.h | |||
| @@ -78,6 +78,19 @@ void prop_inc_percpu(struct prop_descriptor *pd, struct prop_local_percpu *pl) | |||
| 78 | } | 78 | } |
| 79 | 79 | ||
| 80 | /* | 80 | /* |
| 81 | * Limit the time part in order to ensure there are some bits left for the | ||
| 82 | * cycle counter and fraction multiply. | ||
| 83 | */ | ||
| 84 | #define PROP_MAX_SHIFT (3*BITS_PER_LONG/4) | ||
| 85 | |||
| 86 | #define PROP_FRAC_SHIFT (BITS_PER_LONG - PROP_MAX_SHIFT - 1) | ||
| 87 | #define PROP_FRAC_BASE (1UL << PROP_FRAC_SHIFT) | ||
| 88 | |||
| 89 | void __prop_inc_percpu_max(struct prop_descriptor *pd, | ||
| 90 | struct prop_local_percpu *pl, long frac); | ||
| 91 | |||
| 92 | |||
| 93 | /* | ||
| 81 | * ----- SINGLE ------ | 94 | * ----- SINGLE ------ |
| 82 | */ | 95 | */ |
| 83 | 96 | ||
diff --git a/lib/proportions.c b/lib/proportions.c index 9508d9a7af3e..4f387a643d72 100644 --- a/lib/proportions.c +++ b/lib/proportions.c | |||
| @@ -73,12 +73,6 @@ | |||
| 73 | #include <linux/proportions.h> | 73 | #include <linux/proportions.h> |
| 74 | #include <linux/rcupdate.h> | 74 | #include <linux/rcupdate.h> |
| 75 | 75 | ||
| 76 | /* | ||
| 77 | * Limit the time part in order to ensure there are some bits left for the | ||
| 78 | * cycle counter. | ||
| 79 | */ | ||
| 80 | #define PROP_MAX_SHIFT (3*BITS_PER_LONG/4) | ||
| 81 | |||
| 82 | int prop_descriptor_init(struct prop_descriptor *pd, int shift) | 76 | int prop_descriptor_init(struct prop_descriptor *pd, int shift) |
| 83 | { | 77 | { |
| 84 | int err; | 78 | int err; |
| @@ -268,6 +262,38 @@ void __prop_inc_percpu(struct prop_descriptor *pd, struct prop_local_percpu *pl) | |||
| 268 | } | 262 | } |
| 269 | 263 | ||
| 270 | /* | 264 | /* |
| 265 | * identical to __prop_inc_percpu, except that it limits this pl's fraction to | ||
| 266 | * @frac/PROP_FRAC_BASE by ignoring events when this limit has been exceeded. | ||
| 267 | */ | ||
| 268 | void __prop_inc_percpu_max(struct prop_descriptor *pd, | ||
| 269 | struct prop_local_percpu *pl, long frac) | ||
| 270 | { | ||
| 271 | struct prop_global *pg = prop_get_global(pd); | ||
| 272 | |||
| 273 | prop_norm_percpu(pg, pl); | ||
| 274 | |||
| 275 | if (unlikely(frac != PROP_FRAC_BASE)) { | ||
| 276 | unsigned long period_2 = 1UL << (pg->shift - 1); | ||
| 277 | unsigned long counter_mask = period_2 - 1; | ||
| 278 | unsigned long global_count; | ||
| 279 | long numerator, denominator; | ||
| 280 | |||
| 281 | numerator = percpu_counter_read_positive(&pl->events); | ||
| 282 | global_count = percpu_counter_read(&pg->events); | ||
| 283 | denominator = period_2 + (global_count & counter_mask); | ||
| 284 | |||
| 285 | if (numerator > ((denominator * frac) >> PROP_FRAC_SHIFT)) | ||
| 286 | goto out_put; | ||
| 287 | } | ||
| 288 | |||
| 289 | percpu_counter_add(&pl->events, 1); | ||
| 290 | percpu_counter_add(&pg->events, 1); | ||
| 291 | |||
| 292 | out_put: | ||
| 293 | prop_put_global(pd, pg); | ||
| 294 | } | ||
| 295 | |||
| 296 | /* | ||
| 271 | * Obtain a fraction of this proportion | 297 | * Obtain a fraction of this proportion |
| 272 | * | 298 | * |
| 273 | * p_{j} = x_{j} / (period/2 + t % period/2) | 299 | * p_{j} = x_{j} / (period/2 + t % period/2) |
diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 4967fb176e53..08361b6aad50 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c | |||
| @@ -73,6 +73,24 @@ static ssize_t min_ratio_store(struct device *dev, | |||
| 73 | } | 73 | } |
| 74 | BDI_SHOW(min_ratio, bdi->min_ratio) | 74 | BDI_SHOW(min_ratio, bdi->min_ratio) |
| 75 | 75 | ||
| 76 | static ssize_t max_ratio_store(struct device *dev, | ||
| 77 | struct device_attribute *attr, const char *buf, size_t count) | ||
| 78 | { | ||
| 79 | struct backing_dev_info *bdi = dev_get_drvdata(dev); | ||
| 80 | char *end; | ||
| 81 | unsigned int ratio; | ||
| 82 | ssize_t ret = -EINVAL; | ||
| 83 | |||
| 84 | ratio = simple_strtoul(buf, &end, 10); | ||
| 85 | if (*buf && (end[0] == '\0' || (end[0] == '\n' && end[1] == '\0'))) { | ||
| 86 | ret = bdi_set_max_ratio(bdi, ratio); | ||
| 87 | if (!ret) | ||
| 88 | ret = count; | ||
| 89 | } | ||
| 90 | return ret; | ||
| 91 | } | ||
| 92 | BDI_SHOW(max_ratio, bdi->max_ratio) | ||
| 93 | |||
| 76 | #define __ATTR_RW(attr) __ATTR(attr, 0644, attr##_show, attr##_store) | 94 | #define __ATTR_RW(attr) __ATTR(attr, 0644, attr##_show, attr##_store) |
| 77 | 95 | ||
| 78 | static struct device_attribute bdi_dev_attrs[] = { | 96 | static struct device_attribute bdi_dev_attrs[] = { |
| @@ -82,6 +100,7 @@ static struct device_attribute bdi_dev_attrs[] = { | |||
| 82 | __ATTR_RO(dirty_kb), | 100 | __ATTR_RO(dirty_kb), |
| 83 | __ATTR_RO(bdi_dirty_kb), | 101 | __ATTR_RO(bdi_dirty_kb), |
| 84 | __ATTR_RW(min_ratio), | 102 | __ATTR_RW(min_ratio), |
| 103 | __ATTR_RW(max_ratio), | ||
| 85 | __ATTR_NULL, | 104 | __ATTR_NULL, |
| 86 | }; | 105 | }; |
| 87 | 106 | ||
| @@ -147,6 +166,8 @@ int bdi_init(struct backing_dev_info *bdi) | |||
| 147 | bdi->dev = NULL; | 166 | bdi->dev = NULL; |
| 148 | 167 | ||
| 149 | bdi->min_ratio = 0; | 168 | bdi->min_ratio = 0; |
| 169 | bdi->max_ratio = 100; | ||
| 170 | bdi->max_prop_frac = PROP_FRAC_BASE; | ||
| 150 | 171 | ||
| 151 | for (i = 0; i < NR_BDI_STAT_ITEMS; i++) { | 172 | for (i = 0; i < NR_BDI_STAT_ITEMS; i++) { |
| 152 | err = percpu_counter_init_irq(&bdi->bdi_stat[i], 0); | 173 | err = percpu_counter_init_irq(&bdi->bdi_stat[i], 0); |
diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 4ac077f4269c..2a9942f5387c 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c | |||
| @@ -164,7 +164,8 @@ int dirty_ratio_handler(struct ctl_table *table, int write, | |||
| 164 | */ | 164 | */ |
| 165 | static inline void __bdi_writeout_inc(struct backing_dev_info *bdi) | 165 | static inline void __bdi_writeout_inc(struct backing_dev_info *bdi) |
| 166 | { | 166 | { |
| 167 | __prop_inc_percpu(&vm_completions, &bdi->completions); | 167 | __prop_inc_percpu_max(&vm_completions, &bdi->completions, |
| 168 | bdi->max_prop_frac); | ||
| 168 | } | 169 | } |
| 169 | 170 | ||
| 170 | static inline void task_dirty_inc(struct task_struct *tsk) | 171 | static inline void task_dirty_inc(struct task_struct *tsk) |
| @@ -254,16 +255,42 @@ int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio) | |||
| 254 | unsigned long flags; | 255 | unsigned long flags; |
| 255 | 256 | ||
| 256 | spin_lock_irqsave(&bdi_lock, flags); | 257 | spin_lock_irqsave(&bdi_lock, flags); |
| 257 | min_ratio -= bdi->min_ratio; | 258 | if (min_ratio > bdi->max_ratio) { |
| 258 | if (bdi_min_ratio + min_ratio < 100) { | ||
| 259 | bdi_min_ratio += min_ratio; | ||
| 260 | bdi->min_ratio += min_ratio; | ||
| 261 | } else | ||
| 262 | ret = -EINVAL; | 259 | ret = -EINVAL; |
| 260 | } else { | ||
| 261 | min_ratio -= bdi->min_ratio; | ||
| 262 | if (bdi_min_ratio + min_ratio < 100) { | ||
| 263 | bdi_min_ratio += min_ratio; | ||
| 264 | bdi->min_ratio += min_ratio; | ||
| 265 | } else { | ||
| 266 | ret = -EINVAL; | ||
| 267 | } | ||
| 268 | } | ||
| 269 | spin_unlock_irqrestore(&bdi_lock, flags); | ||
| 270 | |||
| 271 | return ret; | ||
| 272 | } | ||
| 273 | |||
| 274 | int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned max_ratio) | ||
| 275 | { | ||
| 276 | unsigned long flags; | ||
| 277 | int ret = 0; | ||
| 278 | |||
| 279 | if (max_ratio > 100) | ||
| 280 | return -EINVAL; | ||
| 281 | |||
| 282 | spin_lock_irqsave(&bdi_lock, flags); | ||
| 283 | if (bdi->min_ratio > max_ratio) { | ||
| 284 | ret = -EINVAL; | ||
| 285 | } else { | ||
| 286 | bdi->max_ratio = max_ratio; | ||
| 287 | bdi->max_prop_frac = (PROP_FRAC_BASE * max_ratio) / 100; | ||
| 288 | } | ||
| 263 | spin_unlock_irqrestore(&bdi_lock, flags); | 289 | spin_unlock_irqrestore(&bdi_lock, flags); |
| 264 | 290 | ||
| 265 | return ret; | 291 | return ret; |
| 266 | } | 292 | } |
| 293 | EXPORT_SYMBOL(bdi_set_max_ratio); | ||
| 267 | 294 | ||
| 268 | /* | 295 | /* |
| 269 | * Work out the current dirty-memory clamping and background writeout | 296 | * Work out the current dirty-memory clamping and background writeout |
| @@ -365,6 +392,8 @@ get_dirty_limits(long *pbackground, long *pdirty, long *pbdi_dirty, | |||
| 365 | bdi_dirty *= numerator; | 392 | bdi_dirty *= numerator; |
| 366 | do_div(bdi_dirty, denominator); | 393 | do_div(bdi_dirty, denominator); |
| 367 | bdi_dirty += (dirty * bdi->min_ratio) / 100; | 394 | bdi_dirty += (dirty * bdi->min_ratio) / 100; |
| 395 | if (bdi_dirty > (dirty * bdi->max_ratio) / 100) | ||
| 396 | bdi_dirty = dirty * bdi->max_ratio / 100; | ||
| 368 | 397 | ||
| 369 | *pbdi_dirty = bdi_dirty; | 398 | *pbdi_dirty = bdi_dirty; |
| 370 | clip_bdi_dirty_limit(bdi, dirty, pbdi_dirty); | 399 | clip_bdi_dirty_limit(bdi, dirty, pbdi_dirty); |
