diff options
author | Peter Zijlstra <a.p.zijlstra@chello.nl> | 2008-04-30 03:54:35 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2008-04-30 11:29:50 -0400 |
commit | 189d3c4a94ef19fca2a71a6a336e9fda900e25e7 (patch) | |
tree | 75c0de871fe9922885a2fa073f15806d829342fa | |
parent | b6f2fcbcfca9db2bd7aa24940224fcd3bbdbb8aa (diff) |
mm: bdi: allow setting a minimum for the bdi dirty limit
Under normal circumstances each device is given a part of the total write-back
cache that relates to its current average writeout speed in relation to the other
devices.
min_ratio - allows one to assign a minimum portion of the write-back cache to
a particular device. This is useful in situations where you might want to
provide a minimum QoS. (One request for this feature came from flash-based
storage people who wanted to avoid writing out at all costs - they of course
needed some pdflush hacks as well.)
max_ratio - allows one to assign a maximum portion of the dirty limit to a
particular device. This is useful in situations where you want to avoid one
device taking all or most of the write-back cache. E.g. an NFS mount that is
prone to get stuck, or a FUSE mount which you don't trust to play fair.
Add "min_ratio" to /sys/class/bdi. This indicates the minimum percentage of
the global dirty threshold allocated to this bdi.
[mszeredi@suse.cz]
- fix parsing in min_ratio_store()
- document new sysfs attribute
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | Documentation/ABI/testing/sysfs-class-bdi | 6 | ||||
-rw-r--r-- | include/linux/backing-dev.h | 4 | ||||
-rw-r--r-- | mm/backing-dev.c | 21 | ||||
-rw-r--r-- | mm/page-writeback.c | 27 |
4 files changed, 57 insertions, 1 deletions
diff --git a/Documentation/ABI/testing/sysfs-class-bdi b/Documentation/ABI/testing/sysfs-class-bdi index b800cdda40bb..b9e8a9368dc6 100644 --- a/Documentation/ABI/testing/sysfs-class-bdi +++ b/Documentation/ABI/testing/sysfs-class-bdi | |||
@@ -44,3 +44,9 @@ bdi_dirty_kb (read-only) | |||
44 | Current threshold on this BDI for reclaimable + writeback | 44 | Current threshold on this BDI for reclaimable + writeback |
45 | memory | 45 | memory |
46 | 46 | ||
47 | min_ratio (read-write) | ||
48 | |||
49 | Minimal percentage of global dirty threshold allocated to this | ||
50 | bdi. If the value written to this file would make the sum | ||
51 | of all min_ratio values exceed 100, then EINVAL is returned. | ||
52 | The default is zero | ||
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 6d513666d45c..9a8965518d1d 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h | |||
@@ -51,6 +51,8 @@ struct backing_dev_info { | |||
51 | struct prop_local_percpu completions; | 51 | struct prop_local_percpu completions; |
52 | int dirty_exceeded; | 52 | int dirty_exceeded; |
53 | 53 | ||
54 | unsigned int min_ratio; | ||
55 | |||
54 | struct device *dev; | 56 | struct device *dev; |
55 | }; | 57 | }; |
56 | 58 | ||
@@ -137,6 +139,8 @@ static inline unsigned long bdi_stat_error(struct backing_dev_info *bdi) | |||
137 | #endif | 139 | #endif |
138 | } | 140 | } |
139 | 141 | ||
142 | int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio); | ||
143 | |||
140 | /* | 144 | /* |
141 | * Flags in backing_dev_info::capability | 145 | * Flags in backing_dev_info::capability |
142 | * - The first two flags control whether dirty pages will contribute to the | 146 | * - The first two flags control whether dirty pages will contribute to the |
diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 847eabe4824c..4967fb176e53 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c | |||
@@ -55,6 +55,24 @@ static inline unsigned long get_dirty(struct backing_dev_info *bdi, int i) | |||
55 | BDI_SHOW(dirty_kb, K(get_dirty(bdi, 1))) | 55 | BDI_SHOW(dirty_kb, K(get_dirty(bdi, 1))) |
56 | BDI_SHOW(bdi_dirty_kb, K(get_dirty(bdi, 2))) | 56 | BDI_SHOW(bdi_dirty_kb, K(get_dirty(bdi, 2))) |
57 | 57 | ||
58 | static ssize_t min_ratio_store(struct device *dev, | ||
59 | struct device_attribute *attr, const char *buf, size_t count) | ||
60 | { | ||
61 | struct backing_dev_info *bdi = dev_get_drvdata(dev); | ||
62 | char *end; | ||
63 | unsigned int ratio; | ||
64 | ssize_t ret = -EINVAL; | ||
65 | |||
66 | ratio = simple_strtoul(buf, &end, 10); | ||
67 | if (*buf && (end[0] == '\0' || (end[0] == '\n' && end[1] == '\0'))) { | ||
68 | ret = bdi_set_min_ratio(bdi, ratio); | ||
69 | if (!ret) | ||
70 | ret = count; | ||
71 | } | ||
72 | return ret; | ||
73 | } | ||
74 | BDI_SHOW(min_ratio, bdi->min_ratio) | ||
75 | |||
58 | #define __ATTR_RW(attr) __ATTR(attr, 0644, attr##_show, attr##_store) | 76 | #define __ATTR_RW(attr) __ATTR(attr, 0644, attr##_show, attr##_store) |
59 | 77 | ||
60 | static struct device_attribute bdi_dev_attrs[] = { | 78 | static struct device_attribute bdi_dev_attrs[] = { |
@@ -63,6 +81,7 @@ static struct device_attribute bdi_dev_attrs[] = { | |||
63 | __ATTR_RO(writeback_kb), | 81 | __ATTR_RO(writeback_kb), |
64 | __ATTR_RO(dirty_kb), | 82 | __ATTR_RO(dirty_kb), |
65 | __ATTR_RO(bdi_dirty_kb), | 83 | __ATTR_RO(bdi_dirty_kb), |
84 | __ATTR_RW(min_ratio), | ||
66 | __ATTR_NULL, | 85 | __ATTR_NULL, |
67 | }; | 86 | }; |
68 | 87 | ||
@@ -127,6 +146,8 @@ int bdi_init(struct backing_dev_info *bdi) | |||
127 | 146 | ||
128 | bdi->dev = NULL; | 147 | bdi->dev = NULL; |
129 | 148 | ||
149 | bdi->min_ratio = 0; | ||
150 | |||
130 | for (i = 0; i < NR_BDI_STAT_ITEMS; i++) { | 151 | for (i = 0; i < NR_BDI_STAT_ITEMS; i++) { |
131 | err = percpu_counter_init_irq(&bdi->bdi_stat[i], 0); | 152 | err = percpu_counter_init_irq(&bdi->bdi_stat[i], 0); |
132 | if (err) | 153 | if (err) |
diff --git a/mm/page-writeback.c b/mm/page-writeback.c index e5b6b1190a95..4ac077f4269c 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c | |||
@@ -243,6 +243,29 @@ static void task_dirty_limit(struct task_struct *tsk, long *pdirty) | |||
243 | } | 243 | } |
244 | 244 | ||
245 | /* | 245 | /* |
246 | * | ||
247 | */ | ||
248 | static DEFINE_SPINLOCK(bdi_lock); | ||
249 | static unsigned int bdi_min_ratio; | ||
250 | |||
251 | int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio) | ||
252 | { | ||
253 | int ret = 0; | ||
254 | unsigned long flags; | ||
255 | |||
256 | spin_lock_irqsave(&bdi_lock, flags); | ||
257 | min_ratio -= bdi->min_ratio; | ||
258 | if (bdi_min_ratio + min_ratio < 100) { | ||
259 | bdi_min_ratio += min_ratio; | ||
260 | bdi->min_ratio += min_ratio; | ||
261 | } else | ||
262 | ret = -EINVAL; | ||
263 | spin_unlock_irqrestore(&bdi_lock, flags); | ||
264 | |||
265 | return ret; | ||
266 | } | ||
267 | |||
268 | /* | ||
246 | * Work out the current dirty-memory clamping and background writeout | 269 | * Work out the current dirty-memory clamping and background writeout |
247 | * thresholds. | 270 | * thresholds. |
248 | * | 271 | * |
@@ -330,7 +353,7 @@ get_dirty_limits(long *pbackground, long *pdirty, long *pbdi_dirty, | |||
330 | *pdirty = dirty; | 353 | *pdirty = dirty; |
331 | 354 | ||
332 | if (bdi) { | 355 | if (bdi) { |
333 | u64 bdi_dirty = dirty; | 356 | u64 bdi_dirty; |
334 | long numerator, denominator; | 357 | long numerator, denominator; |
335 | 358 | ||
336 | /* | 359 | /* |
@@ -338,8 +361,10 @@ get_dirty_limits(long *pbackground, long *pdirty, long *pbdi_dirty, | |||
338 | */ | 361 | */ |
339 | bdi_writeout_fraction(bdi, &numerator, &denominator); | 362 | bdi_writeout_fraction(bdi, &numerator, &denominator); |
340 | 363 | ||
364 | bdi_dirty = (dirty * (100 - bdi_min_ratio)) / 100; | ||
341 | bdi_dirty *= numerator; | 365 | bdi_dirty *= numerator; |
342 | do_div(bdi_dirty, denominator); | 366 | do_div(bdi_dirty, denominator); |
367 | bdi_dirty += (dirty * bdi->min_ratio) / 100; | ||
343 | 368 | ||
344 | *pbdi_dirty = bdi_dirty; | 369 | *pbdi_dirty = bdi_dirty; |
345 | clip_bdi_dirty_limit(bdi, dirty, pbdi_dirty); | 370 | clip_bdi_dirty_limit(bdi, dirty, pbdi_dirty); |