diff options
| author | Peter Zijlstra <a.p.zijlstra@chello.nl> | 2008-04-30 03:54:32 -0400 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2008-04-30 11:29:49 -0400 |
| commit | cf0ca9fe5dd9e3693d935757a7b2fc50fc576554 (patch) | |
| tree | c795c5271eda9fc67579fa3176c646b892dfdb41 | |
| parent | caafa4324335aeb11bc233d5f87aca8cce30beba (diff) | |
mm: bdi: export BDI attributes in sysfs
Provide a place in sysfs (/sys/class/bdi) for the backing_dev_info object.
This allows us to see and set the various BDI specific variables.
In particular this properly exposes the read-ahead window for all relevant
users and /sys/block/<block>/queue/read_ahead_kb should be deprecated.
With patient help from Kay Sievers and Greg KH.
[mszeredi@suse.cz]
- split off NFS and FUSE changes into separate patches
- document new sysfs attributes under Documentation/ABI
- do bdi_class_init as a core_initcall, otherwise the "default" BDI
won't be initialized
- remove bdi_init_fmt macro, it's not used very much
[akpm@linux-foundation.org: fix ia64 warning]
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Kay Sievers <kay.sievers@vrfy.org>
Acked-by: Greg KH <greg@kroah.com>
Cc: Trond Myklebust <trond.myklebust@fys.uio.no>
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
| -rw-r--r-- | Documentation/ABI/testing/sysfs-class-bdi | 46 | ||||
| -rw-r--r-- | block/genhd.c | 8 | ||||
| -rw-r--r-- | include/linux/backing-dev.h | 9 | ||||
| -rw-r--r-- | include/linux/writeback.h | 3 | ||||
| -rw-r--r-- | lib/percpu_counter.c | 1 | ||||
| -rw-r--r-- | mm/backing-dev.c | 119 | ||||
| -rw-r--r-- | mm/page-writeback.c | 2 | ||||
| -rw-r--r-- | mm/readahead.c | 8 |
8 files changed, 194 insertions, 2 deletions
diff --git a/Documentation/ABI/testing/sysfs-class-bdi b/Documentation/ABI/testing/sysfs-class-bdi new file mode 100644 index 000000000000..b800cdda40bb --- /dev/null +++ b/Documentation/ABI/testing/sysfs-class-bdi | |||
| @@ -0,0 +1,46 @@ | |||
| 1 | What: /sys/class/bdi/<bdi>/ | ||
| 2 | Date: January 2008 | ||
| 3 | Contact: Peter Zijlstra <a.p.zijlstra@chello.nl> | ||
| 4 | Description: | ||
| 5 | |||
| 6 | Provide a place in sysfs for the backing_dev_info object. | ||
| 7 | This allows us to see and set the various BDI specific variables. | ||
| 8 | |||
| 9 | The <bdi> identifier can be either of the following: | ||
| 10 | |||
| 11 | MAJOR:MINOR | ||
| 12 | |||
| 13 | Device number for block devices, or value of st_dev on | ||
| 14 | non-block filesystems which provide their own BDI, such as NFS | ||
| 15 | and FUSE. | ||
| 16 | |||
| 17 | default | ||
| 18 | |||
| 19 | The default backing dev, used for non-block device backed | ||
| 20 | filesystems which do not provide their own BDI. | ||
| 21 | |||
| 22 | Files under /sys/class/bdi/<bdi>/ | ||
| 23 | --------------------------------- | ||
| 24 | |||
| 25 | read_ahead_kb (read-write) | ||
| 26 | |||
| 27 | Size of the read-ahead window in kilobytes | ||
| 28 | |||
| 29 | reclaimable_kb (read-only) | ||
| 30 | |||
| 31 | Reclaimable (dirty or unstable) memory destined for writeback | ||
| 32 | to this device | ||
| 33 | |||
| 34 | writeback_kb (read-only) | ||
| 35 | |||
| 36 | Memory currently under writeback to this device | ||
| 37 | |||
| 38 | dirty_kb (read-only) | ||
| 39 | |||
| 40 | Global threshold for reclaimable + writeback memory | ||
| 41 | |||
| 42 | bdi_dirty_kb (read-only) | ||
| 43 | |||
| 44 | Current threshold on this BDI for reclaimable + writeback | ||
| 45 | memory | ||
| 46 | |||
diff --git a/block/genhd.c b/block/genhd.c index 00da5219ee37..fda9c7a63c29 100644 --- a/block/genhd.c +++ b/block/genhd.c | |||
| @@ -182,11 +182,17 @@ static int exact_lock(dev_t devt, void *data) | |||
| 182 | */ | 182 | */ |
| 183 | void add_disk(struct gendisk *disk) | 183 | void add_disk(struct gendisk *disk) |
| 184 | { | 184 | { |
| 185 | struct backing_dev_info *bdi; | ||
| 186 | |||
| 185 | disk->flags |= GENHD_FL_UP; | 187 | disk->flags |= GENHD_FL_UP; |
| 186 | blk_register_region(MKDEV(disk->major, disk->first_minor), | 188 | blk_register_region(MKDEV(disk->major, disk->first_minor), |
| 187 | disk->minors, NULL, exact_match, exact_lock, disk); | 189 | disk->minors, NULL, exact_match, exact_lock, disk); |
| 188 | register_disk(disk); | 190 | register_disk(disk); |
| 189 | blk_register_queue(disk); | 191 | blk_register_queue(disk); |
| 192 | |||
| 193 | bdi = &disk->queue->backing_dev_info; | ||
| 194 | bdi_register_dev(bdi, MKDEV(disk->major, disk->first_minor)); | ||
| 195 | sysfs_create_link(&disk->dev.kobj, &bdi->dev->kobj, "bdi"); | ||
| 190 | } | 196 | } |
| 191 | 197 | ||
| 192 | EXPORT_SYMBOL(add_disk); | 198 | EXPORT_SYMBOL(add_disk); |
| @@ -194,6 +200,8 @@ EXPORT_SYMBOL(del_gendisk); /* in partitions/check.c */ | |||
| 194 | 200 | ||
| 195 | void unlink_gendisk(struct gendisk *disk) | 201 | void unlink_gendisk(struct gendisk *disk) |
| 196 | { | 202 | { |
| 203 | sysfs_remove_link(&disk->dev.kobj, "bdi"); | ||
| 204 | bdi_unregister(&disk->queue->backing_dev_info); | ||
| 197 | blk_unregister_queue(disk); | 205 | blk_unregister_queue(disk); |
| 198 | blk_unregister_region(MKDEV(disk->major, disk->first_minor), | 206 | blk_unregister_region(MKDEV(disk->major, disk->first_minor), |
| 199 | disk->minors); | 207 | disk->minors); |
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index b66fa2bdfd9c..6d513666d45c 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h | |||
| @@ -11,9 +11,11 @@ | |||
| 11 | #include <linux/percpu_counter.h> | 11 | #include <linux/percpu_counter.h> |
| 12 | #include <linux/log2.h> | 12 | #include <linux/log2.h> |
| 13 | #include <linux/proportions.h> | 13 | #include <linux/proportions.h> |
| 14 | #include <linux/kernel.h> | ||
| 14 | #include <asm/atomic.h> | 15 | #include <asm/atomic.h> |
| 15 | 16 | ||
| 16 | struct page; | 17 | struct page; |
| 18 | struct device; | ||
| 17 | 19 | ||
| 18 | /* | 20 | /* |
| 19 | * Bits in backing_dev_info.state | 21 | * Bits in backing_dev_info.state |
| @@ -48,11 +50,18 @@ struct backing_dev_info { | |||
| 48 | 50 | ||
| 49 | struct prop_local_percpu completions; | 51 | struct prop_local_percpu completions; |
| 50 | int dirty_exceeded; | 52 | int dirty_exceeded; |
| 53 | |||
| 54 | struct device *dev; | ||
| 51 | }; | 55 | }; |
| 52 | 56 | ||
| 53 | int bdi_init(struct backing_dev_info *bdi); | 57 | int bdi_init(struct backing_dev_info *bdi); |
| 54 | void bdi_destroy(struct backing_dev_info *bdi); | 58 | void bdi_destroy(struct backing_dev_info *bdi); |
| 55 | 59 | ||
| 60 | int bdi_register(struct backing_dev_info *bdi, struct device *parent, | ||
| 61 | const char *fmt, ...); | ||
| 62 | int bdi_register_dev(struct backing_dev_info *bdi, dev_t dev); | ||
| 63 | void bdi_unregister(struct backing_dev_info *bdi); | ||
| 64 | |||
| 56 | static inline void __add_bdi_stat(struct backing_dev_info *bdi, | 65 | static inline void __add_bdi_stat(struct backing_dev_info *bdi, |
| 57 | enum bdi_stat_item item, s64 amount) | 66 | enum bdi_stat_item item, s64 amount) |
| 58 | { | 67 | { |
diff --git a/include/linux/writeback.h b/include/linux/writeback.h index b7b3362f7717..f462439cc288 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h | |||
| @@ -114,6 +114,9 @@ struct file; | |||
| 114 | int dirty_writeback_centisecs_handler(struct ctl_table *, int, struct file *, | 114 | int dirty_writeback_centisecs_handler(struct ctl_table *, int, struct file *, |
| 115 | void __user *, size_t *, loff_t *); | 115 | void __user *, size_t *, loff_t *); |
| 116 | 116 | ||
| 117 | void get_dirty_limits(long *pbackground, long *pdirty, long *pbdi_dirty, | ||
| 118 | struct backing_dev_info *bdi); | ||
| 119 | |||
| 117 | void page_writeback_init(void); | 120 | void page_writeback_init(void); |
| 118 | void balance_dirty_pages_ratelimited_nr(struct address_space *mapping, | 121 | void balance_dirty_pages_ratelimited_nr(struct address_space *mapping, |
| 119 | unsigned long nr_pages_dirtied); | 122 | unsigned long nr_pages_dirtied); |
diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c index 393a0e915c23..119174494cb5 100644 --- a/lib/percpu_counter.c +++ b/lib/percpu_counter.c | |||
| @@ -102,6 +102,7 @@ void percpu_counter_destroy(struct percpu_counter *fbc) | |||
| 102 | return; | 102 | return; |
| 103 | 103 | ||
| 104 | free_percpu(fbc->counters); | 104 | free_percpu(fbc->counters); |
| 105 | fbc->counters = NULL; | ||
| 105 | #ifdef CONFIG_HOTPLUG_CPU | 106 | #ifdef CONFIG_HOTPLUG_CPU |
| 106 | mutex_lock(&percpu_counters_lock); | 107 | mutex_lock(&percpu_counters_lock); |
| 107 | list_del(&fbc->list); | 108 | list_del(&fbc->list); |
diff --git a/mm/backing-dev.c b/mm/backing-dev.c index e8644b1e5527..847eabe4824c 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c | |||
| @@ -4,12 +4,129 @@ | |||
| 4 | #include <linux/fs.h> | 4 | #include <linux/fs.h> |
| 5 | #include <linux/sched.h> | 5 | #include <linux/sched.h> |
| 6 | #include <linux/module.h> | 6 | #include <linux/module.h> |
| 7 | #include <linux/writeback.h> | ||
| 8 | #include <linux/device.h> | ||
| 9 | |||
| 10 | |||
| 11 | static struct class *bdi_class; | ||
| 12 | |||
| 13 | static ssize_t read_ahead_kb_store(struct device *dev, | ||
| 14 | struct device_attribute *attr, | ||
| 15 | const char *buf, size_t count) | ||
| 16 | { | ||
| 17 | struct backing_dev_info *bdi = dev_get_drvdata(dev); | ||
| 18 | char *end; | ||
| 19 | unsigned long read_ahead_kb; | ||
| 20 | ssize_t ret = -EINVAL; | ||
| 21 | |||
| 22 | read_ahead_kb = simple_strtoul(buf, &end, 10); | ||
| 23 | if (*buf && (end[0] == '\0' || (end[0] == '\n' && end[1] == '\0'))) { | ||
| 24 | bdi->ra_pages = read_ahead_kb >> (PAGE_SHIFT - 10); | ||
| 25 | ret = count; | ||
| 26 | } | ||
| 27 | return ret; | ||
| 28 | } | ||
| 29 | |||
| 30 | #define K(pages) ((pages) << (PAGE_SHIFT - 10)) | ||
| 31 | |||
| 32 | #define BDI_SHOW(name, expr) \ | ||
| 33 | static ssize_t name##_show(struct device *dev, \ | ||
| 34 | struct device_attribute *attr, char *page) \ | ||
| 35 | { \ | ||
| 36 | struct backing_dev_info *bdi = dev_get_drvdata(dev); \ | ||
| 37 | \ | ||
| 38 | return snprintf(page, PAGE_SIZE-1, "%lld\n", (long long)expr); \ | ||
| 39 | } | ||
| 40 | |||
| 41 | BDI_SHOW(read_ahead_kb, K(bdi->ra_pages)) | ||
| 42 | |||
| 43 | BDI_SHOW(reclaimable_kb, K(bdi_stat(bdi, BDI_RECLAIMABLE))) | ||
| 44 | BDI_SHOW(writeback_kb, K(bdi_stat(bdi, BDI_WRITEBACK))) | ||
| 45 | |||
| 46 | static inline unsigned long get_dirty(struct backing_dev_info *bdi, int i) | ||
| 47 | { | ||
| 48 | unsigned long thresh[3]; | ||
| 49 | |||
| 50 | get_dirty_limits(&thresh[0], &thresh[1], &thresh[2], bdi); | ||
| 51 | |||
| 52 | return thresh[i]; | ||
| 53 | } | ||
| 54 | |||
| 55 | BDI_SHOW(dirty_kb, K(get_dirty(bdi, 1))) | ||
| 56 | BDI_SHOW(bdi_dirty_kb, K(get_dirty(bdi, 2))) | ||
| 57 | |||
| 58 | #define __ATTR_RW(attr) __ATTR(attr, 0644, attr##_show, attr##_store) | ||
| 59 | |||
| 60 | static struct device_attribute bdi_dev_attrs[] = { | ||
| 61 | __ATTR_RW(read_ahead_kb), | ||
| 62 | __ATTR_RO(reclaimable_kb), | ||
| 63 | __ATTR_RO(writeback_kb), | ||
| 64 | __ATTR_RO(dirty_kb), | ||
| 65 | __ATTR_RO(bdi_dirty_kb), | ||
| 66 | __ATTR_NULL, | ||
| 67 | }; | ||
| 68 | |||
| 69 | static __init int bdi_class_init(void) | ||
| 70 | { | ||
| 71 | bdi_class = class_create(THIS_MODULE, "bdi"); | ||
| 72 | bdi_class->dev_attrs = bdi_dev_attrs; | ||
| 73 | return 0; | ||
| 74 | } | ||
| 75 | |||
| 76 | core_initcall(bdi_class_init); | ||
| 77 | |||
| 78 | int bdi_register(struct backing_dev_info *bdi, struct device *parent, | ||
| 79 | const char *fmt, ...) | ||
| 80 | { | ||
| 81 | char *name; | ||
| 82 | va_list args; | ||
| 83 | int ret = 0; | ||
| 84 | struct device *dev; | ||
| 85 | |||
| 86 | va_start(args, fmt); | ||
| 87 | name = kvasprintf(GFP_KERNEL, fmt, args); | ||
| 88 | va_end(args); | ||
| 89 | |||
| 90 | if (!name) | ||
| 91 | return -ENOMEM; | ||
| 92 | |||
| 93 | dev = device_create(bdi_class, parent, MKDEV(0, 0), name); | ||
| 94 | if (IS_ERR(dev)) { | ||
| 95 | ret = PTR_ERR(dev); | ||
| 96 | goto exit; | ||
| 97 | } | ||
| 98 | |||
| 99 | bdi->dev = dev; | ||
| 100 | dev_set_drvdata(bdi->dev, bdi); | ||
| 101 | |||
| 102 | exit: | ||
| 103 | kfree(name); | ||
| 104 | return ret; | ||
| 105 | } | ||
| 106 | EXPORT_SYMBOL(bdi_register); | ||
| 107 | |||
| 108 | int bdi_register_dev(struct backing_dev_info *bdi, dev_t dev) | ||
| 109 | { | ||
| 110 | return bdi_register(bdi, NULL, "%u:%u", MAJOR(dev), MINOR(dev)); | ||
| 111 | } | ||
| 112 | EXPORT_SYMBOL(bdi_register_dev); | ||
| 113 | |||
| 114 | void bdi_unregister(struct backing_dev_info *bdi) | ||
| 115 | { | ||
| 116 | if (bdi->dev) { | ||
| 117 | device_unregister(bdi->dev); | ||
| 118 | bdi->dev = NULL; | ||
| 119 | } | ||
| 120 | } | ||
| 121 | EXPORT_SYMBOL(bdi_unregister); | ||
| 7 | 122 | ||
| 8 | int bdi_init(struct backing_dev_info *bdi) | 123 | int bdi_init(struct backing_dev_info *bdi) |
| 9 | { | 124 | { |
| 10 | int i; | 125 | int i; |
| 11 | int err; | 126 | int err; |
| 12 | 127 | ||
| 128 | bdi->dev = NULL; | ||
| 129 | |||
| 13 | for (i = 0; i < NR_BDI_STAT_ITEMS; i++) { | 130 | for (i = 0; i < NR_BDI_STAT_ITEMS; i++) { |
| 14 | err = percpu_counter_init_irq(&bdi->bdi_stat[i], 0); | 131 | err = percpu_counter_init_irq(&bdi->bdi_stat[i], 0); |
| 15 | if (err) | 132 | if (err) |
| @@ -33,6 +150,8 @@ void bdi_destroy(struct backing_dev_info *bdi) | |||
| 33 | { | 150 | { |
| 34 | int i; | 151 | int i; |
| 35 | 152 | ||
| 153 | bdi_unregister(bdi); | ||
| 154 | |||
| 36 | for (i = 0; i < NR_BDI_STAT_ITEMS; i++) | 155 | for (i = 0; i < NR_BDI_STAT_ITEMS; i++) |
| 37 | percpu_counter_destroy(&bdi->bdi_stat[i]); | 156 | percpu_counter_destroy(&bdi->bdi_stat[i]); |
| 38 | 157 | ||
diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 5e00f1772c20..e5b6b1190a95 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c | |||
| @@ -300,7 +300,7 @@ static unsigned long determine_dirtyable_memory(void) | |||
| 300 | return x + 1; /* Ensure that we never return 0 */ | 300 | return x + 1; /* Ensure that we never return 0 */ |
| 301 | } | 301 | } |
| 302 | 302 | ||
| 303 | static void | 303 | void |
| 304 | get_dirty_limits(long *pbackground, long *pdirty, long *pbdi_dirty, | 304 | get_dirty_limits(long *pbackground, long *pdirty, long *pbdi_dirty, |
| 305 | struct backing_dev_info *bdi) | 305 | struct backing_dev_info *bdi) |
| 306 | { | 306 | { |
diff --git a/mm/readahead.c b/mm/readahead.c index 8762e8988972..d8723a5f6496 100644 --- a/mm/readahead.c +++ b/mm/readahead.c | |||
| @@ -235,7 +235,13 @@ unsigned long max_sane_readahead(unsigned long nr) | |||
| 235 | 235 | ||
| 236 | static int __init readahead_init(void) | 236 | static int __init readahead_init(void) |
| 237 | { | 237 | { |
| 238 | return bdi_init(&default_backing_dev_info); | 238 | int err; |
| 239 | |||
| 240 | err = bdi_init(&default_backing_dev_info); | ||
| 241 | if (!err) | ||
| 242 | bdi_register(&default_backing_dev_info, NULL, "default"); | ||
| 243 | |||
| 244 | return err; | ||
| 239 | } | 245 | } |
| 240 | subsys_initcall(readahead_init); | 246 | subsys_initcall(readahead_init); |
| 241 | 247 | ||
