aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Peter Zijlstra <a.p.zijlstra@chello.nl> 2008-04-30 03:54:32 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2008-04-30 11:29:49 -0400
commit: cf0ca9fe5dd9e3693d935757a7b2fc50fc576554 (patch)
tree: c795c5271eda9fc67579fa3176c646b892dfdb41
parent: caafa4324335aeb11bc233d5f87aca8cce30beba (diff)
mm: bdi: export BDI attributes in sysfs
Provide a place in sysfs (/sys/class/bdi) for the backing_dev_info object.
This allows us to see and set the various BDI specific variables.

In particular this properly exposes the read-ahead window for all relevant
users and /sys/block/<block>/queue/read_ahead_kb should be deprecated.

With patient help from Kay Sievers and Greg KH

[mszeredi@suse.cz]

 - split off NFS and FUSE changes into separate patches
 - document new sysfs attributes under Documentation/ABI
 - do bdi_class_init as a core_initcall, otherwise the "default" BDI
   won't be initialized
 - remove bdi_init_fmt macro, it's not used very much

[akpm@linux-foundation.org: fix ia64 warning]
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Kay Sievers <kay.sievers@vrfy.org>
Acked-by: Greg KH <greg@kroah.com>
Cc: Trond Myklebust <trond.myklebust@fys.uio.no>
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- Documentation/ABI/testing/sysfs-class-bdi | 46
-rw-r--r-- block/genhd.c | 8
-rw-r--r-- include/linux/backing-dev.h | 9
-rw-r--r-- include/linux/writeback.h | 3
-rw-r--r-- lib/percpu_counter.c | 1
-rw-r--r-- mm/backing-dev.c | 119
-rw-r--r-- mm/page-writeback.c | 2
-rw-r--r-- mm/readahead.c | 8
8 files changed, 194 insertions, 2 deletions
diff --git a/Documentation/ABI/testing/sysfs-class-bdi b/Documentation/ABI/testing/sysfs-class-bdi
new file mode 100644
index 000000000000..b800cdda40bb
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-bdi
@@ -0,0 +1,46 @@
1What: /sys/class/bdi/<bdi>/
2Date: January 2008
3Contact: Peter Zijlstra <a.p.zijlstra@chello.nl>
4Description:
5
6Provide a place in sysfs for the backing_dev_info object.
7This allows us to see and set the various BDI specific variables.
8
9The <bdi> identifier can be either of the following:
10
11MAJOR:MINOR
12
13 Device number for block devices, or value of st_dev on
14 non-block filesystems which provide their own BDI, such as NFS
15 and FUSE.
16
17default
18
19 The default backing dev, used for non-block device backed
20 filesystems which do not provide their own BDI.
21
22Files under /sys/class/bdi/<bdi>/
23---------------------------------
24
25read_ahead_kb (read-write)
26
27 Size of the read-ahead window in kilobytes
28
29reclaimable_kb (read-only)
30
31 Reclaimable (dirty or unstable) memory destined for writeback
32 to this device
33
34writeback_kb (read-only)
35
36 Memory currently under writeback to this device
37
38dirty_kb (read-only)
39
40 Global threshold for reclaimable + writeback memory
41
42bdi_dirty_kb (read-only)
43
44 Current threshold on this BDI for reclaimable + writeback
45 memory
46
diff --git a/block/genhd.c b/block/genhd.c
index 00da5219ee37..fda9c7a63c29 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -182,11 +182,17 @@ static int exact_lock(dev_t devt, void *data)
182 */ 182 */
183void add_disk(struct gendisk *disk) 183void add_disk(struct gendisk *disk)
184{ 184{
185 struct backing_dev_info *bdi;
186
185 disk->flags |= GENHD_FL_UP; 187 disk->flags |= GENHD_FL_UP;
186 blk_register_region(MKDEV(disk->major, disk->first_minor), 188 blk_register_region(MKDEV(disk->major, disk->first_minor),
187 disk->minors, NULL, exact_match, exact_lock, disk); 189 disk->minors, NULL, exact_match, exact_lock, disk);
188 register_disk(disk); 190 register_disk(disk);
189 blk_register_queue(disk); 191 blk_register_queue(disk);
192
193 bdi = &disk->queue->backing_dev_info;
194 bdi_register_dev(bdi, MKDEV(disk->major, disk->first_minor));
195 sysfs_create_link(&disk->dev.kobj, &bdi->dev->kobj, "bdi");
190} 196}
191 197
192EXPORT_SYMBOL(add_disk); 198EXPORT_SYMBOL(add_disk);
@@ -194,6 +200,8 @@ EXPORT_SYMBOL(del_gendisk); /* in partitions/check.c */
194 200
195void unlink_gendisk(struct gendisk *disk) 201void unlink_gendisk(struct gendisk *disk)
196{ 202{
203 sysfs_remove_link(&disk->dev.kobj, "bdi");
204 bdi_unregister(&disk->queue->backing_dev_info);
197 blk_unregister_queue(disk); 205 blk_unregister_queue(disk);
198 blk_unregister_region(MKDEV(disk->major, disk->first_minor), 206 blk_unregister_region(MKDEV(disk->major, disk->first_minor),
199 disk->minors); 207 disk->minors);
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index b66fa2bdfd9c..6d513666d45c 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -11,9 +11,11 @@
11#include <linux/percpu_counter.h> 11#include <linux/percpu_counter.h>
12#include <linux/log2.h> 12#include <linux/log2.h>
13#include <linux/proportions.h> 13#include <linux/proportions.h>
14#include <linux/kernel.h>
14#include <asm/atomic.h> 15#include <asm/atomic.h>
15 16
16struct page; 17struct page;
18struct device;
17 19
18/* 20/*
19 * Bits in backing_dev_info.state 21 * Bits in backing_dev_info.state
@@ -48,11 +50,18 @@ struct backing_dev_info {
48 50
49 struct prop_local_percpu completions; 51 struct prop_local_percpu completions;
50 int dirty_exceeded; 52 int dirty_exceeded;
53
54 struct device *dev;
51}; 55};
52 56
53int bdi_init(struct backing_dev_info *bdi); 57int bdi_init(struct backing_dev_info *bdi);
54void bdi_destroy(struct backing_dev_info *bdi); 58void bdi_destroy(struct backing_dev_info *bdi);
55 59
60int bdi_register(struct backing_dev_info *bdi, struct device *parent,
61 const char *fmt, ...);
62int bdi_register_dev(struct backing_dev_info *bdi, dev_t dev);
63void bdi_unregister(struct backing_dev_info *bdi);
64
56static inline void __add_bdi_stat(struct backing_dev_info *bdi, 65static inline void __add_bdi_stat(struct backing_dev_info *bdi,
57 enum bdi_stat_item item, s64 amount) 66 enum bdi_stat_item item, s64 amount)
58{ 67{
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index b7b3362f7717..f462439cc288 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -114,6 +114,9 @@ struct file;
114int dirty_writeback_centisecs_handler(struct ctl_table *, int, struct file *, 114int dirty_writeback_centisecs_handler(struct ctl_table *, int, struct file *,
115 void __user *, size_t *, loff_t *); 115 void __user *, size_t *, loff_t *);
116 116
117void get_dirty_limits(long *pbackground, long *pdirty, long *pbdi_dirty,
118 struct backing_dev_info *bdi);
119
117void page_writeback_init(void); 120void page_writeback_init(void);
118void balance_dirty_pages_ratelimited_nr(struct address_space *mapping, 121void balance_dirty_pages_ratelimited_nr(struct address_space *mapping,
119 unsigned long nr_pages_dirtied); 122 unsigned long nr_pages_dirtied);
diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c
index 393a0e915c23..119174494cb5 100644
--- a/lib/percpu_counter.c
+++ b/lib/percpu_counter.c
@@ -102,6 +102,7 @@ void percpu_counter_destroy(struct percpu_counter *fbc)
102 return; 102 return;
103 103
104 free_percpu(fbc->counters); 104 free_percpu(fbc->counters);
105 fbc->counters = NULL;
105#ifdef CONFIG_HOTPLUG_CPU 106#ifdef CONFIG_HOTPLUG_CPU
106 mutex_lock(&percpu_counters_lock); 107 mutex_lock(&percpu_counters_lock);
107 list_del(&fbc->list); 108 list_del(&fbc->list);
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index e8644b1e5527..847eabe4824c 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -4,12 +4,129 @@
4#include <linux/fs.h> 4#include <linux/fs.h>
5#include <linux/sched.h> 5#include <linux/sched.h>
6#include <linux/module.h> 6#include <linux/module.h>
7#include <linux/writeback.h>
8#include <linux/device.h>
9
10
11static struct class *bdi_class;
12
13static ssize_t read_ahead_kb_store(struct device *dev,
14 struct device_attribute *attr,
15 const char *buf, size_t count)
16{
17 struct backing_dev_info *bdi = dev_get_drvdata(dev);
18 char *end;
19 unsigned long read_ahead_kb;
20 ssize_t ret = -EINVAL;
21
22 read_ahead_kb = simple_strtoul(buf, &end, 10);
23 if (*buf && (end[0] == '\0' || (end[0] == '\n' && end[1] == '\0'))) {
24 bdi->ra_pages = read_ahead_kb >> (PAGE_SHIFT - 10);
25 ret = count;
26 }
27 return ret;
28}
29
30#define K(pages) ((pages) << (PAGE_SHIFT - 10))
31
32#define BDI_SHOW(name, expr) \
33static ssize_t name##_show(struct device *dev, \
34 struct device_attribute *attr, char *page) \
35{ \
36 struct backing_dev_info *bdi = dev_get_drvdata(dev); \
37 \
38 return snprintf(page, PAGE_SIZE-1, "%lld\n", (long long)expr); \
39}
40
41BDI_SHOW(read_ahead_kb, K(bdi->ra_pages))
42
43BDI_SHOW(reclaimable_kb, K(bdi_stat(bdi, BDI_RECLAIMABLE)))
44BDI_SHOW(writeback_kb, K(bdi_stat(bdi, BDI_WRITEBACK)))
45
46static inline unsigned long get_dirty(struct backing_dev_info *bdi, int i)
47{
48 unsigned long thresh[3];
49
50 get_dirty_limits(&thresh[0], &thresh[1], &thresh[2], bdi);
51
52 return thresh[i];
53}
54
55BDI_SHOW(dirty_kb, K(get_dirty(bdi, 1)))
56BDI_SHOW(bdi_dirty_kb, K(get_dirty(bdi, 2)))
57
58#define __ATTR_RW(attr) __ATTR(attr, 0644, attr##_show, attr##_store)
59
60static struct device_attribute bdi_dev_attrs[] = {
61 __ATTR_RW(read_ahead_kb),
62 __ATTR_RO(reclaimable_kb),
63 __ATTR_RO(writeback_kb),
64 __ATTR_RO(dirty_kb),
65 __ATTR_RO(bdi_dirty_kb),
66 __ATTR_NULL,
67};
68
69static __init int bdi_class_init(void)
70{
71 bdi_class = class_create(THIS_MODULE, "bdi");
72 bdi_class->dev_attrs = bdi_dev_attrs;
73 return 0;
74}
75
76core_initcall(bdi_class_init);
77
78int bdi_register(struct backing_dev_info *bdi, struct device *parent,
79 const char *fmt, ...)
80{
81 char *name;
82 va_list args;
83 int ret = 0;
84 struct device *dev;
85
86 va_start(args, fmt);
87 name = kvasprintf(GFP_KERNEL, fmt, args);
88 va_end(args);
89
90 if (!name)
91 return -ENOMEM;
92
93 dev = device_create(bdi_class, parent, MKDEV(0, 0), name);
94 if (IS_ERR(dev)) {
95 ret = PTR_ERR(dev);
96 goto exit;
97 }
98
99 bdi->dev = dev;
100 dev_set_drvdata(bdi->dev, bdi);
101
102exit:
103 kfree(name);
104 return ret;
105}
106EXPORT_SYMBOL(bdi_register);
107
108int bdi_register_dev(struct backing_dev_info *bdi, dev_t dev)
109{
110 return bdi_register(bdi, NULL, "%u:%u", MAJOR(dev), MINOR(dev));
111}
112EXPORT_SYMBOL(bdi_register_dev);
113
114void bdi_unregister(struct backing_dev_info *bdi)
115{
116 if (bdi->dev) {
117 device_unregister(bdi->dev);
118 bdi->dev = NULL;
119 }
120}
121EXPORT_SYMBOL(bdi_unregister);
7 122
8int bdi_init(struct backing_dev_info *bdi) 123int bdi_init(struct backing_dev_info *bdi)
9{ 124{
10 int i; 125 int i;
11 int err; 126 int err;
12 127
128 bdi->dev = NULL;
129
13 for (i = 0; i < NR_BDI_STAT_ITEMS; i++) { 130 for (i = 0; i < NR_BDI_STAT_ITEMS; i++) {
14 err = percpu_counter_init_irq(&bdi->bdi_stat[i], 0); 131 err = percpu_counter_init_irq(&bdi->bdi_stat[i], 0);
15 if (err) 132 if (err)
@@ -33,6 +150,8 @@ void bdi_destroy(struct backing_dev_info *bdi)
33{ 150{
34 int i; 151 int i;
35 152
153 bdi_unregister(bdi);
154
36 for (i = 0; i < NR_BDI_STAT_ITEMS; i++) 155 for (i = 0; i < NR_BDI_STAT_ITEMS; i++)
37 percpu_counter_destroy(&bdi->bdi_stat[i]); 156 percpu_counter_destroy(&bdi->bdi_stat[i]);
38 157
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 5e00f1772c20..e5b6b1190a95 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -300,7 +300,7 @@ static unsigned long determine_dirtyable_memory(void)
300 return x + 1; /* Ensure that we never return 0 */ 300 return x + 1; /* Ensure that we never return 0 */
301} 301}
302 302
303static void 303void
304get_dirty_limits(long *pbackground, long *pdirty, long *pbdi_dirty, 304get_dirty_limits(long *pbackground, long *pdirty, long *pbdi_dirty,
305 struct backing_dev_info *bdi) 305 struct backing_dev_info *bdi)
306{ 306{
diff --git a/mm/readahead.c b/mm/readahead.c
index 8762e8988972..d8723a5f6496 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -235,7 +235,13 @@ unsigned long max_sane_readahead(unsigned long nr)
235 235
236static int __init readahead_init(void) 236static int __init readahead_init(void)
237{ 237{
238 return bdi_init(&default_backing_dev_info); 238 int err;
239
240 err = bdi_init(&default_backing_dev_info);
241 if (!err)
242 bdi_register(&default_backing_dev_info, NULL, "default");
243
244 return err;
239} 245}
240subsys_initcall(readahead_init); 246subsys_initcall(readahead_init);
241 247