From 7a6d3c8b3049d07123628f2bf57127bba2cc878f Mon Sep 17 00:00:00 2001 From: Csaba Henk Date: Wed, 1 Jul 2009 17:28:41 -0700 Subject: fuse: make the number of max background requests and congestion threshold tunable The practical values for these limits depend on the design of the filesystem server so let userspace set them at initialization time. Signed-off-by: Csaba Henk Signed-off-by: Miklos Szeredi --- fs/fuse/dev.c | 10 +++++----- fs/fuse/fuse_i.h | 12 ++++++------ fs/fuse/inode.c | 14 ++++++++++++++ include/linux/fuse.h | 9 +++++++-- 4 files changed, 32 insertions(+), 13 deletions(-) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index f58ecbc416c8..b152761c1bf6 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -250,7 +250,7 @@ static void queue_request(struct fuse_conn *fc, struct fuse_req *req) static void flush_bg_queue(struct fuse_conn *fc) { - while (fc->active_background < FUSE_MAX_BACKGROUND && + while (fc->active_background < fc->max_background && !list_empty(&fc->bg_queue)) { struct fuse_req *req; @@ -280,11 +280,11 @@ __releases(&fc->lock) list_del(&req->intr_entry); req->state = FUSE_REQ_FINISHED; if (req->background) { - if (fc->num_background == FUSE_MAX_BACKGROUND) { + if (fc->num_background == fc->max_background) { fc->blocked = 0; wake_up_all(&fc->blocked_waitq); } - if (fc->num_background == FUSE_CONGESTION_THRESHOLD && + if (fc->num_background == fc->congestion_threshold && fc->connected && fc->bdi_initialized) { clear_bdi_congested(&fc->bdi, READ); clear_bdi_congested(&fc->bdi, WRITE); @@ -410,9 +410,9 @@ static void fuse_request_send_nowait_locked(struct fuse_conn *fc, { req->background = 1; fc->num_background++; - if (fc->num_background == FUSE_MAX_BACKGROUND) + if (fc->num_background == fc->max_background) fc->blocked = 1; - if (fc->num_background == FUSE_CONGESTION_THRESHOLD && + if (fc->num_background == fc->congestion_threshold && fc->bdi_initialized) { set_bdi_congested(&fc->bdi, READ); set_bdi_congested(&fc->bdi, WRITE); diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 52b641fc0faf..6bcfab04396f 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -25,12 +25,6 @@ /** Max number of pages that can be used in a single read request */ #define FUSE_MAX_PAGES_PER_REQ 32 -/** Maximum number of outstanding background requests */ -#define FUSE_MAX_BACKGROUND 12 - -/** Congestion starts at 75% of maximum */ -#define FUSE_CONGESTION_THRESHOLD (FUSE_MAX_BACKGROUND * 75 / 100) - /** Bias for fi->writectr, meaning new writepages must not be sent */ #define FUSE_NOWRITE INT_MIN @@ -349,6 +343,12 @@ struct fuse_conn { /** rbtree of fuse_files waiting for poll events indexed by ph */ struct rb_root polled_files; + /** Maximum number of outstanding background requests */ + unsigned max_background; + + /** Number of background requests at which congestion starts */ + unsigned congestion_threshold; + /** Number of requests currently in the background */ unsigned num_background; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index f91ccc4a189d..9aa6f46d0c32 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -32,6 +32,12 @@ DEFINE_MUTEX(fuse_mutex); #define FUSE_DEFAULT_BLKSIZE 512 +/** Maximum number of outstanding background requests */ +#define FUSE_DEFAULT_MAX_BACKGROUND 12 + +/** Congestion starts at 75% of maximum */ +#define FUSE_DEFAULT_CONGESTION_THRESHOLD (FUSE_DEFAULT_MAX_BACKGROUND * 3 / 4) + struct fuse_mount_data { int fd; unsigned rootmode; @@ -517,6 +523,8 @@ void fuse_conn_init(struct fuse_conn *fc) INIT_LIST_HEAD(&fc->bg_queue); INIT_LIST_HEAD(&fc->entry); atomic_set(&fc->num_waiting, 0); + fc->max_background = FUSE_DEFAULT_MAX_BACKGROUND; + fc->congestion_threshold = FUSE_DEFAULT_CONGESTION_THRESHOLD; fc->khctr = 0; fc->polled_files = RB_ROOT; fc->reqctr = 0; @@ -736,6 +744,12 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req) else { unsigned long ra_pages; + if (arg->minor >= 13) { + if (arg->max_background) + fc->max_background = arg->max_background; + if (arg->congestion_threshold) + fc->congestion_threshold = arg->congestion_threshold; + } if (arg->minor >= 6) { ra_pages = arg->max_readahead / PAGE_CACHE_SIZE; if (arg->flags & FUSE_ASYNC_READ) diff --git a/include/linux/fuse.h b/include/linux/fuse.h index cf593bf9fd32..b3700f0ac268 100644 --- a/include/linux/fuse.h +++ b/include/linux/fuse.h @@ -30,6 +30,10 @@ * - add umask flag to input argument of open, mknod and mkdir * - add notification messages for invalidation of inodes and * directory entries + * + * 7.13 + * - make max number of background requests and congestion threshold + * tunables */ #ifndef _LINUX_FUSE_H @@ -41,7 +45,7 @@ #define FUSE_KERNEL_VERSION 7 /** Minor version number of this interface */ -#define FUSE_KERNEL_MINOR_VERSION 12 +#define FUSE_KERNEL_MINOR_VERSION 13 /** The node ID of the root inode */ #define FUSE_ROOT_ID 1 @@ -427,7 +431,8 @@ struct fuse_init_out { __u32 minor; __u32 max_readahead; __u32 flags; - __u32 unused; + __u16 max_background; + __u16 congestion_threshold; __u32 max_write; }; -- cgit v1.2.2 From 37d217f029a56a6d385f99773fb27dfcb51f9a46 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 8 Jul 2009 18:17:58 +0200 Subject: fuse: document protocol version negotiation Clarify how the protocol version should be negotiated between kernel and userspace. Notably libfuse didn't correctly handle the case when the supported major versions didn't match. Signed-off-by: Miklos Szeredi --- include/linux/fuse.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/include/linux/fuse.h b/include/linux/fuse.h index b3700f0ac268..3e2925a34bf0 100644 --- a/include/linux/fuse.h +++ b/include/linux/fuse.h @@ -41,6 +41,26 @@ #include +/* + * Version negotiation: + * + * Both the kernel and userspace send the version they support in the + * INIT request and reply respectively. + * + * If the major versions match then both shall use the smallest + * of the two minor versions for communication. + * + * If the kernel supports a larger major version, then userspace shall + * reply with the major version it supports, ignore the rest of the + * INIT message and expect a new INIT message from the kernel with a + * matching major version. + * + * If the library supports a larger major version, then it shall fall + * back to the major protocol version sent by the kernel for + * communication and reply with that major version (and an arbitrary + * supported minor version). + */ + /** Version number of this interface */ #define FUSE_KERNEL_VERSION 7 -- cgit v1.2.2 From d6db07ded51c5fb4df2f4a32e6a41e9bb5db7fc4 Mon Sep 17 00:00:00 2001 From: Csaba Henk Date: Mon, 24 Aug 2009 06:14:07 +0200 Subject: fuse: use drop_nlink() instead of direct nlink manipulation drop_nlink() is the API function to decrease the link count of an inode. However, at a place the control filesystem used the decrement operator on i_nlink directly. Fix this. Cc: Anand Avati Signed-off-by: Csaba Henk Signed-off-by: Miklos Szeredi --- fs/fuse/control.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/fuse/control.c b/fs/fuse/control.c index 99c99dfb0373..218d514924c0 100644 --- a/fs/fuse/control.c +++ b/fs/fuse/control.c @@ -156,7 +156,7 @@ void fuse_ctl_remove_conn(struct fuse_conn *fc) d_drop(dentry); dput(dentry); } - fuse_control_sb->s_root->d_inode->i_nlink--; + drop_nlink(fuse_control_sb->s_root->d_inode); } static int fuse_ctl_fill_super(struct super_block *sb, void *data, int silent) -- cgit v1.2.2 From 487ea5af6358cb27c994e2cf056d4ee0872e43c3 Mon Sep 17 00:00:00 2001 From: Csaba Henk Date: Wed, 26 Aug 2009 19:17:22 +0200 Subject: fuse: limit user-specified values of max background requests An untrusted user could DoS the system if s/he were allowed to accumulate an arbitrary number of pending background requests by setting the above limits to extremely high values in INIT. This patch excludes this possibility by imposing global upper limits on the possible values of per-mount "max background requests" and "congestion threshold" parameters for unprivileged FUSE filesystems. These global limits are implemented as module parameters. Signed-off-by: Csaba Henk Signed-off-by: Miklos Szeredi --- fs/fuse/inode.c | 78 ++++++++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 72 insertions(+), 6 deletions(-) diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 9aa6f46d0c32..e54ddbda208c 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -28,6 +29,24 @@ static struct kmem_cache *fuse_inode_cachep; struct list_head fuse_conn_list; DEFINE_MUTEX(fuse_mutex); +static int set_global_limit(const char *val, struct kernel_param *kp); + +static unsigned max_user_bgreq; +module_param_call(max_user_bgreq, set_global_limit, param_get_uint, + &max_user_bgreq, 0644); +__MODULE_PARM_TYPE(max_user_bgreq, "uint"); +MODULE_PARM_DESC(max_user_bgreq, + "Global limit for the maximum number of backgrounded requests an " + "unprivileged user can set"); + +static unsigned max_user_congthresh; +module_param_call(max_user_congthresh, set_global_limit, param_get_uint, + &max_user_congthresh, 0644); +__MODULE_PARM_TYPE(max_user_congthresh, "uint"); +MODULE_PARM_DESC(max_user_congthresh, + "Global limit for the maximum congestion threshold an " + "unprivileged user can set"); + #define FUSE_SUPER_MAGIC 0x65735546 #define FUSE_DEFAULT_BLKSIZE 512 @@ -735,6 +754,54 @@ static const struct super_operations fuse_super_operations = { .show_options = fuse_show_options, }; +static void sanitize_global_limit(unsigned *limit) +{ + if (*limit == 0) + *limit = ((num_physpages << PAGE_SHIFT) >> 13) / + sizeof(struct fuse_req); + + if (*limit >= 1 << 16) + *limit = (1 << 16) - 1; +} + +static int set_global_limit(const char *val, struct kernel_param *kp) +{ + int rv; + + rv = param_set_uint(val, kp); + if (rv) + return rv; + + sanitize_global_limit((unsigned *)kp->arg); + + return 0; +} + +static void process_init_limits(struct fuse_conn *fc, struct fuse_init_out *arg) +{ + int cap_sys_admin = capable(CAP_SYS_ADMIN); + + if (arg->minor < 13) + return; + + sanitize_global_limit(&max_user_bgreq); + sanitize_global_limit(&max_user_congthresh); + + if (arg->max_background) { + fc->max_background = arg->max_background; + + if (!cap_sys_admin && fc->max_background > max_user_bgreq) + fc->max_background = max_user_bgreq; + } + if (arg->congestion_threshold) { + fc->congestion_threshold = arg->congestion_threshold; + + if (!cap_sys_admin && + fc->congestion_threshold > max_user_congthresh) + fc->congestion_threshold = max_user_congthresh; + } +} + static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req) { struct fuse_init_out *arg = &req->misc.init_out; @@ -744,12 +811,8 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req) else { unsigned long ra_pages; - if (arg->minor >= 13) { - if (arg->max_background) - fc->max_background = arg->max_background; - if (arg->congestion_threshold) - fc->congestion_threshold = arg->congestion_threshold; - } + process_init_limits(fc, arg); + if (arg->minor >= 6) { ra_pages = arg->max_readahead / PAGE_CACHE_SIZE; if (arg->flags & FUSE_ASYNC_READ) @@ -1161,6 +1224,9 @@ static int __init fuse_init(void) if (res) goto err_sysfs_cleanup; + sanitize_global_limit(&max_user_bgreq); + sanitize_global_limit(&max_user_congthresh); + return 0; err_sysfs_cleanup: -- cgit v1.2.2 From 79a9d99434b104c562f30f21b75317667f444793 Mon Sep 17 00:00:00 2001 From: Csaba Henk Date: Wed, 26 Aug 2009 19:18:24 +0200 Subject: fuse: add fusectl interface to max_background Make the max_background and congestion_threshold parameters of a FUSE mount tunable at runtime by adding the respective knobs to its directory within the fusectl filesystem. Signed-off-by: Csaba Henk Signed-off-by: Miklos Szeredi --- fs/fuse/control.c | 136 +++++++++++++++++++++++++++++++++++++++++++++++++++++- fs/fuse/fuse_i.h | 6 ++- fs/fuse/inode.c | 4 +- 3 files changed, 141 insertions(+), 5 deletions(-) diff --git a/fs/fuse/control.c b/fs/fuse/control.c index 218d514924c0..3773fd63d2f9 100644 --- a/fs/fuse/control.c +++ b/fs/fuse/control.c @@ -61,6 +61,121 @@ static ssize_t fuse_conn_waiting_read(struct file *file, char __user *buf, return simple_read_from_buffer(buf, len, ppos, tmp, size); } +static ssize_t fuse_conn_limit_read(struct file *file, char __user *buf, + size_t len, loff_t *ppos, unsigned val) +{ + char tmp[32]; + size_t size = sprintf(tmp, "%u\n", val); + + return simple_read_from_buffer(buf, len, ppos, tmp, size); +} + +static ssize_t fuse_conn_limit_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos, unsigned *val, + unsigned global_limit) +{ + unsigned long t; + char tmp[32]; + unsigned limit = (1 << 16) - 1; + int err; + + if (*ppos || count >= sizeof(tmp) - 1) + return -EINVAL; + + if (copy_from_user(tmp, buf, count)) + return -EINVAL; + + tmp[count] = '\0'; + + err = strict_strtoul(tmp, 0, &t); + if (err) + return err; + + if (!capable(CAP_SYS_ADMIN)) + limit = min(limit, global_limit); + + if (t > limit) + return -EINVAL; + + *val = t; + + return count; +} + +static ssize_t fuse_conn_max_background_read(struct file *file, + char __user *buf, size_t len, + loff_t *ppos) +{ + struct fuse_conn *fc; + unsigned val; + + fc = fuse_ctl_file_conn_get(file); + if (!fc) + return 0; + + val = fc->max_background; + fuse_conn_put(fc); + + return fuse_conn_limit_read(file, buf, len, ppos, val); +} + +static ssize_t fuse_conn_max_background_write(struct file *file, + const char __user *buf, + size_t count, loff_t *ppos) +{ + unsigned val; + ssize_t ret; + + ret = fuse_conn_limit_write(file, buf, count, ppos, &val, + max_user_bgreq); + if (ret > 0) { + struct fuse_conn *fc = fuse_ctl_file_conn_get(file); + if (fc) { + fc->max_background = val; + fuse_conn_put(fc); + } + } + + return ret; +} + +static ssize_t fuse_conn_congestion_threshold_read(struct file *file, + char __user *buf, size_t len, + loff_t *ppos) +{ + struct fuse_conn *fc; + unsigned val; + + fc = fuse_ctl_file_conn_get(file); + if (!fc) + return 0; + + val = fc->congestion_threshold; + fuse_conn_put(fc); + + return fuse_conn_limit_read(file, buf, len, ppos, val); +} + +static ssize_t fuse_conn_congestion_threshold_write(struct file *file, + const char __user *buf, + size_t count, loff_t *ppos) +{ + unsigned val; + ssize_t ret; + + ret = fuse_conn_limit_write(file, buf, count, ppos, &val, + max_user_congthresh); + if (ret > 0) { + struct fuse_conn *fc = fuse_ctl_file_conn_get(file); + if (fc) { + fc->congestion_threshold = val; + fuse_conn_put(fc); + } + } + + return ret; +} + static const struct file_operations fuse_ctl_abort_ops = { .open = nonseekable_open, .write = fuse_conn_abort_write, @@ -71,6 +186,18 @@ static const struct file_operations fuse_ctl_waiting_ops = { .read = fuse_conn_waiting_read, }; +static const struct file_operations fuse_conn_max_background_ops = { + .open = nonseekable_open, + .read = fuse_conn_max_background_read, + .write = fuse_conn_max_background_write, +}; + +static const struct file_operations fuse_conn_congestion_threshold_ops = { + .open = nonseekable_open, + .read = fuse_conn_congestion_threshold_read, + .write = fuse_conn_congestion_threshold_write, +}; + static struct dentry *fuse_ctl_add_dentry(struct dentry *parent, struct fuse_conn *fc, const char *name, @@ -127,9 +254,14 @@ int fuse_ctl_add_conn(struct fuse_conn *fc) goto err; if (!fuse_ctl_add_dentry(parent, fc, "waiting", S_IFREG | 0400, 1, - NULL, &fuse_ctl_waiting_ops) || + NULL, &fuse_ctl_waiting_ops) || !fuse_ctl_add_dentry(parent, fc, "abort", S_IFREG | 0200, 1, - NULL, &fuse_ctl_abort_ops)) + NULL, &fuse_ctl_abort_ops) || + !fuse_ctl_add_dentry(parent, fc, "max_background", S_IFREG | 0600, + 1, NULL, &fuse_conn_max_background_ops) || + !fuse_ctl_add_dentry(parent, fc, "congestion_threshold", + S_IFREG | 0600, 1, NULL, + &fuse_conn_congestion_threshold_ops)) goto err; return 0; diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 6bcfab04396f..fc9c79feb5f7 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -32,7 +32,7 @@ #define FUSE_NAME_MAX 1024 /** Number of dentries for each connection in the control filesystem */ -#define FUSE_CTL_NUM_DENTRIES 3 +#define FUSE_CTL_NUM_DENTRIES 5 /** If the FUSE_DEFAULT_PERMISSIONS flag is given, the filesystem module will check permissions based on the file mode. Otherwise no @@ -49,6 +49,10 @@ extern struct list_head fuse_conn_list; /** Global mutex protecting fuse_conn_list and the control filesystem */ extern struct mutex fuse_mutex; +/** Module parameters */ +extern unsigned max_user_bgreq; +extern unsigned max_user_congthresh; + /** FUSE inode */ struct fuse_inode { /** Inode data */ diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index e54ddbda208c..8f9aca8d4ad5 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -31,7 +31,7 @@ DEFINE_MUTEX(fuse_mutex); static int set_global_limit(const char *val, struct kernel_param *kp); -static unsigned max_user_bgreq; +unsigned max_user_bgreq; module_param_call(max_user_bgreq, set_global_limit, param_get_uint, &max_user_bgreq, 0644); __MODULE_PARM_TYPE(max_user_bgreq, "uint"); @@ -39,7 +39,7 @@ MODULE_PARM_DESC(max_user_bgreq, "Global limit for the maximum number of backgrounded requests an " "unprivileged user can set"); -static unsigned max_user_congthresh; +unsigned max_user_congthresh; module_param_call(max_user_congthresh, set_global_limit, param_get_uint, &max_user_congthresh, 0644); __MODULE_PARM_TYPE(max_user_congthresh, "uint"); -- cgit v1.2.2