diff options
author | Miklos Szeredi <mszeredi@suse.cz> | 2010-12-07 14:16:56 -0500 |
---|---|---|
committer | Miklos Szeredi <mszeredi@suse.cz> | 2010-12-07 14:16:56 -0500 |
commit | 02c048b919455aaa38628563cdcc2e691c8a9f53 (patch) | |
tree | 1f98cd9fab564b6df8869a60f36e82d5d20f14f3 | |
parent | 07e77dca8a1f17a724a9b7449f0ca02e70e9d057 (diff) |
fuse: allow batching of FORGET requests
Terje Malmedal reports that a fuse filesystem with 32 million inodes
on a machine with lots of memory can take up to 30 minutes to process
FORGET requests when all those inodes are evicted from the icache.
To solve this, create a BATCH_FORGET request that allows up to about
8000 FORGET requests to be sent in a single message.
This request is only sent if userspace supports interface version 7.16
or later; otherwise the kernel falls back to sending individual FORGET messages.
Reported-by: Terje Malmedal <terje.malmedal@usit.uio.no>
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
-rw-r--r-- | fs/fuse/dev.c | 92 | ||||
-rw-r--r-- | fs/fuse/fuse_i.h | 3 | ||||
-rw-r--r-- | include/linux/fuse.h | 16 |
3 files changed, 97 insertions, 14 deletions
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index fed65303eeeb..cf8d28d1fbad 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c | |||
@@ -254,8 +254,8 @@ static void queue_request(struct fuse_conn *fc, struct fuse_req *req) | |||
254 | void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget, | 254 | void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget, |
255 | u64 nodeid, u64 nlookup) | 255 | u64 nodeid, u64 nlookup) |
256 | { | 256 | { |
257 | forget->nodeid = nodeid; | 257 | forget->forget_one.nodeid = nodeid; |
258 | forget->nlookup = nlookup; | 258 | forget->forget_one.nlookup = nlookup; |
259 | 259 | ||
260 | spin_lock(&fc->lock); | 260 | spin_lock(&fc->lock); |
261 | fc->forget_list_tail->next = forget; | 261 | fc->forget_list_tail->next = forget; |
@@ -974,15 +974,26 @@ __releases(fc->lock) | |||
974 | return err ? err : reqsize; | 974 | return err ? err : reqsize; |
975 | } | 975 | } |
976 | 976 | ||
977 | static struct fuse_forget_link *dequeue_forget(struct fuse_conn *fc) | 977 | static struct fuse_forget_link *dequeue_forget(struct fuse_conn *fc, |
978 | unsigned max, | ||
979 | unsigned *countp) | ||
978 | { | 980 | { |
979 | struct fuse_forget_link *forget = fc->forget_list_head.next; | 981 | struct fuse_forget_link *head = fc->forget_list_head.next; |
982 | struct fuse_forget_link **newhead = &head; | ||
983 | unsigned count; | ||
980 | 984 | ||
981 | fc->forget_list_head.next = forget->next; | 985 | for (count = 0; *newhead != NULL && count < max; count++) |
986 | newhead = &(*newhead)->next; | ||
987 | |||
988 | fc->forget_list_head.next = *newhead; | ||
989 | *newhead = NULL; | ||
982 | if (fc->forget_list_head.next == NULL) | 990 | if (fc->forget_list_head.next == NULL) |
983 | fc->forget_list_tail = &fc->forget_list_head; | 991 | fc->forget_list_tail = &fc->forget_list_head; |
984 | 992 | ||
985 | return forget; | 993 | if (countp != NULL) |
994 | *countp = count; | ||
995 | |||
996 | return head; | ||
986 | } | 997 | } |
987 | 998 | ||
988 | static int fuse_read_single_forget(struct fuse_conn *fc, | 999 | static int fuse_read_single_forget(struct fuse_conn *fc, |
@@ -991,13 +1002,13 @@ static int fuse_read_single_forget(struct fuse_conn *fc, | |||
991 | __releases(fc->lock) | 1002 | __releases(fc->lock) |
992 | { | 1003 | { |
993 | int err; | 1004 | int err; |
994 | struct fuse_forget_link *forget = dequeue_forget(fc); | 1005 | struct fuse_forget_link *forget = dequeue_forget(fc, 1, NULL); |
995 | struct fuse_forget_in arg = { | 1006 | struct fuse_forget_in arg = { |
996 | .nlookup = forget->nlookup, | 1007 | .nlookup = forget->forget_one.nlookup, |
997 | }; | 1008 | }; |
998 | struct fuse_in_header ih = { | 1009 | struct fuse_in_header ih = { |
999 | .opcode = FUSE_FORGET, | 1010 | .opcode = FUSE_FORGET, |
1000 | .nodeid = forget->nodeid, | 1011 | .nodeid = forget->forget_one.nodeid, |
1001 | .unique = fuse_get_unique(fc), | 1012 | .unique = fuse_get_unique(fc), |
1002 | .len = sizeof(ih) + sizeof(arg), | 1013 | .len = sizeof(ih) + sizeof(arg), |
1003 | }; | 1014 | }; |
@@ -1018,6 +1029,65 @@ __releases(fc->lock) | |||
1018 | return ih.len; | 1029 | return ih.len; |
1019 | } | 1030 | } |
1020 | 1031 | ||
1032 | static int fuse_read_batch_forget(struct fuse_conn *fc, | ||
1033 | struct fuse_copy_state *cs, size_t nbytes) | ||
1034 | __releases(fc->lock) | ||
1035 | { | ||
1036 | int err; | ||
1037 | unsigned max_forgets; | ||
1038 | unsigned count; | ||
1039 | struct fuse_forget_link *head; | ||
1040 | struct fuse_batch_forget_in arg = { .count = 0 }; | ||
1041 | struct fuse_in_header ih = { | ||
1042 | .opcode = FUSE_BATCH_FORGET, | ||
1043 | .unique = fuse_get_unique(fc), | ||
1044 | .len = sizeof(ih) + sizeof(arg), | ||
1045 | }; | ||
1046 | |||
1047 | if (nbytes < ih.len) { | ||
1048 | spin_unlock(&fc->lock); | ||
1049 | return -EINVAL; | ||
1050 | } | ||
1051 | |||
1052 | max_forgets = (nbytes - ih.len) / sizeof(struct fuse_forget_one); | ||
1053 | head = dequeue_forget(fc, max_forgets, &count); | ||
1054 | spin_unlock(&fc->lock); | ||
1055 | |||
1056 | arg.count = count; | ||
1057 | ih.len += count * sizeof(struct fuse_forget_one); | ||
1058 | err = fuse_copy_one(cs, &ih, sizeof(ih)); | ||
1059 | if (!err) | ||
1060 | err = fuse_copy_one(cs, &arg, sizeof(arg)); | ||
1061 | |||
1062 | while (head) { | ||
1063 | struct fuse_forget_link *forget = head; | ||
1064 | |||
1065 | if (!err) { | ||
1066 | err = fuse_copy_one(cs, &forget->forget_one, | ||
1067 | sizeof(forget->forget_one)); | ||
1068 | } | ||
1069 | head = forget->next; | ||
1070 | kfree(forget); | ||
1071 | } | ||
1072 | |||
1073 | fuse_copy_finish(cs); | ||
1074 | |||
1075 | if (err) | ||
1076 | return err; | ||
1077 | |||
1078 | return ih.len; | ||
1079 | } | ||
1080 | |||
1081 | static int fuse_read_forget(struct fuse_conn *fc, struct fuse_copy_state *cs, | ||
1082 | size_t nbytes) | ||
1083 | __releases(fc->lock) | ||
1084 | { | ||
1085 | if (fc->minor < 16 || fc->forget_list_head.next->next == NULL) | ||
1086 | return fuse_read_single_forget(fc, cs, nbytes); | ||
1087 | else | ||
1088 | return fuse_read_batch_forget(fc, cs, nbytes); | ||
1089 | } | ||
1090 | |||
1021 | /* | 1091 | /* |
1022 | * Read a single request into the userspace filesystem's buffer. This | 1092 | * Read a single request into the userspace filesystem's buffer. This |
1023 | * function waits until a request is available, then removes it from | 1093 | * function waits until a request is available, then removes it from |
@@ -1058,7 +1128,7 @@ static ssize_t fuse_dev_do_read(struct fuse_conn *fc, struct file *file, | |||
1058 | 1128 | ||
1059 | if (forget_pending(fc)) { | 1129 | if (forget_pending(fc)) { |
1060 | if (list_empty(&fc->pending) || fc->forget_batch-- > 0) | 1130 | if (list_empty(&fc->pending) || fc->forget_batch-- > 0) |
1061 | return fuse_read_single_forget(fc, cs, nbytes); | 1131 | return fuse_read_forget(fc, cs, nbytes); |
1062 | 1132 | ||
1063 | if (fc->forget_batch <= -8) | 1133 | if (fc->forget_batch <= -8) |
1064 | fc->forget_batch = 16; | 1134 | fc->forget_batch = 16; |
@@ -1837,7 +1907,7 @@ __acquires(fc->lock) | |||
1837 | end_requests(fc, &fc->pending); | 1907 | end_requests(fc, &fc->pending); |
1838 | end_requests(fc, &fc->processing); | 1908 | end_requests(fc, &fc->processing); |
1839 | while (forget_pending(fc)) | 1909 | while (forget_pending(fc)) |
1840 | kfree(dequeue_forget(fc)); | 1910 | kfree(dequeue_forget(fc, 1, NULL)); |
1841 | } | 1911 | } |
1842 | 1912 | ||
1843 | /* | 1913 | /* |
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 33369c63a522..ae5744a2f9e9 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h | |||
@@ -55,8 +55,7 @@ extern unsigned max_user_congthresh; | |||
55 | 55 | ||
56 | /* One forget request */ | 56 | /* One forget request */ |
57 | struct fuse_forget_link { | 57 | struct fuse_forget_link { |
58 | u64 nodeid; | 58 | struct fuse_forget_one forget_one; |
59 | u64 nlookup; | ||
60 | struct fuse_forget_link *next; | 59 | struct fuse_forget_link *next; |
61 | }; | 60 | }; |
62 | 61 | ||
diff --git a/include/linux/fuse.h b/include/linux/fuse.h index c3c578e09833..cf11881f4938 100644 --- a/include/linux/fuse.h +++ b/include/linux/fuse.h | |||
@@ -41,6 +41,9 @@ | |||
41 | * 7.15 | 41 | * 7.15 |
42 | * - add store notify | 42 | * - add store notify |
43 | * - add retrieve notify | 43 | * - add retrieve notify |
44 | * | ||
45 | * 7.16 | ||
46 | * - add BATCH_FORGET request | ||
44 | */ | 47 | */ |
45 | 48 | ||
46 | #ifndef _LINUX_FUSE_H | 49 | #ifndef _LINUX_FUSE_H |
@@ -72,7 +75,7 @@ | |||
72 | #define FUSE_KERNEL_VERSION 7 | 75 | #define FUSE_KERNEL_VERSION 7 |
73 | 76 | ||
74 | /** Minor version number of this interface */ | 77 | /** Minor version number of this interface */ |
75 | #define FUSE_KERNEL_MINOR_VERSION 15 | 78 | #define FUSE_KERNEL_MINOR_VERSION 16 |
76 | 79 | ||
77 | /** The node ID of the root inode */ | 80 | /** The node ID of the root inode */ |
78 | #define FUSE_ROOT_ID 1 | 81 | #define FUSE_ROOT_ID 1 |
@@ -256,6 +259,7 @@ enum fuse_opcode { | |||
256 | FUSE_IOCTL = 39, | 259 | FUSE_IOCTL = 39, |
257 | FUSE_POLL = 40, | 260 | FUSE_POLL = 40, |
258 | FUSE_NOTIFY_REPLY = 41, | 261 | FUSE_NOTIFY_REPLY = 41, |
262 | FUSE_BATCH_FORGET = 42, | ||
259 | 263 | ||
260 | /* CUSE specific operations */ | 264 | /* CUSE specific operations */ |
261 | CUSE_INIT = 4096, | 265 | CUSE_INIT = 4096, |
@@ -290,6 +294,16 @@ struct fuse_forget_in { | |||
290 | __u64 nlookup; | 294 | __u64 nlookup; |
291 | }; | 295 | }; |
292 | 296 | ||
297 | struct fuse_forget_one { | ||
298 | __u64 nodeid; | ||
299 | __u64 nlookup; | ||
300 | }; | ||
301 | |||
302 | struct fuse_batch_forget_in { | ||
303 | __u32 count; | ||
304 | __u32 dummy; | ||
305 | }; | ||
306 | |||
293 | struct fuse_getattr_in { | 307 | struct fuse_getattr_in { |
294 | __u32 getattr_flags; | 308 | __u32 getattr_flags; |
295 | __u32 dummy; | 309 | __u32 dummy; |