diff options
author | Anand V. Avati <avati@redhat.com> | 2012-08-19 08:53:23 -0400 |
---|---|---|
committer | Miklos Szeredi <mszeredi@suse.cz> | 2013-01-24 10:21:25 -0500 |
commit | 0b05b18381eea98c9c9ada95629bf659a88c9374 (patch) | |
tree | a6389eaffda03a2e28cb05be242e03ef839fcb91 | |
parent | ff7532ca2c631e7e96dcd305a967b610259dc0ea (diff) |
fuse: implement NFS-like readdirplus support
This patch implements readdirplus support in FUSE, similar to NFS.
The payload returned in the readdirplus call contains a
'fuse_entry_out' structure, thereby providing all the necessary inputs
for 'faking' a lookup() operation on the spot.
If the dentry and inode already exist (e.g. in a re-run of ls -l),
then just the inode attributes timeout and dentry timeout are refreshed.
With a simple client->network->server implementation of a FUSE based
filesystem, the following performance observations were made:
Test: Performing a filesystem crawl over 20,000 files with
sh# time ls -lR /mnt
Without readdirplus:
Run 1: 18.1s
Run 2: 16.0s
Run 3: 16.2s
With readdirplus:
Run 1: 4.1s
Run 2: 3.8s
Run 3: 3.8s
The performance improvement is significant as it avoided 20,000 upcalls
(lookup). Cache consistency is no worse than it already is.
Signed-off-by: Anand V. Avati <avati@redhat.com>
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
-rw-r--r-- | fs/fuse/dev.c | 19 | ||||
-rw-r--r-- | fs/fuse/dir.c | 160 | ||||
-rw-r--r-- | fs/fuse/fuse_i.h | 6 | ||||
-rw-r--r-- | fs/fuse/inode.c | 5 | ||||
-rw-r--r-- | include/uapi/linux/fuse.h | 12 |
5 files changed, 197 insertions, 5 deletions
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index e83351aa5bad..05c3eec298f2 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c | |||
@@ -491,6 +491,25 @@ void fuse_request_send_background_locked(struct fuse_conn *fc, | |||
491 | fuse_request_send_nowait_locked(fc, req); | 491 | fuse_request_send_nowait_locked(fc, req); |
492 | } | 492 | } |
493 | 493 | ||
494 | void fuse_force_forget(struct file *file, u64 nodeid) | ||
495 | { | ||
496 | struct inode *inode = file->f_path.dentry->d_inode; | ||
497 | struct fuse_conn *fc = get_fuse_conn(inode); | ||
498 | struct fuse_req *req; | ||
499 | struct fuse_forget_in inarg; | ||
500 | |||
501 | memset(&inarg, 0, sizeof(inarg)); | ||
502 | inarg.nlookup = 1; | ||
503 | req = fuse_get_req_nofail(fc, file); | ||
504 | req->in.h.opcode = FUSE_FORGET; | ||
505 | req->in.h.nodeid = nodeid; | ||
506 | req->in.numargs = 1; | ||
507 | req->in.args[0].size = sizeof(inarg); | ||
508 | req->in.args[0].value = &inarg; | ||
509 | req->isreply = 0; | ||
510 | fuse_request_send_nowait(fc, req); | ||
511 | } | ||
512 | |||
494 | /* | 513 | /* |
495 | * Lock the request. Up to the next unlock_request() there mustn't be | 514 | * Lock the request. Up to the next unlock_request() there mustn't be |
496 | * anything that could cause a page-fault. If the request was already | 515 | * anything that could cause a page-fault. If the request was already |
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index b7c09f9eb40c..dcc1e522c7d4 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c | |||
@@ -1155,6 +1155,143 @@ static int parse_dirfile(char *buf, size_t nbytes, struct file *file, | |||
1155 | return 0; | 1155 | return 0; |
1156 | } | 1156 | } |
1157 | 1157 | ||
1158 | static int fuse_direntplus_link(struct file *file, | ||
1159 | struct fuse_direntplus *direntplus, | ||
1160 | u64 attr_version) | ||
1161 | { | ||
1162 | int err; | ||
1163 | struct fuse_entry_out *o = &direntplus->entry_out; | ||
1164 | struct fuse_dirent *dirent = &direntplus->dirent; | ||
1165 | struct dentry *parent = file->f_path.dentry; | ||
1166 | struct qstr name = QSTR_INIT(dirent->name, dirent->namelen); | ||
1167 | struct dentry *dentry; | ||
1168 | struct dentry *alias; | ||
1169 | struct inode *dir = parent->d_inode; | ||
1170 | struct fuse_conn *fc; | ||
1171 | struct inode *inode; | ||
1172 | |||
1173 | if (!o->nodeid) { | ||
1174 | /* | ||
1175 | * Unlike in the case of fuse_lookup, zero nodeid does not mean | ||
1176 | * ENOENT. Instead, it only means the userspace filesystem did | ||
1177 | * not want to return attributes/handle for this entry. | ||
1178 | * | ||
1179 | * So do nothing. | ||
1180 | */ | ||
1181 | return 0; | ||
1182 | } | ||
1183 | |||
1184 | if (name.name[0] == '.') { | ||
1185 | /* | ||
1186 | * We could potentially refresh the attributes of the directory | ||
1187 | * and its parent? | ||
1188 | */ | ||
1189 | if (name.len == 1) | ||
1190 | return 0; | ||
1191 | if (name.name[1] == '.' && name.len == 2) | ||
1192 | return 0; | ||
1193 | } | ||
1194 | fc = get_fuse_conn(dir); | ||
1195 | |||
1196 | name.hash = full_name_hash(name.name, name.len); | ||
1197 | dentry = d_lookup(parent, &name); | ||
1198 | if (dentry && dentry->d_inode) { | ||
1199 | inode = dentry->d_inode; | ||
1200 | if (get_node_id(inode) == o->nodeid) { | ||
1201 | struct fuse_inode *fi; | ||
1202 | fi = get_fuse_inode(inode); | ||
1203 | spin_lock(&fc->lock); | ||
1204 | fi->nlookup++; | ||
1205 | spin_unlock(&fc->lock); | ||
1206 | |||
1207 | /* | ||
1208 | * The other branch to 'found' comes via fuse_iget() | ||
1209 | * which bumps nlookup inside | ||
1210 | */ | ||
1211 | goto found; | ||
1212 | } | ||
1213 | err = d_invalidate(dentry); | ||
1214 | if (err) | ||
1215 | goto out; | ||
1216 | dput(dentry); | ||
1217 | dentry = NULL; | ||
1218 | } | ||
1219 | |||
1220 | dentry = d_alloc(parent, &name); | ||
1221 | err = -ENOMEM; | ||
1222 | if (!dentry) | ||
1223 | goto out; | ||
1224 | |||
1225 | inode = fuse_iget(dir->i_sb, o->nodeid, o->generation, | ||
1226 | &o->attr, entry_attr_timeout(o), attr_version); | ||
1227 | if (!inode) | ||
1228 | goto out; | ||
1229 | |||
1230 | alias = d_materialise_unique(dentry, inode); | ||
1231 | err = PTR_ERR(alias); | ||
1232 | if (IS_ERR(alias)) | ||
1233 | goto out; | ||
1234 | if (alias) { | ||
1235 | dput(dentry); | ||
1236 | dentry = alias; | ||
1237 | } | ||
1238 | |||
1239 | found: | ||
1240 | fuse_change_attributes(inode, &o->attr, entry_attr_timeout(o), | ||
1241 | attr_version); | ||
1242 | |||
1243 | fuse_change_entry_timeout(dentry, o); | ||
1244 | |||
1245 | err = 0; | ||
1246 | out: | ||
1247 | if (dentry) | ||
1248 | dput(dentry); | ||
1249 | return err; | ||
1250 | } | ||
1251 | |||
1252 | static int parse_dirplusfile(char *buf, size_t nbytes, struct file *file, | ||
1253 | void *dstbuf, filldir_t filldir, u64 attr_version) | ||
1254 | { | ||
1255 | struct fuse_direntplus *direntplus; | ||
1256 | struct fuse_dirent *dirent; | ||
1257 | size_t reclen; | ||
1258 | int over = 0; | ||
1259 | int ret; | ||
1260 | |||
1261 | while (nbytes >= FUSE_NAME_OFFSET_DIRENTPLUS) { | ||
1262 | direntplus = (struct fuse_direntplus *) buf; | ||
1263 | dirent = &direntplus->dirent; | ||
1264 | reclen = FUSE_DIRENTPLUS_SIZE(direntplus); | ||
1265 | |||
1266 | if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX) | ||
1267 | return -EIO; | ||
1268 | if (reclen > nbytes) | ||
1269 | break; | ||
1270 | |||
1271 | if (!over) { | ||
1272 | /* We fill entries into dstbuf only as much as | ||
1273 | it can hold. But we still continue iterating | ||
1274 | over remaining entries to link them. If not, | ||
1275 | we need to send a FORGET for each of those | ||
1276 | which we did not link. | ||
1277 | */ | ||
1278 | over = filldir(dstbuf, dirent->name, dirent->namelen, | ||
1279 | file->f_pos, dirent->ino, | ||
1280 | dirent->type); | ||
1281 | file->f_pos = dirent->off; | ||
1282 | } | ||
1283 | |||
1284 | buf += reclen; | ||
1285 | nbytes -= reclen; | ||
1286 | |||
1287 | ret = fuse_direntplus_link(file, direntplus, attr_version); | ||
1288 | if (ret) | ||
1289 | fuse_force_forget(file, direntplus->entry_out.nodeid); | ||
1290 | } | ||
1291 | |||
1292 | return 0; | ||
1293 | } | ||
1294 | |||
1158 | static int fuse_readdir(struct file *file, void *dstbuf, filldir_t filldir) | 1295 | static int fuse_readdir(struct file *file, void *dstbuf, filldir_t filldir) |
1159 | { | 1296 | { |
1160 | int err; | 1297 | int err; |
@@ -1163,6 +1300,7 @@ static int fuse_readdir(struct file *file, void *dstbuf, filldir_t filldir) | |||
1163 | struct inode *inode = file->f_path.dentry->d_inode; | 1300 | struct inode *inode = file->f_path.dentry->d_inode; |
1164 | struct fuse_conn *fc = get_fuse_conn(inode); | 1301 | struct fuse_conn *fc = get_fuse_conn(inode); |
1165 | struct fuse_req *req; | 1302 | struct fuse_req *req; |
1303 | u64 attr_version = 0; | ||
1166 | 1304 | ||
1167 | if (is_bad_inode(inode)) | 1305 | if (is_bad_inode(inode)) |
1168 | return -EIO; | 1306 | return -EIO; |
@@ -1179,14 +1317,28 @@ static int fuse_readdir(struct file *file, void *dstbuf, filldir_t filldir) | |||
1179 | req->out.argpages = 1; | 1317 | req->out.argpages = 1; |
1180 | req->num_pages = 1; | 1318 | req->num_pages = 1; |
1181 | req->pages[0] = page; | 1319 | req->pages[0] = page; |
1182 | fuse_read_fill(req, file, file->f_pos, PAGE_SIZE, FUSE_READDIR); | 1320 | if (fc->do_readdirplus) { |
1321 | attr_version = fuse_get_attr_version(fc); | ||
1322 | fuse_read_fill(req, file, file->f_pos, PAGE_SIZE, | ||
1323 | FUSE_READDIRPLUS); | ||
1324 | } else { | ||
1325 | fuse_read_fill(req, file, file->f_pos, PAGE_SIZE, | ||
1326 | FUSE_READDIR); | ||
1327 | } | ||
1183 | fuse_request_send(fc, req); | 1328 | fuse_request_send(fc, req); |
1184 | nbytes = req->out.args[0].size; | 1329 | nbytes = req->out.args[0].size; |
1185 | err = req->out.h.error; | 1330 | err = req->out.h.error; |
1186 | fuse_put_request(fc, req); | 1331 | fuse_put_request(fc, req); |
1187 | if (!err) | 1332 | if (!err) { |
1188 | err = parse_dirfile(page_address(page), nbytes, file, dstbuf, | 1333 | if (fc->do_readdirplus) { |
1189 | filldir); | 1334 | err = parse_dirplusfile(page_address(page), nbytes, |
1335 | file, dstbuf, filldir, | ||
1336 | attr_version); | ||
1337 | } else { | ||
1338 | err = parse_dirfile(page_address(page), nbytes, file, | ||
1339 | dstbuf, filldir); | ||
1340 | } | ||
1341 | } | ||
1190 | 1342 | ||
1191 | __free_page(page); | 1343 | __free_page(page); |
1192 | fuse_invalidate_attr(inode); /* atime changed */ | 1344 | fuse_invalidate_attr(inode); /* atime changed */ |
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index e105a53fc72d..5c5055306d3c 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h | |||
@@ -487,6 +487,9 @@ struct fuse_conn { | |||
487 | /** Use enhanced/automatic page cache invalidation. */ | 487 | /** Use enhanced/automatic page cache invalidation. */ |
488 | unsigned auto_inval_data:1; | 488 | unsigned auto_inval_data:1; |
489 | 489 | ||
490 | /** Does the filesystem support readdir-plus? */ | ||
491 | unsigned do_readdirplus:1; | ||
492 | |||
490 | /** The number of requests waiting for completion */ | 493 | /** The number of requests waiting for completion */ |
491 | atomic_t num_waiting; | 494 | atomic_t num_waiting; |
492 | 495 | ||
@@ -578,6 +581,9 @@ void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget, | |||
578 | 581 | ||
579 | struct fuse_forget_link *fuse_alloc_forget(void); | 582 | struct fuse_forget_link *fuse_alloc_forget(void); |
580 | 583 | ||
584 | /* Used by READDIRPLUS */ | ||
585 | void fuse_force_forget(struct file *file, u64 nodeid); | ||
586 | |||
581 | /** | 587 | /** |
582 | * Initialize READ or READDIR request | 588 | * Initialize READ or READDIR request |
583 | */ | 589 | */ |
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 73ca6b72beaf..6f7d5746bf52 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c | |||
@@ -863,6 +863,8 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req) | |||
863 | fc->dont_mask = 1; | 863 | fc->dont_mask = 1; |
864 | if (arg->flags & FUSE_AUTO_INVAL_DATA) | 864 | if (arg->flags & FUSE_AUTO_INVAL_DATA) |
865 | fc->auto_inval_data = 1; | 865 | fc->auto_inval_data = 1; |
866 | if (arg->flags & FUSE_DO_READDIRPLUS) | ||
867 | fc->do_readdirplus = 1; | ||
866 | } else { | 868 | } else { |
867 | ra_pages = fc->max_read / PAGE_CACHE_SIZE; | 869 | ra_pages = fc->max_read / PAGE_CACHE_SIZE; |
868 | fc->no_lock = 1; | 870 | fc->no_lock = 1; |
@@ -889,7 +891,8 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req) | |||
889 | arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC | | 891 | arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC | |
890 | FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK | | 892 | FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK | |
891 | FUSE_SPLICE_WRITE | FUSE_SPLICE_MOVE | FUSE_SPLICE_READ | | 893 | FUSE_SPLICE_WRITE | FUSE_SPLICE_MOVE | FUSE_SPLICE_READ | |
892 | FUSE_FLOCK_LOCKS | FUSE_IOCTL_DIR | FUSE_AUTO_INVAL_DATA; | 894 | FUSE_FLOCK_LOCKS | FUSE_IOCTL_DIR | FUSE_AUTO_INVAL_DATA | |
895 | FUSE_DO_READDIRPLUS; | ||
893 | req->in.h.opcode = FUSE_INIT; | 896 | req->in.h.opcode = FUSE_INIT; |
894 | req->in.numargs = 1; | 897 | req->in.numargs = 1; |
895 | req->in.args[0].size = sizeof(*arg); | 898 | req->in.args[0].size = sizeof(*arg); |
diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index d8c713e148e3..5dc1fea49ecd 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h | |||
@@ -193,6 +193,7 @@ struct fuse_file_lock { | |||
193 | #define FUSE_FLOCK_LOCKS (1 << 10) | 193 | #define FUSE_FLOCK_LOCKS (1 << 10) |
194 | #define FUSE_HAS_IOCTL_DIR (1 << 11) | 194 | #define FUSE_HAS_IOCTL_DIR (1 << 11) |
195 | #define FUSE_AUTO_INVAL_DATA (1 << 12) | 195 | #define FUSE_AUTO_INVAL_DATA (1 << 12) |
196 | #define FUSE_DO_READDIRPLUS (1 << 13) | ||
196 | 197 | ||
197 | /** | 198 | /** |
198 | * CUSE INIT request/reply flags | 199 | * CUSE INIT request/reply flags |
@@ -299,6 +300,7 @@ enum fuse_opcode { | |||
299 | FUSE_NOTIFY_REPLY = 41, | 300 | FUSE_NOTIFY_REPLY = 41, |
300 | FUSE_BATCH_FORGET = 42, | 301 | FUSE_BATCH_FORGET = 42, |
301 | FUSE_FALLOCATE = 43, | 302 | FUSE_FALLOCATE = 43, |
303 | FUSE_READDIRPLUS = 44, | ||
302 | 304 | ||
303 | /* CUSE specific operations */ | 305 | /* CUSE specific operations */ |
304 | CUSE_INIT = 4096, | 306 | CUSE_INIT = 4096, |
@@ -630,6 +632,16 @@ struct fuse_dirent { | |||
630 | #define FUSE_DIRENT_SIZE(d) \ | 632 | #define FUSE_DIRENT_SIZE(d) \ |
631 | FUSE_DIRENT_ALIGN(FUSE_NAME_OFFSET + (d)->namelen) | 633 | FUSE_DIRENT_ALIGN(FUSE_NAME_OFFSET + (d)->namelen) |
632 | 634 | ||
635 | struct fuse_direntplus { | ||
636 | struct fuse_entry_out entry_out; | ||
637 | struct fuse_dirent dirent; | ||
638 | }; | ||
639 | |||
640 | #define FUSE_NAME_OFFSET_DIRENTPLUS \ | ||
641 | offsetof(struct fuse_direntplus, dirent.name) | ||
642 | #define FUSE_DIRENTPLUS_SIZE(d) \ | ||
643 | FUSE_DIRENT_ALIGN(FUSE_NAME_OFFSET_DIRENTPLUS + (d)->dirent.namelen) | ||
644 | |||
633 | struct fuse_notify_inval_inode_out { | 645 | struct fuse_notify_inval_inode_out { |
634 | __u64 ino; | 646 | __u64 ino; |
635 | __s64 off; | 647 | __s64 off; |