aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJ. Bruce Fields <bfields@redhat.com>2012-03-19 12:34:39 -0400
committerJ. Bruce Fields <bfields@redhat.com>2012-03-19 12:35:05 -0400
commit62b9510cb373d5722fdaba71d961d8f695acfcd5 (patch)
tree5a16a206a7e1aac18b96bed26f21ebbe15605fa6
parent8546ee518c6662ddb3075249fb31d89e5dbfb7d5 (diff)
parent06effdbb49af5f6c7d20affaec74603914acc768 (diff)
nfsd: merge cookie collision fixes from ext4 tree
These changes fix readdir loops on ext4 filesystems with dir_index turned on. I'm pulling them from Ted's tree as I'd like to give them some extra nfsd testing, and expect to be applying (potentially conflicting) patches to the same code before the next merge window. From the nfs-ext4-premerge branch of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4 Signed-off-by: J. Bruce Fields <bfields@redhat.com>
-rw-r--r--fs/ext4/dir.c214
-rw-r--r--fs/ext4/ext4.h6
-rw-r--r--fs/ext4/hash.c4
-rw-r--r--fs/nfsd/vfs.c33
-rw-r--r--fs/nfsd/vfs.h2
-rw-r--r--include/linux/fs.h4
6 files changed, 205 insertions, 58 deletions
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index 164c56092e58..689d1b1a3f45 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -32,24 +32,8 @@ static unsigned char ext4_filetype_table[] = {
32 DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK 32 DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
33}; 33};
34 34
35static int ext4_readdir(struct file *, void *, filldir_t);
36static int ext4_dx_readdir(struct file *filp, 35static int ext4_dx_readdir(struct file *filp,
37 void *dirent, filldir_t filldir); 36 void *dirent, filldir_t filldir);
38static int ext4_release_dir(struct inode *inode,
39 struct file *filp);
40
41const struct file_operations ext4_dir_operations = {
42 .llseek = ext4_llseek,
43 .read = generic_read_dir,
44 .readdir = ext4_readdir, /* we take BKL. needed?*/
45 .unlocked_ioctl = ext4_ioctl,
46#ifdef CONFIG_COMPAT
47 .compat_ioctl = ext4_compat_ioctl,
48#endif
49 .fsync = ext4_sync_file,
50 .release = ext4_release_dir,
51};
52
53 37
54static unsigned char get_dtype(struct super_block *sb, int filetype) 38static unsigned char get_dtype(struct super_block *sb, int filetype)
55{ 39{
@@ -60,6 +44,26 @@ static unsigned char get_dtype(struct super_block *sb, int filetype)
60 return (ext4_filetype_table[filetype]); 44 return (ext4_filetype_table[filetype]);
61} 45}
62 46
47/**
48 * Check if the given dir-inode refers to an htree-indexed directory
49 * (or a directory which chould potentially get coverted to use htree
50 * indexing).
51 *
52 * Return 1 if it is a dx dir, 0 if not
53 */
54static int is_dx_dir(struct inode *inode)
55{
56 struct super_block *sb = inode->i_sb;
57
58 if (EXT4_HAS_COMPAT_FEATURE(inode->i_sb,
59 EXT4_FEATURE_COMPAT_DIR_INDEX) &&
60 ((ext4_test_inode_flag(inode, EXT4_INODE_INDEX)) ||
61 ((inode->i_size >> sb->s_blocksize_bits) == 1)))
62 return 1;
63
64 return 0;
65}
66
63/* 67/*
64 * Return 0 if the directory entry is OK, and 1 if there is a problem 68 * Return 0 if the directory entry is OK, and 1 if there is a problem
65 * 69 *
@@ -115,18 +119,13 @@ static int ext4_readdir(struct file *filp,
115 unsigned int offset; 119 unsigned int offset;
116 int i, stored; 120 int i, stored;
117 struct ext4_dir_entry_2 *de; 121 struct ext4_dir_entry_2 *de;
118 struct super_block *sb;
119 int err; 122 int err;
120 struct inode *inode = filp->f_path.dentry->d_inode; 123 struct inode *inode = filp->f_path.dentry->d_inode;
124 struct super_block *sb = inode->i_sb;
121 int ret = 0; 125 int ret = 0;
122 int dir_has_error = 0; 126 int dir_has_error = 0;
123 127
124 sb = inode->i_sb; 128 if (is_dx_dir(inode)) {
125
126 if (EXT4_HAS_COMPAT_FEATURE(inode->i_sb,
127 EXT4_FEATURE_COMPAT_DIR_INDEX) &&
128 ((ext4_test_inode_flag(inode, EXT4_INODE_INDEX)) ||
129 ((inode->i_size >> sb->s_blocksize_bits) == 1))) {
130 err = ext4_dx_readdir(filp, dirent, filldir); 129 err = ext4_dx_readdir(filp, dirent, filldir);
131 if (err != ERR_BAD_DX_DIR) { 130 if (err != ERR_BAD_DX_DIR) {
132 ret = err; 131 ret = err;
@@ -254,22 +253,134 @@ out:
254 return ret; 253 return ret;
255} 254}
256 255
256static inline int is_32bit_api(void)
257{
258#ifdef CONFIG_COMPAT
259 return is_compat_task();
260#else
261 return (BITS_PER_LONG == 32);
262#endif
263}
264
257/* 265/*
258 * These functions convert from the major/minor hash to an f_pos 266 * These functions convert from the major/minor hash to an f_pos
259 * value. 267 * value for dx directories
268 *
269 * Upper layer (for example NFS) should specify FMODE_32BITHASH or
270 * FMODE_64BITHASH explicitly. On the other hand, we allow ext4 to be mounted
271 * directly on both 32-bit and 64-bit nodes, under such case, neither
272 * FMODE_32BITHASH nor FMODE_64BITHASH is specified.
273 */
274static inline loff_t hash2pos(struct file *filp, __u32 major, __u32 minor)
275{
276 if ((filp->f_mode & FMODE_32BITHASH) ||
277 (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api()))
278 return major >> 1;
279 else
280 return ((__u64)(major >> 1) << 32) | (__u64)minor;
281}
282
283static inline __u32 pos2maj_hash(struct file *filp, loff_t pos)
284{
285 if ((filp->f_mode & FMODE_32BITHASH) ||
286 (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api()))
287 return (pos << 1) & 0xffffffff;
288 else
289 return ((pos >> 32) << 1) & 0xffffffff;
290}
291
292static inline __u32 pos2min_hash(struct file *filp, loff_t pos)
293{
294 if ((filp->f_mode & FMODE_32BITHASH) ||
295 (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api()))
296 return 0;
297 else
298 return pos & 0xffffffff;
299}
300
301/*
302 * Return 32- or 64-bit end-of-file for dx directories
303 */
304static inline loff_t ext4_get_htree_eof(struct file *filp)
305{
306 if ((filp->f_mode & FMODE_32BITHASH) ||
307 (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api()))
308 return EXT4_HTREE_EOF_32BIT;
309 else
310 return EXT4_HTREE_EOF_64BIT;
311}
312
313
314/*
315 * ext4_dir_llseek() based on generic_file_llseek() to handle both
316 * non-htree and htree directories, where the "offset" is in terms
317 * of the filename hash value instead of the byte offset.
260 * 318 *
261 * Currently we only use major hash numer. This is unfortunate, but 319 * NOTE: offsets obtained *before* ext4_set_inode_flag(dir, EXT4_INODE_INDEX)
262 * on 32-bit machines, the same VFS interface is used for lseek and 320 * will be invalid once the directory was converted into a dx directory
263 * llseek, so if we use the 64 bit offset, then the 32-bit versions of
264 * lseek/telldir/seekdir will blow out spectacularly, and from within
265 * the ext2 low-level routine, we don't know if we're being called by
266 * a 64-bit version of the system call or the 32-bit version of the
267 * system call. Worse yet, NFSv2 only allows for a 32-bit readdir
268 * cookie. Sigh.
269 */ 321 */
270#define hash2pos(major, minor) (major >> 1) 322loff_t ext4_dir_llseek(struct file *file, loff_t offset, int origin)
271#define pos2maj_hash(pos) ((pos << 1) & 0xffffffff) 323{
272#define pos2min_hash(pos) (0) 324 struct inode *inode = file->f_mapping->host;
325 loff_t ret = -EINVAL;
326 int dx_dir = is_dx_dir(inode);
327
328 mutex_lock(&inode->i_mutex);
329
330 /* NOTE: relative offsets with dx directories might not work
331 * as expected, as it is difficult to figure out the
332 * correct offset between dx hashes */
333
334 switch (origin) {
335 case SEEK_END:
336 if (unlikely(offset > 0))
337 goto out_err; /* not supported for directories */
338
339 /* so only negative offsets are left, does that have a
340 * meaning for directories at all? */
341 if (dx_dir)
342 offset += ext4_get_htree_eof(file);
343 else
344 offset += inode->i_size;
345 break;
346 case SEEK_CUR:
347 /*
348 * Here we special-case the lseek(fd, 0, SEEK_CUR)
349 * position-querying operation. Avoid rewriting the "same"
350 * f_pos value back to the file because a concurrent read(),
351 * write() or lseek() might have altered it
352 */
353 if (offset == 0) {
354 offset = file->f_pos;
355 goto out_ok;
356 }
357
358 offset += file->f_pos;
359 break;
360 }
361
362 if (unlikely(offset < 0))
363 goto out_err;
364
365 if (!dx_dir) {
366 if (offset > inode->i_sb->s_maxbytes)
367 goto out_err;
368 } else if (offset > ext4_get_htree_eof(file))
369 goto out_err;
370
371 /* Special lock needed here? */
372 if (offset != file->f_pos) {
373 file->f_pos = offset;
374 file->f_version = 0;
375 }
376
377out_ok:
378 ret = offset;
379out_err:
380 mutex_unlock(&inode->i_mutex);
381
382 return ret;
383}
273 384
274/* 385/*
275 * This structure holds the nodes of the red-black tree used to store 386 * This structure holds the nodes of the red-black tree used to store
@@ -330,15 +441,16 @@ static void free_rb_tree_fname(struct rb_root *root)
330} 441}
331 442
332 443
333static struct dir_private_info *ext4_htree_create_dir_info(loff_t pos) 444static struct dir_private_info *ext4_htree_create_dir_info(struct file *filp,
445 loff_t pos)
334{ 446{
335 struct dir_private_info *p; 447 struct dir_private_info *p;
336 448
337 p = kzalloc(sizeof(struct dir_private_info), GFP_KERNEL); 449 p = kzalloc(sizeof(struct dir_private_info), GFP_KERNEL);
338 if (!p) 450 if (!p)
339 return NULL; 451 return NULL;
340 p->curr_hash = pos2maj_hash(pos); 452 p->curr_hash = pos2maj_hash(filp, pos);
341 p->curr_minor_hash = pos2min_hash(pos); 453 p->curr_minor_hash = pos2min_hash(filp, pos);
342 return p; 454 return p;
343} 455}
344 456
@@ -429,7 +541,7 @@ static int call_filldir(struct file *filp, void *dirent,
429 "null fname?!?\n"); 541 "null fname?!?\n");
430 return 0; 542 return 0;
431 } 543 }
432 curr_pos = hash2pos(fname->hash, fname->minor_hash); 544 curr_pos = hash2pos(filp, fname->hash, fname->minor_hash);
433 while (fname) { 545 while (fname) {
434 error = filldir(dirent, fname->name, 546 error = filldir(dirent, fname->name,
435 fname->name_len, curr_pos, 547 fname->name_len, curr_pos,
@@ -454,13 +566,13 @@ static int ext4_dx_readdir(struct file *filp,
454 int ret; 566 int ret;
455 567
456 if (!info) { 568 if (!info) {
457 info = ext4_htree_create_dir_info(filp->f_pos); 569 info = ext4_htree_create_dir_info(filp, filp->f_pos);
458 if (!info) 570 if (!info)
459 return -ENOMEM; 571 return -ENOMEM;
460 filp->private_data = info; 572 filp->private_data = info;
461 } 573 }
462 574
463 if (filp->f_pos == EXT4_HTREE_EOF) 575 if (filp->f_pos == ext4_get_htree_eof(filp))
464 return 0; /* EOF */ 576 return 0; /* EOF */
465 577
466 /* Some one has messed with f_pos; reset the world */ 578 /* Some one has messed with f_pos; reset the world */
@@ -468,8 +580,8 @@ static int ext4_dx_readdir(struct file *filp,
468 free_rb_tree_fname(&info->root); 580 free_rb_tree_fname(&info->root);
469 info->curr_node = NULL; 581 info->curr_node = NULL;
470 info->extra_fname = NULL; 582 info->extra_fname = NULL;
471 info->curr_hash = pos2maj_hash(filp->f_pos); 583 info->curr_hash = pos2maj_hash(filp, filp->f_pos);
472 info->curr_minor_hash = pos2min_hash(filp->f_pos); 584 info->curr_minor_hash = pos2min_hash(filp, filp->f_pos);
473 } 585 }
474 586
475 /* 587 /*
@@ -501,7 +613,7 @@ static int ext4_dx_readdir(struct file *filp,
501 if (ret < 0) 613 if (ret < 0)
502 return ret; 614 return ret;
503 if (ret == 0) { 615 if (ret == 0) {
504 filp->f_pos = EXT4_HTREE_EOF; 616 filp->f_pos = ext4_get_htree_eof(filp);
505 break; 617 break;
506 } 618 }
507 info->curr_node = rb_first(&info->root); 619 info->curr_node = rb_first(&info->root);
@@ -521,7 +633,7 @@ static int ext4_dx_readdir(struct file *filp,
521 info->curr_minor_hash = fname->minor_hash; 633 info->curr_minor_hash = fname->minor_hash;
522 } else { 634 } else {
523 if (info->next_hash == ~0) { 635 if (info->next_hash == ~0) {
524 filp->f_pos = EXT4_HTREE_EOF; 636 filp->f_pos = ext4_get_htree_eof(filp);
525 break; 637 break;
526 } 638 }
527 info->curr_hash = info->next_hash; 639 info->curr_hash = info->next_hash;
@@ -540,3 +652,15 @@ static int ext4_release_dir(struct inode *inode, struct file *filp)
540 652
541 return 0; 653 return 0;
542} 654}
655
656const struct file_operations ext4_dir_operations = {
657 .llseek = ext4_dir_llseek,
658 .read = generic_read_dir,
659 .readdir = ext4_readdir,
660 .unlocked_ioctl = ext4_ioctl,
661#ifdef CONFIG_COMPAT
662 .compat_ioctl = ext4_compat_ioctl,
663#endif
664 .fsync = ext4_sync_file,
665 .release = ext4_release_dir,
666};
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 513004fc3d84..8b64a00502a0 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1612,7 +1612,11 @@ struct dx_hash_info
1612 u32 *seed; 1612 u32 *seed;
1613}; 1613};
1614 1614
1615#define EXT4_HTREE_EOF 0x7fffffff 1615
1616/* 32 and 64 bit signed EOF for dx directories */
1617#define EXT4_HTREE_EOF_32BIT ((1UL << (32 - 1)) - 1)
1618#define EXT4_HTREE_EOF_64BIT ((1ULL << (64 - 1)) - 1)
1619
1616 1620
1617/* 1621/*
1618 * Control parameters used by ext4_htree_next_block 1622 * Control parameters used by ext4_htree_next_block
diff --git a/fs/ext4/hash.c b/fs/ext4/hash.c
index ac8f168c8ab4..fa8e4911d354 100644
--- a/fs/ext4/hash.c
+++ b/fs/ext4/hash.c
@@ -200,8 +200,8 @@ int ext4fs_dirhash(const char *name, int len, struct dx_hash_info *hinfo)
200 return -1; 200 return -1;
201 } 201 }
202 hash = hash & ~1; 202 hash = hash & ~1;
203 if (hash == (EXT4_HTREE_EOF << 1)) 203 if (hash == (EXT4_HTREE_EOF_32BIT << 1))
204 hash = (EXT4_HTREE_EOF-1) << 1; 204 hash = (EXT4_HTREE_EOF_32BIT - 1) << 1;
205 hinfo->hash = hash; 205 hinfo->hash = hash;
206 hinfo->minor_hash = minor_hash; 206 hinfo->minor_hash = minor_hash;
207 return 0; 207 return 0;
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index edf6d3ed8777..7423d712eb8c 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -737,12 +737,13 @@ static int nfsd_open_break_lease(struct inode *inode, int access)
737 737
738/* 738/*
739 * Open an existing file or directory. 739 * Open an existing file or directory.
740 * The access argument indicates the type of open (read/write/lock) 740 * The may_flags argument indicates the type of open (read/write/lock)
741 * and additional flags.
741 * N.B. After this call fhp needs an fh_put 742 * N.B. After this call fhp needs an fh_put
742 */ 743 */
743__be32 744__be32
744nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, 745nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
745 int access, struct file **filp) 746 int may_flags, struct file **filp)
746{ 747{
747 struct dentry *dentry; 748 struct dentry *dentry;
748 struct inode *inode; 749 struct inode *inode;
@@ -757,7 +758,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
757 * and (hopefully) checked permission - so allow OWNER_OVERRIDE 758 * and (hopefully) checked permission - so allow OWNER_OVERRIDE
758 * in case a chmod has now revoked permission. 759 * in case a chmod has now revoked permission.
759 */ 760 */
760 err = fh_verify(rqstp, fhp, type, access | NFSD_MAY_OWNER_OVERRIDE); 761 err = fh_verify(rqstp, fhp, type, may_flags | NFSD_MAY_OWNER_OVERRIDE);
761 if (err) 762 if (err)
762 goto out; 763 goto out;
763 764
@@ -768,7 +769,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
768 * or any access when mandatory locking enabled 769 * or any access when mandatory locking enabled
769 */ 770 */
770 err = nfserr_perm; 771 err = nfserr_perm;
771 if (IS_APPEND(inode) && (access & NFSD_MAY_WRITE)) 772 if (IS_APPEND(inode) && (may_flags & NFSD_MAY_WRITE))
772 goto out; 773 goto out;
773 /* 774 /*
774 * We must ignore files (but only files) which might have mandatory 775 * We must ignore files (but only files) which might have mandatory
@@ -781,12 +782,12 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
781 if (!inode->i_fop) 782 if (!inode->i_fop)
782 goto out; 783 goto out;
783 784
784 host_err = nfsd_open_break_lease(inode, access); 785 host_err = nfsd_open_break_lease(inode, may_flags);
785 if (host_err) /* NOMEM or WOULDBLOCK */ 786 if (host_err) /* NOMEM or WOULDBLOCK */
786 goto out_nfserr; 787 goto out_nfserr;
787 788
788 if (access & NFSD_MAY_WRITE) { 789 if (may_flags & NFSD_MAY_WRITE) {
789 if (access & NFSD_MAY_READ) 790 if (may_flags & NFSD_MAY_READ)
790 flags = O_RDWR|O_LARGEFILE; 791 flags = O_RDWR|O_LARGEFILE;
791 else 792 else
792 flags = O_WRONLY|O_LARGEFILE; 793 flags = O_WRONLY|O_LARGEFILE;
@@ -795,8 +796,15 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
795 flags, current_cred()); 796 flags, current_cred());
796 if (IS_ERR(*filp)) 797 if (IS_ERR(*filp))
797 host_err = PTR_ERR(*filp); 798 host_err = PTR_ERR(*filp);
798 else 799 else {
799 host_err = ima_file_check(*filp, access); 800 host_err = ima_file_check(*filp, may_flags);
801
802 if (may_flags & NFSD_MAY_64BIT_COOKIE)
803 (*filp)->f_mode |= FMODE_64BITHASH;
804 else
805 (*filp)->f_mode |= FMODE_32BITHASH;
806 }
807
800out_nfserr: 808out_nfserr:
801 err = nfserrno(host_err); 809 err = nfserrno(host_err);
802out: 810out:
@@ -2020,8 +2028,13 @@ nfsd_readdir(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t *offsetp,
2020 __be32 err; 2028 __be32 err;
2021 struct file *file; 2029 struct file *file;
2022 loff_t offset = *offsetp; 2030 loff_t offset = *offsetp;
2031 int may_flags = NFSD_MAY_READ;
2032
2033 /* NFSv2 only supports 32 bit cookies */
2034 if (rqstp->rq_vers > 2)
2035 may_flags |= NFSD_MAY_64BIT_COOKIE;
2023 2036
2024 err = nfsd_open(rqstp, fhp, S_IFDIR, NFSD_MAY_READ, &file); 2037 err = nfsd_open(rqstp, fhp, S_IFDIR, may_flags, &file);
2025 if (err) 2038 if (err)
2026 goto out; 2039 goto out;
2027 2040
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
index 1dcd238e11a0..ec0611b2b738 100644
--- a/fs/nfsd/vfs.h
+++ b/fs/nfsd/vfs.h
@@ -27,6 +27,8 @@
27#define NFSD_MAY_BYPASS_GSS 0x400 27#define NFSD_MAY_BYPASS_GSS 0x400
28#define NFSD_MAY_READ_IF_EXEC 0x800 28#define NFSD_MAY_READ_IF_EXEC 0x800
29 29
30#define NFSD_MAY_64BIT_COOKIE 0x1000 /* 64 bit readdir cookies for >= NFSv3 */
31
30#define NFSD_MAY_CREATE (NFSD_MAY_EXEC|NFSD_MAY_WRITE) 32#define NFSD_MAY_CREATE (NFSD_MAY_EXEC|NFSD_MAY_WRITE)
31#define NFSD_MAY_REMOVE (NFSD_MAY_EXEC|NFSD_MAY_WRITE|NFSD_MAY_TRUNC) 33#define NFSD_MAY_REMOVE (NFSD_MAY_EXEC|NFSD_MAY_WRITE|NFSD_MAY_TRUNC)
32 34
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 386da09f229d..8975a5602931 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -92,6 +92,10 @@ struct inodes_stat_t {
92/* File is opened using open(.., 3, ..) and is writeable only for ioctls 92/* File is opened using open(.., 3, ..) and is writeable only for ioctls
93 (specialy hack for floppy.c) */ 93 (specialy hack for floppy.c) */
94#define FMODE_WRITE_IOCTL ((__force fmode_t)0x100) 94#define FMODE_WRITE_IOCTL ((__force fmode_t)0x100)
95/* 32bit hashes as llseek() offset (for directories) */
96#define FMODE_32BITHASH ((__force fmode_t)0x200)
97/* 64bit hashes as llseek() offset (for directories) */
98#define FMODE_64BITHASH ((__force fmode_t)0x400)
95 99
96/* 100/*
97 * Don't update ctime and mtime. 101 * Don't update ctime and mtime.