aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ext4/dir.c
diff options
context:
space:
mode:
authorFan Yong <yong.fan@whamcloud.com>2012-03-18 22:44:40 -0400
committerTheodore Ts'o <tytso@mit.edu>2012-03-18 22:44:40 -0400
commitd1f5273e9adb40724a85272f248f210dc4ce919a (patch)
tree1ddb119dab8247ab7d7774394094c61161013f2a /fs/ext4/dir.c
parent6a8a13e03861c0ab83ab07d573ca793cff0e5d00 (diff)
ext4: return 32/64-bit dir name hash according to usage type
Traditionally ext2/3/4 has returned a 32-bit hash value from llseek() to appease NFSv2, which can only handle a 32-bit cookie for seekdir() and telldir(). However, this causes problems if there are 32-bit hash collisions, since the NFSv2 server can get stuck resending the same entries from the directory repeatedly. Allow ext4 to return a full 64-bit hash (both major and minor) for telldir to decrease the chance of hash collisions. This still needs integration on the NFS side. Patch-updated-by: Bernd Schubert <bernd.schubert@itwm.fraunhofer.de> (blame me if something is not correct) Signed-off-by: Fan Yong <yong.fan@whamcloud.com> Signed-off-by: Andreas Dilger <adilger@whamcloud.com> Signed-off-by: Bernd Schubert <bernd.schubert@itwm.fraunhofer.de> Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Diffstat (limited to 'fs/ext4/dir.c')
-rw-r--r--fs/ext4/dir.c214
1 files changed, 169 insertions, 45 deletions
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index 164c56092e58..689d1b1a3f45 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -32,24 +32,8 @@ static unsigned char ext4_filetype_table[] = {
32 DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK 32 DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
33}; 33};
34 34
35static int ext4_readdir(struct file *, void *, filldir_t);
36static int ext4_dx_readdir(struct file *filp, 35static int ext4_dx_readdir(struct file *filp,
37 void *dirent, filldir_t filldir); 36 void *dirent, filldir_t filldir);
38static int ext4_release_dir(struct inode *inode,
39 struct file *filp);
40
41const struct file_operations ext4_dir_operations = {
42 .llseek = ext4_llseek,
43 .read = generic_read_dir,
44 .readdir = ext4_readdir, /* we take BKL. needed?*/
45 .unlocked_ioctl = ext4_ioctl,
46#ifdef CONFIG_COMPAT
47 .compat_ioctl = ext4_compat_ioctl,
48#endif
49 .fsync = ext4_sync_file,
50 .release = ext4_release_dir,
51};
52
53 37
54static unsigned char get_dtype(struct super_block *sb, int filetype) 38static unsigned char get_dtype(struct super_block *sb, int filetype)
55{ 39{
@@ -60,6 +44,26 @@ static unsigned char get_dtype(struct super_block *sb, int filetype)
60 return (ext4_filetype_table[filetype]); 44 return (ext4_filetype_table[filetype]);
61} 45}
62 46
47/**
48 * Check if the given dir-inode refers to an htree-indexed directory
49 * (or a directory which chould potentially get coverted to use htree
50 * indexing).
51 *
52 * Return 1 if it is a dx dir, 0 if not
53 */
54static int is_dx_dir(struct inode *inode)
55{
56 struct super_block *sb = inode->i_sb;
57
58 if (EXT4_HAS_COMPAT_FEATURE(inode->i_sb,
59 EXT4_FEATURE_COMPAT_DIR_INDEX) &&
60 ((ext4_test_inode_flag(inode, EXT4_INODE_INDEX)) ||
61 ((inode->i_size >> sb->s_blocksize_bits) == 1)))
62 return 1;
63
64 return 0;
65}
66
63/* 67/*
64 * Return 0 if the directory entry is OK, and 1 if there is a problem 68 * Return 0 if the directory entry is OK, and 1 if there is a problem
65 * 69 *
@@ -115,18 +119,13 @@ static int ext4_readdir(struct file *filp,
115 unsigned int offset; 119 unsigned int offset;
116 int i, stored; 120 int i, stored;
117 struct ext4_dir_entry_2 *de; 121 struct ext4_dir_entry_2 *de;
118 struct super_block *sb;
119 int err; 122 int err;
120 struct inode *inode = filp->f_path.dentry->d_inode; 123 struct inode *inode = filp->f_path.dentry->d_inode;
124 struct super_block *sb = inode->i_sb;
121 int ret = 0; 125 int ret = 0;
122 int dir_has_error = 0; 126 int dir_has_error = 0;
123 127
124 sb = inode->i_sb; 128 if (is_dx_dir(inode)) {
125
126 if (EXT4_HAS_COMPAT_FEATURE(inode->i_sb,
127 EXT4_FEATURE_COMPAT_DIR_INDEX) &&
128 ((ext4_test_inode_flag(inode, EXT4_INODE_INDEX)) ||
129 ((inode->i_size >> sb->s_blocksize_bits) == 1))) {
130 err = ext4_dx_readdir(filp, dirent, filldir); 129 err = ext4_dx_readdir(filp, dirent, filldir);
131 if (err != ERR_BAD_DX_DIR) { 130 if (err != ERR_BAD_DX_DIR) {
132 ret = err; 131 ret = err;
@@ -254,22 +253,134 @@ out:
254 return ret; 253 return ret;
255} 254}
256 255
256static inline int is_32bit_api(void)
257{
258#ifdef CONFIG_COMPAT
259 return is_compat_task();
260#else
261 return (BITS_PER_LONG == 32);
262#endif
263}
264
257/* 265/*
258 * These functions convert from the major/minor hash to an f_pos 266 * These functions convert from the major/minor hash to an f_pos
259 * value. 267 * value for dx directories
268 *
269 * Upper layers (for example NFS) should specify FMODE_32BITHASH or
270 * FMODE_64BITHASH explicitly. On the other hand, ext4 may also be mounted
271 * directly on both 32-bit and 64-bit nodes; in that case neither
272 * FMODE_32BITHASH nor FMODE_64BITHASH is specified.
273 */
274static inline loff_t hash2pos(struct file *filp, __u32 major, __u32 minor)
275{
276 if ((filp->f_mode & FMODE_32BITHASH) ||
277 (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api()))
278 return major >> 1;
279 else
280 return ((__u64)(major >> 1) << 32) | (__u64)minor;
281}
282
283static inline __u32 pos2maj_hash(struct file *filp, loff_t pos)
284{
285 if ((filp->f_mode & FMODE_32BITHASH) ||
286 (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api()))
287 return (pos << 1) & 0xffffffff;
288 else
289 return ((pos >> 32) << 1) & 0xffffffff;
290}
291
292static inline __u32 pos2min_hash(struct file *filp, loff_t pos)
293{
294 if ((filp->f_mode & FMODE_32BITHASH) ||
295 (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api()))
296 return 0;
297 else
298 return pos & 0xffffffff;
299}
300
301/*
302 * Return 32- or 64-bit end-of-file for dx directories
303 */
304static inline loff_t ext4_get_htree_eof(struct file *filp)
305{
306 if ((filp->f_mode & FMODE_32BITHASH) ||
307 (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api()))
308 return EXT4_HTREE_EOF_32BIT;
309 else
310 return EXT4_HTREE_EOF_64BIT;
311}
312
313
314/*
315 * ext4_dir_llseek() based on generic_file_llseek() to handle both
316 * non-htree and htree directories, where the "offset" is in terms
317 * of the filename hash value instead of the byte offset.
260 * 318 *
261 * Currently we only use major hash numer. This is unfortunate, but 319 * NOTE: offsets obtained *before* ext4_set_inode_flag(dir, EXT4_INODE_INDEX)
262 * on 32-bit machines, the same VFS interface is used for lseek and 320 * will be invalid once the directory was converted into a dx directory
263 * llseek, so if we use the 64 bit offset, then the 32-bit versions of
264 * lseek/telldir/seekdir will blow out spectacularly, and from within
265 * the ext2 low-level routine, we don't know if we're being called by
266 * a 64-bit version of the system call or the 32-bit version of the
267 * system call. Worse yet, NFSv2 only allows for a 32-bit readdir
268 * cookie. Sigh.
269 */ 321 */
270#define hash2pos(major, minor) (major >> 1) 322loff_t ext4_dir_llseek(struct file *file, loff_t offset, int origin)
271#define pos2maj_hash(pos) ((pos << 1) & 0xffffffff) 323{
272#define pos2min_hash(pos) (0) 324 struct inode *inode = file->f_mapping->host;
325 loff_t ret = -EINVAL;
326 int dx_dir = is_dx_dir(inode);
327
328 mutex_lock(&inode->i_mutex);
329
330 /* NOTE: relative offsets with dx directories might not work
331 * as expected, as it is difficult to figure out the
332 * correct offset between dx hashes */
333
334 switch (origin) {
335 case SEEK_END:
336 if (unlikely(offset > 0))
337 goto out_err; /* not supported for directories */
338
339 /* so only negative offsets are left, does that have a
340 * meaning for directories at all? */
341 if (dx_dir)
342 offset += ext4_get_htree_eof(file);
343 else
344 offset += inode->i_size;
345 break;
346 case SEEK_CUR:
347 /*
348 * Here we special-case the lseek(fd, 0, SEEK_CUR)
349 * position-querying operation. Avoid rewriting the "same"
350 * f_pos value back to the file because a concurrent read(),
351 * write() or lseek() might have altered it
352 */
353 if (offset == 0) {
354 offset = file->f_pos;
355 goto out_ok;
356 }
357
358 offset += file->f_pos;
359 break;
360 }
361
362 if (unlikely(offset < 0))
363 goto out_err;
364
365 if (!dx_dir) {
366 if (offset > inode->i_sb->s_maxbytes)
367 goto out_err;
368 } else if (offset > ext4_get_htree_eof(file))
369 goto out_err;
370
371 /* Special lock needed here? */
372 if (offset != file->f_pos) {
373 file->f_pos = offset;
374 file->f_version = 0;
375 }
376
377out_ok:
378 ret = offset;
379out_err:
380 mutex_unlock(&inode->i_mutex);
381
382 return ret;
383}
273 384
274/* 385/*
275 * This structure holds the nodes of the red-black tree used to store 386 * This structure holds the nodes of the red-black tree used to store
@@ -330,15 +441,16 @@ static void free_rb_tree_fname(struct rb_root *root)
330} 441}
331 442
332 443
333static struct dir_private_info *ext4_htree_create_dir_info(loff_t pos) 444static struct dir_private_info *ext4_htree_create_dir_info(struct file *filp,
445 loff_t pos)
334{ 446{
335 struct dir_private_info *p; 447 struct dir_private_info *p;
336 448
337 p = kzalloc(sizeof(struct dir_private_info), GFP_KERNEL); 449 p = kzalloc(sizeof(struct dir_private_info), GFP_KERNEL);
338 if (!p) 450 if (!p)
339 return NULL; 451 return NULL;
340 p->curr_hash = pos2maj_hash(pos); 452 p->curr_hash = pos2maj_hash(filp, pos);
341 p->curr_minor_hash = pos2min_hash(pos); 453 p->curr_minor_hash = pos2min_hash(filp, pos);
342 return p; 454 return p;
343} 455}
344 456
@@ -429,7 +541,7 @@ static int call_filldir(struct file *filp, void *dirent,
429 "null fname?!?\n"); 541 "null fname?!?\n");
430 return 0; 542 return 0;
431 } 543 }
432 curr_pos = hash2pos(fname->hash, fname->minor_hash); 544 curr_pos = hash2pos(filp, fname->hash, fname->minor_hash);
433 while (fname) { 545 while (fname) {
434 error = filldir(dirent, fname->name, 546 error = filldir(dirent, fname->name,
435 fname->name_len, curr_pos, 547 fname->name_len, curr_pos,
@@ -454,13 +566,13 @@ static int ext4_dx_readdir(struct file *filp,
454 int ret; 566 int ret;
455 567
456 if (!info) { 568 if (!info) {
457 info = ext4_htree_create_dir_info(filp->f_pos); 569 info = ext4_htree_create_dir_info(filp, filp->f_pos);
458 if (!info) 570 if (!info)
459 return -ENOMEM; 571 return -ENOMEM;
460 filp->private_data = info; 572 filp->private_data = info;
461 } 573 }
462 574
463 if (filp->f_pos == EXT4_HTREE_EOF) 575 if (filp->f_pos == ext4_get_htree_eof(filp))
464 return 0; /* EOF */ 576 return 0; /* EOF */
465 577
466 /* Some one has messed with f_pos; reset the world */ 578 /* Some one has messed with f_pos; reset the world */
@@ -468,8 +580,8 @@ static int ext4_dx_readdir(struct file *filp,
468 free_rb_tree_fname(&info->root); 580 free_rb_tree_fname(&info->root);
469 info->curr_node = NULL; 581 info->curr_node = NULL;
470 info->extra_fname = NULL; 582 info->extra_fname = NULL;
471 info->curr_hash = pos2maj_hash(filp->f_pos); 583 info->curr_hash = pos2maj_hash(filp, filp->f_pos);
472 info->curr_minor_hash = pos2min_hash(filp->f_pos); 584 info->curr_minor_hash = pos2min_hash(filp, filp->f_pos);
473 } 585 }
474 586
475 /* 587 /*
@@ -501,7 +613,7 @@ static int ext4_dx_readdir(struct file *filp,
501 if (ret < 0) 613 if (ret < 0)
502 return ret; 614 return ret;
503 if (ret == 0) { 615 if (ret == 0) {
504 filp->f_pos = EXT4_HTREE_EOF; 616 filp->f_pos = ext4_get_htree_eof(filp);
505 break; 617 break;
506 } 618 }
507 info->curr_node = rb_first(&info->root); 619 info->curr_node = rb_first(&info->root);
@@ -521,7 +633,7 @@ static int ext4_dx_readdir(struct file *filp,
521 info->curr_minor_hash = fname->minor_hash; 633 info->curr_minor_hash = fname->minor_hash;
522 } else { 634 } else {
523 if (info->next_hash == ~0) { 635 if (info->next_hash == ~0) {
524 filp->f_pos = EXT4_HTREE_EOF; 636 filp->f_pos = ext4_get_htree_eof(filp);
525 break; 637 break;
526 } 638 }
527 info->curr_hash = info->next_hash; 639 info->curr_hash = info->next_hash;
@@ -540,3 +652,15 @@ static int ext4_release_dir(struct inode *inode, struct file *filp)
540 652
541 return 0; 653 return 0;
542} 654}
655
656const struct file_operations ext4_dir_operations = {
657 .llseek = ext4_dir_llseek,
658 .read = generic_read_dir,
659 .readdir = ext4_readdir,
660 .unlocked_ioctl = ext4_ioctl,
661#ifdef CONFIG_COMPAT
662 .compat_ioctl = ext4_compat_ioctl,
663#endif
664 .fsync = ext4_sync_file,
665 .release = ext4_release_dir,
666};