aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorEric Sandeen <sandeen@redhat.com>2012-04-26 14:10:39 -0400
committerJan Kara <jack@suse.cz>2012-05-15 17:34:39 -0400
commitd7dab39b6e16d5eea78ed3c705d2a2d0772b4f06 (patch)
treea2b201b2afd8cd1d278947f7e080ce7457e88814
parenta80b12c3d08dbbf15e6a551e481c32a2df4911f3 (diff)
ext3: return 32/64-bit dir name hash according to usage type
This is based on commit d1f5273e9adb40724a85272f248f210dc4ce919a ("ext4: return 32/64-bit dir name hash according to usage type") by Fan Yong <yong.fan@whamcloud.com>.

Traditionally ext2/3/4 has returned a 32-bit hash value from llseek() to appease NFSv2, which can only handle a 32-bit cookie for seekdir() and telldir(). However, this causes problems if there are 32-bit hash collisions, since the NFSv2 server can get stuck resending the same entries from the directory repeatedly.

Allow ext3 to return a full 64-bit hash (both major and minor) for telldir to decrease the chance of hash collisions.

This patch does implement a new ext3_dir_llseek op, because with 64-bit hashes, nfs will attempt to seek to a hash "offset" which is much larger than ext3's s_maxbytes. So for dx dirs, we call generic_file_llseek_size() with the appropriate max hash value as the maximum seekable size. Otherwise we just pass through to generic_file_llseek().

Patch-updated-by: Bernd Schubert <bernd.schubert@itwm.fraunhofer.de>
Patch-updated-by: Eric Sandeen <sandeen@redhat.com>
(blame us if something is not correct)

Signed-off-by: Eric Sandeen <sandeen@redhat.com>
Signed-off-by: Jan Kara <jack@suse.cz>
-rw-r--r--fs/ext3/dir.c167
-rw-r--r--fs/ext3/ext3.h6
-rw-r--r--fs/ext3/hash.c4
3 files changed, 129 insertions, 48 deletions
diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c
index cc761ad8fa57..92490e9f85ca 100644
--- a/fs/ext3/dir.c
+++ b/fs/ext3/dir.c
@@ -21,30 +21,15 @@
21 * 21 *
22 */ 22 */
23 23
24#include <linux/compat.h>
24#include "ext3.h" 25#include "ext3.h"
25 26
26static unsigned char ext3_filetype_table[] = { 27static unsigned char ext3_filetype_table[] = {
27 DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK 28 DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
28}; 29};
29 30
30static int ext3_readdir(struct file *, void *, filldir_t);
31static int ext3_dx_readdir(struct file * filp, 31static int ext3_dx_readdir(struct file * filp,
32 void * dirent, filldir_t filldir); 32 void * dirent, filldir_t filldir);
33static int ext3_release_dir (struct inode * inode,
34 struct file * filp);
35
36const struct file_operations ext3_dir_operations = {
37 .llseek = generic_file_llseek,
38 .read = generic_read_dir,
39 .readdir = ext3_readdir, /* we take BKL. needed?*/
40 .unlocked_ioctl = ext3_ioctl,
41#ifdef CONFIG_COMPAT
42 .compat_ioctl = ext3_compat_ioctl,
43#endif
44 .fsync = ext3_sync_file, /* BKL held */
45 .release = ext3_release_dir,
46};
47
48 33
49static unsigned char get_dtype(struct super_block *sb, int filetype) 34static unsigned char get_dtype(struct super_block *sb, int filetype)
50{ 35{
@@ -55,6 +40,25 @@ static unsigned char get_dtype(struct super_block *sb, int filetype)
55 return (ext3_filetype_table[filetype]); 40 return (ext3_filetype_table[filetype]);
56} 41}
57 42
43/**
44 * Check if the given dir-inode refers to an htree-indexed directory
45 * (or a directory which could potentially get converted to use htree
46 * indexing).
47 *
48 * Return 1 if it is a dx dir, 0 if not
49 */
50static int is_dx_dir(struct inode *inode)
51{
52 struct super_block *sb = inode->i_sb;
53
54 if (EXT3_HAS_COMPAT_FEATURE(inode->i_sb,
55 EXT3_FEATURE_COMPAT_DIR_INDEX) &&
56 ((EXT3_I(inode)->i_flags & EXT3_INDEX_FL) ||
57 ((inode->i_size >> sb->s_blocksize_bits) == 1)))
58 return 1;
59
60 return 0;
61}
58 62
59int ext3_check_dir_entry (const char * function, struct inode * dir, 63int ext3_check_dir_entry (const char * function, struct inode * dir,
60 struct ext3_dir_entry_2 * de, 64 struct ext3_dir_entry_2 * de,
@@ -94,18 +98,13 @@ static int ext3_readdir(struct file * filp,
94 unsigned long offset; 98 unsigned long offset;
95 int i, stored; 99 int i, stored;
96 struct ext3_dir_entry_2 *de; 100 struct ext3_dir_entry_2 *de;
97 struct super_block *sb;
98 int err; 101 int err;
99 struct inode *inode = filp->f_path.dentry->d_inode; 102 struct inode *inode = filp->f_path.dentry->d_inode;
103 struct super_block *sb = inode->i_sb;
100 int ret = 0; 104 int ret = 0;
101 int dir_has_error = 0; 105 int dir_has_error = 0;
102 106
103 sb = inode->i_sb; 107 if (is_dx_dir(inode)) {
104
105 if (EXT3_HAS_COMPAT_FEATURE(inode->i_sb,
106 EXT3_FEATURE_COMPAT_DIR_INDEX) &&
107 ((EXT3_I(inode)->i_flags & EXT3_INDEX_FL) ||
108 ((inode->i_size >> sb->s_blocksize_bits) == 1))) {
109 err = ext3_dx_readdir(filp, dirent, filldir); 108 err = ext3_dx_readdir(filp, dirent, filldir);
110 if (err != ERR_BAD_DX_DIR) { 109 if (err != ERR_BAD_DX_DIR) {
111 ret = err; 110 ret = err;
@@ -227,22 +226,87 @@ out:
227 return ret; 226 return ret;
228} 227}
229 228
229static inline int is_32bit_api(void)
230{
231#ifdef CONFIG_COMPAT
232 return is_compat_task();
233#else
234 return (BITS_PER_LONG == 32);
235#endif
236}
237
230/* 238/*
231 * These functions convert from the major/minor hash to an f_pos 239 * These functions convert from the major/minor hash to an f_pos
232 * value. 240 * value for dx directories
233 * 241 *
234 * Currently we only use major hash number. This is unfortunate, but 242 * Upper layer (for example NFS) should specify FMODE_32BITHASH or
235 * on 32-bit machines, the same VFS interface is used for lseek and 243 * FMODE_64BITHASH explicitly. On the other hand, we allow ext3 to be mounted
236 * llseek, so if we use the 64 bit offset, then the 32-bit versions of 244 * directly on both 32-bit and 64-bit nodes, under such case, neither
237 * lseek/telldir/seekdir will blow out spectacularly, and from within 245 * FMODE_32BITHASH nor FMODE_64BITHASH is specified.
238 * the ext2 low-level routine, we don't know if we're being called by
239 * a 64-bit version of the system call or the 32-bit version of the
240 * system call. Worse yet, NFSv2 only allows for a 32-bit readdir
241 * cookie. Sigh.
242 */ 246 */
243#define hash2pos(major, minor) (major >> 1) 247static inline loff_t hash2pos(struct file *filp, __u32 major, __u32 minor)
244#define pos2maj_hash(pos) ((pos << 1) & 0xffffffff) 248{
245#define pos2min_hash(pos) (0) 249 if ((filp->f_mode & FMODE_32BITHASH) ||
250 (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api()))
251 return major >> 1;
252 else
253 return ((__u64)(major >> 1) << 32) | (__u64)minor;
254}
255
256static inline __u32 pos2maj_hash(struct file *filp, loff_t pos)
257{
258 if ((filp->f_mode & FMODE_32BITHASH) ||
259 (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api()))
260 return (pos << 1) & 0xffffffff;
261 else
262 return ((pos >> 32) << 1) & 0xffffffff;
263}
264
265static inline __u32 pos2min_hash(struct file *filp, loff_t pos)
266{
267 if ((filp->f_mode & FMODE_32BITHASH) ||
268 (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api()))
269 return 0;
270 else
271 return pos & 0xffffffff;
272}
273
274/*
275 * Return 32- or 64-bit end-of-file for dx directories
276 */
277static inline loff_t ext3_get_htree_eof(struct file *filp)
278{
279 if ((filp->f_mode & FMODE_32BITHASH) ||
280 (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api()))
281 return EXT3_HTREE_EOF_32BIT;
282 else
283 return EXT3_HTREE_EOF_64BIT;
284}
285
286
287/*
288 * ext3_dir_llseek() calls generic_file_llseek[_size]() to handle both
289 * non-htree and htree directories, where the "offset" is in terms
290 * of the filename hash value instead of the byte offset.
291 *
292 * Because we may return a 64-bit hash that is well beyond s_maxbytes,
293 * we need to pass the max hash as the maximum allowable offset in
294 * the htree directory case.
295 *
296 * NOTE: offsets obtained *before* ext3_set_inode_flag(dir, EXT3_INODE_INDEX)
297 * will be invalid once the directory was converted into a dx directory
298 */
299loff_t ext3_dir_llseek(struct file *file, loff_t offset, int origin)
300{
301 struct inode *inode = file->f_mapping->host;
302 int dx_dir = is_dx_dir(inode);
303
304 if (likely(dx_dir))
305 return generic_file_llseek_size(file, offset, origin,
306 ext3_get_htree_eof(file));
307 else
308 return generic_file_llseek(file, offset, origin);
309}
246 310
247/* 311/*
248 * This structure holds the nodes of the red-black tree used to store 312 * This structure holds the nodes of the red-black tree used to store
@@ -303,15 +367,16 @@ static void free_rb_tree_fname(struct rb_root *root)
303} 367}
304 368
305 369
306static struct dir_private_info *ext3_htree_create_dir_info(loff_t pos) 370static struct dir_private_info *ext3_htree_create_dir_info(struct file *filp,
371 loff_t pos)
307{ 372{
308 struct dir_private_info *p; 373 struct dir_private_info *p;
309 374
310 p = kzalloc(sizeof(struct dir_private_info), GFP_KERNEL); 375 p = kzalloc(sizeof(struct dir_private_info), GFP_KERNEL);
311 if (!p) 376 if (!p)
312 return NULL; 377 return NULL;
313 p->curr_hash = pos2maj_hash(pos); 378 p->curr_hash = pos2maj_hash(filp, pos);
314 p->curr_minor_hash = pos2min_hash(pos); 379 p->curr_minor_hash = pos2min_hash(filp, pos);
315 return p; 380 return p;
316} 381}
317 382
@@ -401,7 +466,7 @@ static int call_filldir(struct file * filp, void * dirent,
401 printk("call_filldir: called with null fname?!?\n"); 466 printk("call_filldir: called with null fname?!?\n");
402 return 0; 467 return 0;
403 } 468 }
404 curr_pos = hash2pos(fname->hash, fname->minor_hash); 469 curr_pos = hash2pos(filp, fname->hash, fname->minor_hash);
405 while (fname) { 470 while (fname) {
406 error = filldir(dirent, fname->name, 471 error = filldir(dirent, fname->name,
407 fname->name_len, curr_pos, 472 fname->name_len, curr_pos,
@@ -426,13 +491,13 @@ static int ext3_dx_readdir(struct file * filp,
426 int ret; 491 int ret;
427 492
428 if (!info) { 493 if (!info) {
429 info = ext3_htree_create_dir_info(filp->f_pos); 494 info = ext3_htree_create_dir_info(filp, filp->f_pos);
430 if (!info) 495 if (!info)
431 return -ENOMEM; 496 return -ENOMEM;
432 filp->private_data = info; 497 filp->private_data = info;
433 } 498 }
434 499
435 if (filp->f_pos == EXT3_HTREE_EOF) 500 if (filp->f_pos == ext3_get_htree_eof(filp))
436 return 0; /* EOF */ 501 return 0; /* EOF */
437 502
438 /* Some one has messed with f_pos; reset the world */ 503 /* Some one has messed with f_pos; reset the world */
@@ -440,8 +505,8 @@ static int ext3_dx_readdir(struct file * filp,
440 free_rb_tree_fname(&info->root); 505 free_rb_tree_fname(&info->root);
441 info->curr_node = NULL; 506 info->curr_node = NULL;
442 info->extra_fname = NULL; 507 info->extra_fname = NULL;
443 info->curr_hash = pos2maj_hash(filp->f_pos); 508 info->curr_hash = pos2maj_hash(filp, filp->f_pos);
444 info->curr_minor_hash = pos2min_hash(filp->f_pos); 509 info->curr_minor_hash = pos2min_hash(filp, filp->f_pos);
445 } 510 }
446 511
447 /* 512 /*
@@ -473,7 +538,7 @@ static int ext3_dx_readdir(struct file * filp,
473 if (ret < 0) 538 if (ret < 0)
474 return ret; 539 return ret;
475 if (ret == 0) { 540 if (ret == 0) {
476 filp->f_pos = EXT3_HTREE_EOF; 541 filp->f_pos = ext3_get_htree_eof(filp);
477 break; 542 break;
478 } 543 }
479 info->curr_node = rb_first(&info->root); 544 info->curr_node = rb_first(&info->root);
@@ -493,7 +558,7 @@ static int ext3_dx_readdir(struct file * filp,
493 info->curr_minor_hash = fname->minor_hash; 558 info->curr_minor_hash = fname->minor_hash;
494 } else { 559 } else {
495 if (info->next_hash == ~0) { 560 if (info->next_hash == ~0) {
496 filp->f_pos = EXT3_HTREE_EOF; 561 filp->f_pos = ext3_get_htree_eof(filp);
497 break; 562 break;
498 } 563 }
499 info->curr_hash = info->next_hash; 564 info->curr_hash = info->next_hash;
@@ -512,3 +577,15 @@ static int ext3_release_dir (struct inode * inode, struct file * filp)
512 577
513 return 0; 578 return 0;
514} 579}
580
581const struct file_operations ext3_dir_operations = {
582 .llseek = ext3_dir_llseek,
583 .read = generic_read_dir,
584 .readdir = ext3_readdir,
585 .unlocked_ioctl = ext3_ioctl,
586#ifdef CONFIG_COMPAT
587 .compat_ioctl = ext3_compat_ioctl,
588#endif
589 .fsync = ext3_sync_file,
590 .release = ext3_release_dir,
591};
diff --git a/fs/ext3/ext3.h b/fs/ext3/ext3.h
index b6515fd7e56c..fe5bef7914ea 100644
--- a/fs/ext3/ext3.h
+++ b/fs/ext3/ext3.h
@@ -920,7 +920,11 @@ struct dx_hash_info
920 u32 *seed; 920 u32 *seed;
921}; 921};
922 922
923#define EXT3_HTREE_EOF 0x7fffffff 923
924/* 32 and 64 bit signed EOF for dx directories */
925#define EXT3_HTREE_EOF_32BIT ((1UL << (32 - 1)) - 1)
926#define EXT3_HTREE_EOF_64BIT ((1ULL << (64 - 1)) - 1)
927
924 928
925/* 929/*
926 * Control parameters used by ext3_htree_next_block 930 * Control parameters used by ext3_htree_next_block
diff --git a/fs/ext3/hash.c b/fs/ext3/hash.c
index d10231ddcf8a..ede315cdf126 100644
--- a/fs/ext3/hash.c
+++ b/fs/ext3/hash.c
@@ -198,8 +198,8 @@ int ext3fs_dirhash(const char *name, int len, struct dx_hash_info *hinfo)
198 return -1; 198 return -1;
199 } 199 }
200 hash = hash & ~1; 200 hash = hash & ~1;
201 if (hash == (EXT3_HTREE_EOF << 1)) 201 if (hash == (EXT3_HTREE_EOF_32BIT << 1))
202 hash = (EXT3_HTREE_EOF-1) << 1; 202 hash = (EXT3_HTREE_EOF_32BIT - 1) << 1;
203 hinfo->hash = hash; 203 hinfo->hash = hash;
204 hinfo->minor_hash = minor_hash; 204 hinfo->minor_hash = minor_hash;
205 return 0; 205 return 0;