diff options
author | Eric Sandeen <sandeen@redhat.com> | 2012-04-26 14:10:39 -0400 |
---|---|---|
committer | Jan Kara <jack@suse.cz> | 2012-05-15 17:34:39 -0400 |
commit | d7dab39b6e16d5eea78ed3c705d2a2d0772b4f06 (patch) | |
tree | a2b201b2afd8cd1d278947f7e080ce7457e88814 /fs/ext3 | |
parent | a80b12c3d08dbbf15e6a551e481c32a2df4911f3 (diff) |
ext3: return 32/64-bit dir name hash according to usage type
This is based on commit d1f5273e9adb40724a85272f248f210dc4ce919a
("ext4: return 32/64-bit dir name hash according to usage type")
by Fan Yong <yong.fan@whamcloud.com>.
Traditionally ext2/3/4 has returned a 32-bit hash value from llseek()
to appease NFSv2, which can only handle a 32-bit cookie for seekdir()
and telldir(). However, this causes problems if there are 32-bit hash
collisions, since the NFSv2 server can get stuck resending the same
entries from the directory repeatedly.
Allow ext3 to return a full 64-bit hash (both major and minor) for
telldir to decrease the chance of hash collisions.
This patch does implement a new ext3_dir_llseek op, because with 64-bit
hashes, nfs will attempt to seek to a hash "offset" which is much
larger than ext3's s_maxbytes. So for dx dirs, we call
generic_file_llseek_size() with the appropriate max hash value as the
maximum seekable size. Otherwise we just pass through to
generic_file_llseek().
Patch-updated-by: Bernd Schubert <bernd.schubert@itwm.fraunhofer.de>
Patch-updated-by: Eric Sandeen <sandeen@redhat.com>
(blame us if something is not correct)
Signed-off-by: Eric Sandeen <sandeen@redhat.com>
Signed-off-by: Jan Kara <jack@suse.cz>
Diffstat (limited to 'fs/ext3')
-rw-r--r-- | fs/ext3/dir.c | 167 | ||||
-rw-r--r-- | fs/ext3/ext3.h | 6 | ||||
-rw-r--r-- | fs/ext3/hash.c | 4 |
3 files changed, 129 insertions, 48 deletions
diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c index cc761ad8fa57..92490e9f85ca 100644 --- a/fs/ext3/dir.c +++ b/fs/ext3/dir.c | |||
@@ -21,30 +21,15 @@ | |||
21 | * | 21 | * |
22 | */ | 22 | */ |
23 | 23 | ||
24 | #include <linux/compat.h> | ||
24 | #include "ext3.h" | 25 | #include "ext3.h" |
25 | 26 | ||
26 | static unsigned char ext3_filetype_table[] = { | 27 | static unsigned char ext3_filetype_table[] = { |
27 | DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK | 28 | DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK |
28 | }; | 29 | }; |
29 | 30 | ||
30 | static int ext3_readdir(struct file *, void *, filldir_t); | ||
31 | static int ext3_dx_readdir(struct file * filp, | 31 | static int ext3_dx_readdir(struct file * filp, |
32 | void * dirent, filldir_t filldir); | 32 | void * dirent, filldir_t filldir); |
33 | static int ext3_release_dir (struct inode * inode, | ||
34 | struct file * filp); | ||
35 | |||
36 | const struct file_operations ext3_dir_operations = { | ||
37 | .llseek = generic_file_llseek, | ||
38 | .read = generic_read_dir, | ||
39 | .readdir = ext3_readdir, /* we take BKL. needed?*/ | ||
40 | .unlocked_ioctl = ext3_ioctl, | ||
41 | #ifdef CONFIG_COMPAT | ||
42 | .compat_ioctl = ext3_compat_ioctl, | ||
43 | #endif | ||
44 | .fsync = ext3_sync_file, /* BKL held */ | ||
45 | .release = ext3_release_dir, | ||
46 | }; | ||
47 | |||
48 | 33 | ||
49 | static unsigned char get_dtype(struct super_block *sb, int filetype) | 34 | static unsigned char get_dtype(struct super_block *sb, int filetype) |
50 | { | 35 | { |
@@ -55,6 +40,25 @@ static unsigned char get_dtype(struct super_block *sb, int filetype) | |||
55 | return (ext3_filetype_table[filetype]); | 40 | return (ext3_filetype_table[filetype]); |
56 | } | 41 | } |
57 | 42 | ||
43 | /** | ||
44 | * Check if the given dir-inode refers to an htree-indexed directory | ||
45 | * (or a directory which could potentially get converted to use htree | ||
46 | * indexing). | ||
47 | * | ||
48 | * Return 1 if it is a dx dir, 0 if not | ||
49 | */ | ||
50 | static int is_dx_dir(struct inode *inode) | ||
51 | { | ||
52 | struct super_block *sb = inode->i_sb; | ||
53 | |||
54 | if (EXT3_HAS_COMPAT_FEATURE(inode->i_sb, | ||
55 | EXT3_FEATURE_COMPAT_DIR_INDEX) && | ||
56 | ((EXT3_I(inode)->i_flags & EXT3_INDEX_FL) || | ||
57 | ((inode->i_size >> sb->s_blocksize_bits) == 1))) | ||
58 | return 1; | ||
59 | |||
60 | return 0; | ||
61 | } | ||
58 | 62 | ||
59 | int ext3_check_dir_entry (const char * function, struct inode * dir, | 63 | int ext3_check_dir_entry (const char * function, struct inode * dir, |
60 | struct ext3_dir_entry_2 * de, | 64 | struct ext3_dir_entry_2 * de, |
@@ -94,18 +98,13 @@ static int ext3_readdir(struct file * filp, | |||
94 | unsigned long offset; | 98 | unsigned long offset; |
95 | int i, stored; | 99 | int i, stored; |
96 | struct ext3_dir_entry_2 *de; | 100 | struct ext3_dir_entry_2 *de; |
97 | struct super_block *sb; | ||
98 | int err; | 101 | int err; |
99 | struct inode *inode = filp->f_path.dentry->d_inode; | 102 | struct inode *inode = filp->f_path.dentry->d_inode; |
103 | struct super_block *sb = inode->i_sb; | ||
100 | int ret = 0; | 104 | int ret = 0; |
101 | int dir_has_error = 0; | 105 | int dir_has_error = 0; |
102 | 106 | ||
103 | sb = inode->i_sb; | 107 | if (is_dx_dir(inode)) { |
104 | |||
105 | if (EXT3_HAS_COMPAT_FEATURE(inode->i_sb, | ||
106 | EXT3_FEATURE_COMPAT_DIR_INDEX) && | ||
107 | ((EXT3_I(inode)->i_flags & EXT3_INDEX_FL) || | ||
108 | ((inode->i_size >> sb->s_blocksize_bits) == 1))) { | ||
109 | err = ext3_dx_readdir(filp, dirent, filldir); | 108 | err = ext3_dx_readdir(filp, dirent, filldir); |
110 | if (err != ERR_BAD_DX_DIR) { | 109 | if (err != ERR_BAD_DX_DIR) { |
111 | ret = err; | 110 | ret = err; |
@@ -227,22 +226,87 @@ out: | |||
227 | return ret; | 226 | return ret; |
228 | } | 227 | } |
229 | 228 | ||
229 | static inline int is_32bit_api(void) | ||
230 | { | ||
231 | #ifdef CONFIG_COMPAT | ||
232 | return is_compat_task(); | ||
233 | #else | ||
234 | return (BITS_PER_LONG == 32); | ||
235 | #endif | ||
236 | } | ||
237 | |||
230 | /* | 238 | /* |
231 | * These functions convert from the major/minor hash to an f_pos | 239 | * These functions convert from the major/minor hash to an f_pos |
232 | * value. | 240 | * value for dx directories |
233 | * | 241 | * |
234 | * Currently we only use major hash numer. This is unfortunate, but | 242 | * Upper layer (for example NFS) should specify FMODE_32BITHASH or |
235 | * on 32-bit machines, the same VFS interface is used for lseek and | 243 | * FMODE_64BITHASH explicitly. On the other hand, we allow ext3 to be mounted |
236 | * llseek, so if we use the 64 bit offset, then the 32-bit versions of | 244 | * directly on both 32-bit and 64-bit nodes; in that case, neither |
237 | * lseek/telldir/seekdir will blow out spectacularly, and from within | 245 | * FMODE_32BITHASH nor FMODE_64BITHASH is specified. |
238 | * the ext2 low-level routine, we don't know if we're being called by | ||
239 | * a 64-bit version of the system call or the 32-bit version of the | ||
240 | * system call. Worse yet, NFSv2 only allows for a 32-bit readdir | ||
241 | * cookie. Sigh. | ||
242 | */ | 246 | */ |
243 | #define hash2pos(major, minor) (major >> 1) | 247 | static inline loff_t hash2pos(struct file *filp, __u32 major, __u32 minor) |
244 | #define pos2maj_hash(pos) ((pos << 1) & 0xffffffff) | 248 | { |
245 | #define pos2min_hash(pos) (0) | 249 | if ((filp->f_mode & FMODE_32BITHASH) || |
250 | (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api())) | ||
251 | return major >> 1; | ||
252 | else | ||
253 | return ((__u64)(major >> 1) << 32) | (__u64)minor; | ||
254 | } | ||
255 | |||
256 | static inline __u32 pos2maj_hash(struct file *filp, loff_t pos) | ||
257 | { | ||
258 | if ((filp->f_mode & FMODE_32BITHASH) || | ||
259 | (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api())) | ||
260 | return (pos << 1) & 0xffffffff; | ||
261 | else | ||
262 | return ((pos >> 32) << 1) & 0xffffffff; | ||
263 | } | ||
264 | |||
265 | static inline __u32 pos2min_hash(struct file *filp, loff_t pos) | ||
266 | { | ||
267 | if ((filp->f_mode & FMODE_32BITHASH) || | ||
268 | (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api())) | ||
269 | return 0; | ||
270 | else | ||
271 | return pos & 0xffffffff; | ||
272 | } | ||
273 | |||
274 | /* | ||
275 | * Return 32- or 64-bit end-of-file for dx directories | ||
276 | */ | ||
277 | static inline loff_t ext3_get_htree_eof(struct file *filp) | ||
278 | { | ||
279 | if ((filp->f_mode & FMODE_32BITHASH) || | ||
280 | (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api())) | ||
281 | return EXT3_HTREE_EOF_32BIT; | ||
282 | else | ||
283 | return EXT3_HTREE_EOF_64BIT; | ||
284 | } | ||
285 | |||
286 | |||
287 | /* | ||
288 | * ext3_dir_llseek() calls generic_file_llseek[_size]() to handle both | ||
289 | * non-htree and htree directories, where the "offset" is in terms | ||
290 | * of the filename hash value instead of the byte offset. | ||
291 | * | ||
292 | * Because we may return a 64-bit hash that is well beyond s_maxbytes, | ||
293 | * we need to pass the max hash as the maximum allowable offset in | ||
294 | * the htree directory case. | ||
295 | * | ||
296 | * NOTE: offsets obtained *before* ext3_set_inode_flag(dir, EXT3_INODE_INDEX) | ||
297 | * will be invalid once the directory has been converted into a dx directory. | ||
298 | */ | ||
299 | loff_t ext3_dir_llseek(struct file *file, loff_t offset, int origin) | ||
300 | { | ||
301 | struct inode *inode = file->f_mapping->host; | ||
302 | int dx_dir = is_dx_dir(inode); | ||
303 | |||
304 | if (likely(dx_dir)) | ||
305 | return generic_file_llseek_size(file, offset, origin, | ||
306 | ext3_get_htree_eof(file)); | ||
307 | else | ||
308 | return generic_file_llseek(file, offset, origin); | ||
309 | } | ||
246 | 310 | ||
247 | /* | 311 | /* |
248 | * This structure holds the nodes of the red-black tree used to store | 312 | * This structure holds the nodes of the red-black tree used to store |
@@ -303,15 +367,16 @@ static void free_rb_tree_fname(struct rb_root *root) | |||
303 | } | 367 | } |
304 | 368 | ||
305 | 369 | ||
306 | static struct dir_private_info *ext3_htree_create_dir_info(loff_t pos) | 370 | static struct dir_private_info *ext3_htree_create_dir_info(struct file *filp, |
371 | loff_t pos) | ||
307 | { | 372 | { |
308 | struct dir_private_info *p; | 373 | struct dir_private_info *p; |
309 | 374 | ||
310 | p = kzalloc(sizeof(struct dir_private_info), GFP_KERNEL); | 375 | p = kzalloc(sizeof(struct dir_private_info), GFP_KERNEL); |
311 | if (!p) | 376 | if (!p) |
312 | return NULL; | 377 | return NULL; |
313 | p->curr_hash = pos2maj_hash(pos); | 378 | p->curr_hash = pos2maj_hash(filp, pos); |
314 | p->curr_minor_hash = pos2min_hash(pos); | 379 | p->curr_minor_hash = pos2min_hash(filp, pos); |
315 | return p; | 380 | return p; |
316 | } | 381 | } |
317 | 382 | ||
@@ -401,7 +466,7 @@ static int call_filldir(struct file * filp, void * dirent, | |||
401 | printk("call_filldir: called with null fname?!?\n"); | 466 | printk("call_filldir: called with null fname?!?\n"); |
402 | return 0; | 467 | return 0; |
403 | } | 468 | } |
404 | curr_pos = hash2pos(fname->hash, fname->minor_hash); | 469 | curr_pos = hash2pos(filp, fname->hash, fname->minor_hash); |
405 | while (fname) { | 470 | while (fname) { |
406 | error = filldir(dirent, fname->name, | 471 | error = filldir(dirent, fname->name, |
407 | fname->name_len, curr_pos, | 472 | fname->name_len, curr_pos, |
@@ -426,13 +491,13 @@ static int ext3_dx_readdir(struct file * filp, | |||
426 | int ret; | 491 | int ret; |
427 | 492 | ||
428 | if (!info) { | 493 | if (!info) { |
429 | info = ext3_htree_create_dir_info(filp->f_pos); | 494 | info = ext3_htree_create_dir_info(filp, filp->f_pos); |
430 | if (!info) | 495 | if (!info) |
431 | return -ENOMEM; | 496 | return -ENOMEM; |
432 | filp->private_data = info; | 497 | filp->private_data = info; |
433 | } | 498 | } |
434 | 499 | ||
435 | if (filp->f_pos == EXT3_HTREE_EOF) | 500 | if (filp->f_pos == ext3_get_htree_eof(filp)) |
436 | return 0; /* EOF */ | 501 | return 0; /* EOF */ |
437 | 502 | ||
438 | /* Some one has messed with f_pos; reset the world */ | 503 | /* Some one has messed with f_pos; reset the world */ |
@@ -440,8 +505,8 @@ static int ext3_dx_readdir(struct file * filp, | |||
440 | free_rb_tree_fname(&info->root); | 505 | free_rb_tree_fname(&info->root); |
441 | info->curr_node = NULL; | 506 | info->curr_node = NULL; |
442 | info->extra_fname = NULL; | 507 | info->extra_fname = NULL; |
443 | info->curr_hash = pos2maj_hash(filp->f_pos); | 508 | info->curr_hash = pos2maj_hash(filp, filp->f_pos); |
444 | info->curr_minor_hash = pos2min_hash(filp->f_pos); | 509 | info->curr_minor_hash = pos2min_hash(filp, filp->f_pos); |
445 | } | 510 | } |
446 | 511 | ||
447 | /* | 512 | /* |
@@ -473,7 +538,7 @@ static int ext3_dx_readdir(struct file * filp, | |||
473 | if (ret < 0) | 538 | if (ret < 0) |
474 | return ret; | 539 | return ret; |
475 | if (ret == 0) { | 540 | if (ret == 0) { |
476 | filp->f_pos = EXT3_HTREE_EOF; | 541 | filp->f_pos = ext3_get_htree_eof(filp); |
477 | break; | 542 | break; |
478 | } | 543 | } |
479 | info->curr_node = rb_first(&info->root); | 544 | info->curr_node = rb_first(&info->root); |
@@ -493,7 +558,7 @@ static int ext3_dx_readdir(struct file * filp, | |||
493 | info->curr_minor_hash = fname->minor_hash; | 558 | info->curr_minor_hash = fname->minor_hash; |
494 | } else { | 559 | } else { |
495 | if (info->next_hash == ~0) { | 560 | if (info->next_hash == ~0) { |
496 | filp->f_pos = EXT3_HTREE_EOF; | 561 | filp->f_pos = ext3_get_htree_eof(filp); |
497 | break; | 562 | break; |
498 | } | 563 | } |
499 | info->curr_hash = info->next_hash; | 564 | info->curr_hash = info->next_hash; |
@@ -512,3 +577,15 @@ static int ext3_release_dir (struct inode * inode, struct file * filp) | |||
512 | 577 | ||
513 | return 0; | 578 | return 0; |
514 | } | 579 | } |
580 | |||
581 | const struct file_operations ext3_dir_operations = { | ||
582 | .llseek = ext3_dir_llseek, | ||
583 | .read = generic_read_dir, | ||
584 | .readdir = ext3_readdir, | ||
585 | .unlocked_ioctl = ext3_ioctl, | ||
586 | #ifdef CONFIG_COMPAT | ||
587 | .compat_ioctl = ext3_compat_ioctl, | ||
588 | #endif | ||
589 | .fsync = ext3_sync_file, | ||
590 | .release = ext3_release_dir, | ||
591 | }; | ||
diff --git a/fs/ext3/ext3.h b/fs/ext3/ext3.h index b6515fd7e56c..fe5bef7914ea 100644 --- a/fs/ext3/ext3.h +++ b/fs/ext3/ext3.h | |||
@@ -920,7 +920,11 @@ struct dx_hash_info | |||
920 | u32 *seed; | 920 | u32 *seed; |
921 | }; | 921 | }; |
922 | 922 | ||
923 | #define EXT3_HTREE_EOF 0x7fffffff | 923 | |
924 | /* 32 and 64 bit signed EOF for dx directories */ | ||
925 | #define EXT3_HTREE_EOF_32BIT ((1UL << (32 - 1)) - 1) | ||
926 | #define EXT3_HTREE_EOF_64BIT ((1ULL << (64 - 1)) - 1) | ||
927 | |||
924 | 928 | ||
925 | /* | 929 | /* |
926 | * Control parameters used by ext3_htree_next_block | 930 | * Control parameters used by ext3_htree_next_block |
diff --git a/fs/ext3/hash.c b/fs/ext3/hash.c index d10231ddcf8a..ede315cdf126 100644 --- a/fs/ext3/hash.c +++ b/fs/ext3/hash.c | |||
@@ -198,8 +198,8 @@ int ext3fs_dirhash(const char *name, int len, struct dx_hash_info *hinfo) | |||
198 | return -1; | 198 | return -1; |
199 | } | 199 | } |
200 | hash = hash & ~1; | 200 | hash = hash & ~1; |
201 | if (hash == (EXT3_HTREE_EOF << 1)) | 201 | if (hash == (EXT3_HTREE_EOF_32BIT << 1)) |
202 | hash = (EXT3_HTREE_EOF-1) << 1; | 202 | hash = (EXT3_HTREE_EOF_32BIT - 1) << 1; |
203 | hinfo->hash = hash; | 203 | hinfo->hash = hash; |
204 | hinfo->minor_hash = minor_hash; | 204 | hinfo->minor_hash = minor_hash; |
205 | return 0; | 205 | return 0; |