diff options
Diffstat (limited to 'fs/ext3/dir.c')
-rw-r--r-- | fs/ext3/dir.c | 519 |
1 files changed, 519 insertions, 0 deletions
diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c new file mode 100644 index 000000000000..832867aef3dc --- /dev/null +++ b/fs/ext3/dir.c | |||
@@ -0,0 +1,519 @@ | |||
1 | /* | ||
2 | * linux/fs/ext3/dir.c | ||
3 | * | ||
4 | * Copyright (C) 1992, 1993, 1994, 1995 | ||
5 | * Remy Card (card@masi.ibp.fr) | ||
6 | * Laboratoire MASI - Institut Blaise Pascal | ||
7 | * Universite Pierre et Marie Curie (Paris VI) | ||
8 | * | ||
9 | * from | ||
10 | * | ||
11 | * linux/fs/minix/dir.c | ||
12 | * | ||
13 | * Copyright (C) 1991, 1992 Linus Torvalds | ||
14 | * | ||
15 | * ext3 directory handling functions | ||
16 | * | ||
17 | * Big-endian to little-endian byte-swapping/bitmaps by | ||
18 | * David S. Miller (davem@caip.rutgers.edu), 1995 | ||
19 | * | ||
20 | * Hash Tree Directory indexing (c) 2001 Daniel Phillips | ||
21 | * | ||
22 | */ | ||
23 | |||
24 | #include <linux/fs.h> | ||
25 | #include <linux/jbd.h> | ||
26 | #include <linux/ext3_fs.h> | ||
27 | #include <linux/buffer_head.h> | ||
28 | #include <linux/smp_lock.h> | ||
29 | #include <linux/slab.h> | ||
30 | #include <linux/rbtree.h> | ||
31 | |||
32 | static unsigned char ext3_filetype_table[] = { | ||
33 | DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK | ||
34 | }; | ||
35 | |||
36 | static int ext3_readdir(struct file *, void *, filldir_t); | ||
37 | static int ext3_dx_readdir(struct file * filp, | ||
38 | void * dirent, filldir_t filldir); | ||
39 | static int ext3_release_dir (struct inode * inode, | ||
40 | struct file * filp); | ||
41 | |||
42 | struct file_operations ext3_dir_operations = { | ||
43 | .llseek = generic_file_llseek, | ||
44 | .read = generic_read_dir, | ||
45 | .readdir = ext3_readdir, /* we take BKL. needed?*/ | ||
46 | .ioctl = ext3_ioctl, /* BKL held */ | ||
47 | .fsync = ext3_sync_file, /* BKL held */ | ||
48 | #ifdef CONFIG_EXT3_INDEX | ||
49 | .release = ext3_release_dir, | ||
50 | #endif | ||
51 | }; | ||
52 | |||
53 | |||
54 | static unsigned char get_dtype(struct super_block *sb, int filetype) | ||
55 | { | ||
56 | if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_FILETYPE) || | ||
57 | (filetype >= EXT3_FT_MAX)) | ||
58 | return DT_UNKNOWN; | ||
59 | |||
60 | return (ext3_filetype_table[filetype]); | ||
61 | } | ||
62 | |||
63 | |||
64 | int ext3_check_dir_entry (const char * function, struct inode * dir, | ||
65 | struct ext3_dir_entry_2 * de, | ||
66 | struct buffer_head * bh, | ||
67 | unsigned long offset) | ||
68 | { | ||
69 | const char * error_msg = NULL; | ||
70 | const int rlen = le16_to_cpu(de->rec_len); | ||
71 | |||
72 | if (rlen < EXT3_DIR_REC_LEN(1)) | ||
73 | error_msg = "rec_len is smaller than minimal"; | ||
74 | else if (rlen % 4 != 0) | ||
75 | error_msg = "rec_len % 4 != 0"; | ||
76 | else if (rlen < EXT3_DIR_REC_LEN(de->name_len)) | ||
77 | error_msg = "rec_len is too small for name_len"; | ||
78 | else if (((char *) de - bh->b_data) + rlen > dir->i_sb->s_blocksize) | ||
79 | error_msg = "directory entry across blocks"; | ||
80 | else if (le32_to_cpu(de->inode) > | ||
81 | le32_to_cpu(EXT3_SB(dir->i_sb)->s_es->s_inodes_count)) | ||
82 | error_msg = "inode out of bounds"; | ||
83 | |||
84 | if (error_msg != NULL) | ||
85 | ext3_error (dir->i_sb, function, | ||
86 | "bad entry in directory #%lu: %s - " | ||
87 | "offset=%lu, inode=%lu, rec_len=%d, name_len=%d", | ||
88 | dir->i_ino, error_msg, offset, | ||
89 | (unsigned long) le32_to_cpu(de->inode), | ||
90 | rlen, de->name_len); | ||
91 | return error_msg == NULL ? 1 : 0; | ||
92 | } | ||
93 | |||
94 | static int ext3_readdir(struct file * filp, | ||
95 | void * dirent, filldir_t filldir) | ||
96 | { | ||
97 | int error = 0; | ||
98 | unsigned long offset, blk; | ||
99 | int i, num, stored; | ||
100 | struct buffer_head * bh, * tmp, * bha[16]; | ||
101 | struct ext3_dir_entry_2 * de; | ||
102 | struct super_block * sb; | ||
103 | int err; | ||
104 | struct inode *inode = filp->f_dentry->d_inode; | ||
105 | int ret = 0; | ||
106 | |||
107 | sb = inode->i_sb; | ||
108 | |||
109 | #ifdef CONFIG_EXT3_INDEX | ||
110 | if (EXT3_HAS_COMPAT_FEATURE(inode->i_sb, | ||
111 | EXT3_FEATURE_COMPAT_DIR_INDEX) && | ||
112 | ((EXT3_I(inode)->i_flags & EXT3_INDEX_FL) || | ||
113 | ((inode->i_size >> sb->s_blocksize_bits) == 1))) { | ||
114 | err = ext3_dx_readdir(filp, dirent, filldir); | ||
115 | if (err != ERR_BAD_DX_DIR) { | ||
116 | ret = err; | ||
117 | goto out; | ||
118 | } | ||
119 | /* | ||
120 | * We don't set the inode dirty flag since it's not | ||
121 | * critical that it get flushed back to the disk. | ||
122 | */ | ||
123 | EXT3_I(filp->f_dentry->d_inode)->i_flags &= ~EXT3_INDEX_FL; | ||
124 | } | ||
125 | #endif | ||
126 | stored = 0; | ||
127 | bh = NULL; | ||
128 | offset = filp->f_pos & (sb->s_blocksize - 1); | ||
129 | |||
130 | while (!error && !stored && filp->f_pos < inode->i_size) { | ||
131 | blk = (filp->f_pos) >> EXT3_BLOCK_SIZE_BITS(sb); | ||
132 | bh = ext3_bread(NULL, inode, blk, 0, &err); | ||
133 | if (!bh) { | ||
134 | ext3_error (sb, "ext3_readdir", | ||
135 | "directory #%lu contains a hole at offset %lu", | ||
136 | inode->i_ino, (unsigned long)filp->f_pos); | ||
137 | filp->f_pos += sb->s_blocksize - offset; | ||
138 | continue; | ||
139 | } | ||
140 | |||
141 | /* | ||
142 | * Do the readahead | ||
143 | */ | ||
144 | if (!offset) { | ||
145 | for (i = 16 >> (EXT3_BLOCK_SIZE_BITS(sb) - 9), num = 0; | ||
146 | i > 0; i--) { | ||
147 | tmp = ext3_getblk (NULL, inode, ++blk, 0, &err); | ||
148 | if (tmp && !buffer_uptodate(tmp) && | ||
149 | !buffer_locked(tmp)) | ||
150 | bha[num++] = tmp; | ||
151 | else | ||
152 | brelse (tmp); | ||
153 | } | ||
154 | if (num) { | ||
155 | ll_rw_block (READA, num, bha); | ||
156 | for (i = 0; i < num; i++) | ||
157 | brelse (bha[i]); | ||
158 | } | ||
159 | } | ||
160 | |||
161 | revalidate: | ||
162 | /* If the dir block has changed since the last call to | ||
163 | * readdir(2), then we might be pointing to an invalid | ||
164 | * dirent right now. Scan from the start of the block | ||
165 | * to make sure. */ | ||
166 | if (filp->f_version != inode->i_version) { | ||
167 | for (i = 0; i < sb->s_blocksize && i < offset; ) { | ||
168 | de = (struct ext3_dir_entry_2 *) | ||
169 | (bh->b_data + i); | ||
170 | /* It's too expensive to do a full | ||
171 | * dirent test each time round this | ||
172 | * loop, but we do have to test at | ||
173 | * least that it is non-zero. A | ||
174 | * failure will be detected in the | ||
175 | * dirent test below. */ | ||
176 | if (le16_to_cpu(de->rec_len) < | ||
177 | EXT3_DIR_REC_LEN(1)) | ||
178 | break; | ||
179 | i += le16_to_cpu(de->rec_len); | ||
180 | } | ||
181 | offset = i; | ||
182 | filp->f_pos = (filp->f_pos & ~(sb->s_blocksize - 1)) | ||
183 | | offset; | ||
184 | filp->f_version = inode->i_version; | ||
185 | } | ||
186 | |||
187 | while (!error && filp->f_pos < inode->i_size | ||
188 | && offset < sb->s_blocksize) { | ||
189 | de = (struct ext3_dir_entry_2 *) (bh->b_data + offset); | ||
190 | if (!ext3_check_dir_entry ("ext3_readdir", inode, de, | ||
191 | bh, offset)) { | ||
192 | /* On error, skip the f_pos to the | ||
193 | next block. */ | ||
194 | filp->f_pos = (filp->f_pos | | ||
195 | (sb->s_blocksize - 1)) + 1; | ||
196 | brelse (bh); | ||
197 | ret = stored; | ||
198 | goto out; | ||
199 | } | ||
200 | offset += le16_to_cpu(de->rec_len); | ||
201 | if (le32_to_cpu(de->inode)) { | ||
202 | /* We might block in the next section | ||
203 | * if the data destination is | ||
204 | * currently swapped out. So, use a | ||
205 | * version stamp to detect whether or | ||
206 | * not the directory has been modified | ||
207 | * during the copy operation. | ||
208 | */ | ||
209 | unsigned long version = filp->f_version; | ||
210 | |||
211 | error = filldir(dirent, de->name, | ||
212 | de->name_len, | ||
213 | filp->f_pos, | ||
214 | le32_to_cpu(de->inode), | ||
215 | get_dtype(sb, de->file_type)); | ||
216 | if (error) | ||
217 | break; | ||
218 | if (version != filp->f_version) | ||
219 | goto revalidate; | ||
220 | stored ++; | ||
221 | } | ||
222 | filp->f_pos += le16_to_cpu(de->rec_len); | ||
223 | } | ||
224 | offset = 0; | ||
225 | brelse (bh); | ||
226 | } | ||
227 | out: | ||
228 | return ret; | ||
229 | } | ||
230 | |||
231 | #ifdef CONFIG_EXT3_INDEX | ||
/*
 * These macros convert between the major/minor hash and an f_pos
 * value.
 *
 * Currently we only use the major hash number.  This is unfortunate, but
 * on 32-bit machines, the same VFS interface is used for lseek and
 * llseek, so if we use the 64 bit offset, then the 32-bit versions of
 * lseek/telldir/seekdir will blow out spectacularly, and from within
 * the ext2 low-level routine, we don't know if we're being called by
 * a 64-bit version of the system call or the 32-bit version of the
 * system call.  Worse yet, NFSv2 only allows for a 32-bit readdir
 * cookie.  Sigh.
 *
 * Every argument is parenthesised so that callers may pass arbitrary
 * expressions (e.g. "a | b") without operator-precedence surprises.
 */
#define hash2pos(major, minor)	((major) >> 1)
#define pos2maj_hash(pos)	(((pos) << 1) & 0xffffffff)
#define pos2min_hash(pos)	(0)
248 | |||
249 | /* | ||
250 | * This structure holds the nodes of the red-black tree used to store | ||
251 | * the directory entry in hash order. | ||
252 | */ | ||
253 | struct fname { | ||
254 | __u32 hash; | ||
255 | __u32 minor_hash; | ||
256 | struct rb_node rb_hash; | ||
257 | struct fname *next; | ||
258 | __u32 inode; | ||
259 | __u8 name_len; | ||
260 | __u8 file_type; | ||
261 | char name[0]; | ||
262 | }; | ||
263 | |||
264 | /* | ||
265 | * This functoin implements a non-recursive way of freeing all of the | ||
266 | * nodes in the red-black tree. | ||
267 | */ | ||
268 | static void free_rb_tree_fname(struct rb_root *root) | ||
269 | { | ||
270 | struct rb_node *n = root->rb_node; | ||
271 | struct rb_node *parent; | ||
272 | struct fname *fname; | ||
273 | |||
274 | while (n) { | ||
275 | /* Do the node's children first */ | ||
276 | if ((n)->rb_left) { | ||
277 | n = n->rb_left; | ||
278 | continue; | ||
279 | } | ||
280 | if (n->rb_right) { | ||
281 | n = n->rb_right; | ||
282 | continue; | ||
283 | } | ||
284 | /* | ||
285 | * The node has no children; free it, and then zero | ||
286 | * out parent's link to it. Finally go to the | ||
287 | * beginning of the loop and try to free the parent | ||
288 | * node. | ||
289 | */ | ||
290 | parent = n->rb_parent; | ||
291 | fname = rb_entry(n, struct fname, rb_hash); | ||
292 | while (fname) { | ||
293 | struct fname * old = fname; | ||
294 | fname = fname->next; | ||
295 | kfree (old); | ||
296 | } | ||
297 | if (!parent) | ||
298 | root->rb_node = NULL; | ||
299 | else if (parent->rb_left == n) | ||
300 | parent->rb_left = NULL; | ||
301 | else if (parent->rb_right == n) | ||
302 | parent->rb_right = NULL; | ||
303 | n = parent; | ||
304 | } | ||
305 | root->rb_node = NULL; | ||
306 | } | ||
307 | |||
308 | |||
309 | static struct dir_private_info *create_dir_info(loff_t pos) | ||
310 | { | ||
311 | struct dir_private_info *p; | ||
312 | |||
313 | p = kmalloc(sizeof(struct dir_private_info), GFP_KERNEL); | ||
314 | if (!p) | ||
315 | return NULL; | ||
316 | p->root.rb_node = NULL; | ||
317 | p->curr_node = NULL; | ||
318 | p->extra_fname = NULL; | ||
319 | p->last_pos = 0; | ||
320 | p->curr_hash = pos2maj_hash(pos); | ||
321 | p->curr_minor_hash = pos2min_hash(pos); | ||
322 | p->next_hash = 0; | ||
323 | return p; | ||
324 | } | ||
325 | |||
326 | void ext3_htree_free_dir_info(struct dir_private_info *p) | ||
327 | { | ||
328 | free_rb_tree_fname(&p->root); | ||
329 | kfree(p); | ||
330 | } | ||
331 | |||
332 | /* | ||
333 | * Given a directory entry, enter it into the fname rb tree. | ||
334 | */ | ||
335 | int ext3_htree_store_dirent(struct file *dir_file, __u32 hash, | ||
336 | __u32 minor_hash, | ||
337 | struct ext3_dir_entry_2 *dirent) | ||
338 | { | ||
339 | struct rb_node **p, *parent = NULL; | ||
340 | struct fname * fname, *new_fn; | ||
341 | struct dir_private_info *info; | ||
342 | int len; | ||
343 | |||
344 | info = (struct dir_private_info *) dir_file->private_data; | ||
345 | p = &info->root.rb_node; | ||
346 | |||
347 | /* Create and allocate the fname structure */ | ||
348 | len = sizeof(struct fname) + dirent->name_len + 1; | ||
349 | new_fn = kmalloc(len, GFP_KERNEL); | ||
350 | if (!new_fn) | ||
351 | return -ENOMEM; | ||
352 | memset(new_fn, 0, len); | ||
353 | new_fn->hash = hash; | ||
354 | new_fn->minor_hash = minor_hash; | ||
355 | new_fn->inode = le32_to_cpu(dirent->inode); | ||
356 | new_fn->name_len = dirent->name_len; | ||
357 | new_fn->file_type = dirent->file_type; | ||
358 | memcpy(new_fn->name, dirent->name, dirent->name_len); | ||
359 | new_fn->name[dirent->name_len] = 0; | ||
360 | |||
361 | while (*p) { | ||
362 | parent = *p; | ||
363 | fname = rb_entry(parent, struct fname, rb_hash); | ||
364 | |||
365 | /* | ||
366 | * If the hash and minor hash match up, then we put | ||
367 | * them on a linked list. This rarely happens... | ||
368 | */ | ||
369 | if ((new_fn->hash == fname->hash) && | ||
370 | (new_fn->minor_hash == fname->minor_hash)) { | ||
371 | new_fn->next = fname->next; | ||
372 | fname->next = new_fn; | ||
373 | return 0; | ||
374 | } | ||
375 | |||
376 | if (new_fn->hash < fname->hash) | ||
377 | p = &(*p)->rb_left; | ||
378 | else if (new_fn->hash > fname->hash) | ||
379 | p = &(*p)->rb_right; | ||
380 | else if (new_fn->minor_hash < fname->minor_hash) | ||
381 | p = &(*p)->rb_left; | ||
382 | else /* if (new_fn->minor_hash > fname->minor_hash) */ | ||
383 | p = &(*p)->rb_right; | ||
384 | } | ||
385 | |||
386 | rb_link_node(&new_fn->rb_hash, parent, p); | ||
387 | rb_insert_color(&new_fn->rb_hash, &info->root); | ||
388 | return 0; | ||
389 | } | ||
390 | |||
391 | |||
392 | |||
393 | /* | ||
394 | * This is a helper function for ext3_dx_readdir. It calls filldir | ||
395 | * for all entres on the fname linked list. (Normally there is only | ||
396 | * one entry on the linked list, unless there are 62 bit hash collisions.) | ||
397 | */ | ||
398 | static int call_filldir(struct file * filp, void * dirent, | ||
399 | filldir_t filldir, struct fname *fname) | ||
400 | { | ||
401 | struct dir_private_info *info = filp->private_data; | ||
402 | loff_t curr_pos; | ||
403 | struct inode *inode = filp->f_dentry->d_inode; | ||
404 | struct super_block * sb; | ||
405 | int error; | ||
406 | |||
407 | sb = inode->i_sb; | ||
408 | |||
409 | if (!fname) { | ||
410 | printk("call_filldir: called with null fname?!?\n"); | ||
411 | return 0; | ||
412 | } | ||
413 | curr_pos = hash2pos(fname->hash, fname->minor_hash); | ||
414 | while (fname) { | ||
415 | error = filldir(dirent, fname->name, | ||
416 | fname->name_len, curr_pos, | ||
417 | fname->inode, | ||
418 | get_dtype(sb, fname->file_type)); | ||
419 | if (error) { | ||
420 | filp->f_pos = curr_pos; | ||
421 | info->extra_fname = fname->next; | ||
422 | return error; | ||
423 | } | ||
424 | fname = fname->next; | ||
425 | } | ||
426 | return 0; | ||
427 | } | ||
428 | |||
429 | static int ext3_dx_readdir(struct file * filp, | ||
430 | void * dirent, filldir_t filldir) | ||
431 | { | ||
432 | struct dir_private_info *info = filp->private_data; | ||
433 | struct inode *inode = filp->f_dentry->d_inode; | ||
434 | struct fname *fname; | ||
435 | int ret; | ||
436 | |||
437 | if (!info) { | ||
438 | info = create_dir_info(filp->f_pos); | ||
439 | if (!info) | ||
440 | return -ENOMEM; | ||
441 | filp->private_data = info; | ||
442 | } | ||
443 | |||
444 | if (filp->f_pos == EXT3_HTREE_EOF) | ||
445 | return 0; /* EOF */ | ||
446 | |||
447 | /* Some one has messed with f_pos; reset the world */ | ||
448 | if (info->last_pos != filp->f_pos) { | ||
449 | free_rb_tree_fname(&info->root); | ||
450 | info->curr_node = NULL; | ||
451 | info->extra_fname = NULL; | ||
452 | info->curr_hash = pos2maj_hash(filp->f_pos); | ||
453 | info->curr_minor_hash = pos2min_hash(filp->f_pos); | ||
454 | } | ||
455 | |||
456 | /* | ||
457 | * If there are any leftover names on the hash collision | ||
458 | * chain, return them first. | ||
459 | */ | ||
460 | if (info->extra_fname && | ||
461 | call_filldir(filp, dirent, filldir, info->extra_fname)) | ||
462 | goto finished; | ||
463 | |||
464 | if (!info->curr_node) | ||
465 | info->curr_node = rb_first(&info->root); | ||
466 | |||
467 | while (1) { | ||
468 | /* | ||
469 | * Fill the rbtree if we have no more entries, | ||
470 | * or the inode has changed since we last read in the | ||
471 | * cached entries. | ||
472 | */ | ||
473 | if ((!info->curr_node) || | ||
474 | (filp->f_version != inode->i_version)) { | ||
475 | info->curr_node = NULL; | ||
476 | free_rb_tree_fname(&info->root); | ||
477 | filp->f_version = inode->i_version; | ||
478 | ret = ext3_htree_fill_tree(filp, info->curr_hash, | ||
479 | info->curr_minor_hash, | ||
480 | &info->next_hash); | ||
481 | if (ret < 0) | ||
482 | return ret; | ||
483 | if (ret == 0) { | ||
484 | filp->f_pos = EXT3_HTREE_EOF; | ||
485 | break; | ||
486 | } | ||
487 | info->curr_node = rb_first(&info->root); | ||
488 | } | ||
489 | |||
490 | fname = rb_entry(info->curr_node, struct fname, rb_hash); | ||
491 | info->curr_hash = fname->hash; | ||
492 | info->curr_minor_hash = fname->minor_hash; | ||
493 | if (call_filldir(filp, dirent, filldir, fname)) | ||
494 | break; | ||
495 | |||
496 | info->curr_node = rb_next(info->curr_node); | ||
497 | if (!info->curr_node) { | ||
498 | if (info->next_hash == ~0) { | ||
499 | filp->f_pos = EXT3_HTREE_EOF; | ||
500 | break; | ||
501 | } | ||
502 | info->curr_hash = info->next_hash; | ||
503 | info->curr_minor_hash = 0; | ||
504 | } | ||
505 | } | ||
506 | finished: | ||
507 | info->last_pos = filp->f_pos; | ||
508 | return 0; | ||
509 | } | ||
510 | |||
511 | static int ext3_release_dir (struct inode * inode, struct file * filp) | ||
512 | { | ||
513 | if (filp->private_data) | ||
514 | ext3_htree_free_dir_info(filp->private_data); | ||
515 | |||
516 | return 0; | ||
517 | } | ||
518 | |||
519 | #endif | ||