aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ocfs2
diff options
context:
space:
mode:
authorMark Fasheh <mfasheh@suse.com>2008-10-03 17:32:11 -0400
committerTheodore Ts'o <tytso@mit.edu>2008-10-03 17:32:11 -0400
commit00dc417fa3e763345b34ccb6034d72de76eea0a1 (patch)
treec50da655dbd6747499f4516280f323b90214af62 /fs/ocfs2
parentc4b929b85bdb64afacbbf6453b1f2bf7e14c9e89 (diff)
ocfs2: fiemap support
Plug ocfs2 into ->fiemap. Some portions of ocfs2_get_clusters() had to be refactored so that the extent cache can be skipped in favor of going directly to the on-disk records. This makes it easier for us to determine which extent is the last one in the btree. Also, I'm not sure we want to be caching fiemap lookups anyway as they're not directly related to data read/write. Signed-off-by: Mark Fasheh <mfasheh@suse.com> Signed-off-by: "Theodore Ts'o" <tytso@mit.edu> Cc: ocfs2-devel@oss.oracle.com Cc: linux-fsdevel@vger.kernel.org
Diffstat (limited to 'fs/ocfs2')
-rw-r--r--fs/ocfs2/alloc.c9
-rw-r--r--fs/ocfs2/alloc.h9
-rw-r--r--fs/ocfs2/extent_map.c346
-rw-r--r--fs/ocfs2/extent_map.h3
-rw-r--r--fs/ocfs2/file.c1
5 files changed, 306 insertions, 62 deletions
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 10bfb466e068..29ff57ec5d1f 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -990,15 +990,6 @@ out:
990} 990}
991 991
992/* 992/*
993 * This is only valid for leaf nodes, which are the only ones that can
994 * have empty extents anyway.
995 */
996static inline int ocfs2_is_empty_extent(struct ocfs2_extent_rec *rec)
997{
998 return !rec->e_leaf_clusters;
999}
1000
1001/*
1002 * This function will discard the rightmost extent record. 993 * This function will discard the rightmost extent record.
1003 */ 994 */
1004static void ocfs2_shift_records_right(struct ocfs2_extent_list *el) 995static void ocfs2_shift_records_right(struct ocfs2_extent_list *el)
diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h
index 42ff94bd8011..60cd3d59230c 100644
--- a/fs/ocfs2/alloc.h
+++ b/fs/ocfs2/alloc.h
@@ -146,4 +146,13 @@ static inline unsigned int ocfs2_rec_clusters(struct ocfs2_extent_list *el,
146 return le16_to_cpu(rec->e_leaf_clusters); 146 return le16_to_cpu(rec->e_leaf_clusters);
147} 147}
148 148
149/*
150 * This is only valid for leaf nodes, which are the only ones that can
151 * have empty extents anyway.
152 */
153static inline int ocfs2_is_empty_extent(struct ocfs2_extent_rec *rec)
154{
155 return !rec->e_leaf_clusters;
156}
157
149#endif /* OCFS2_ALLOC_H */ 158#endif /* OCFS2_ALLOC_H */
diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c
index c58668a326fe..aed268e80b49 100644
--- a/fs/ocfs2/extent_map.c
+++ b/fs/ocfs2/extent_map.c
@@ -25,6 +25,7 @@
25#include <linux/fs.h> 25#include <linux/fs.h>
26#include <linux/init.h> 26#include <linux/init.h>
27#include <linux/types.h> 27#include <linux/types.h>
28#include <linux/fiemap.h>
28 29
29#define MLOG_MASK_PREFIX ML_EXTENT_MAP 30#define MLOG_MASK_PREFIX ML_EXTENT_MAP
30#include <cluster/masklog.h> 31#include <cluster/masklog.h>
@@ -32,6 +33,7 @@
32#include "ocfs2.h" 33#include "ocfs2.h"
33 34
34#include "alloc.h" 35#include "alloc.h"
36#include "dlmglue.h"
35#include "extent_map.h" 37#include "extent_map.h"
36#include "inode.h" 38#include "inode.h"
37#include "super.h" 39#include "super.h"
@@ -282,6 +284,51 @@ out:
282 kfree(new_emi); 284 kfree(new_emi);
283} 285}
284 286
287static int ocfs2_last_eb_is_empty(struct inode *inode,
288 struct ocfs2_dinode *di)
289{
290 int ret, next_free;
291 u64 last_eb_blk = le64_to_cpu(di->i_last_eb_blk);
292 struct buffer_head *eb_bh = NULL;
293 struct ocfs2_extent_block *eb;
294 struct ocfs2_extent_list *el;
295
296 ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), last_eb_blk,
297 &eb_bh, OCFS2_BH_CACHED, inode);
298 if (ret) {
299 mlog_errno(ret);
300 goto out;
301 }
302
303 eb = (struct ocfs2_extent_block *) eb_bh->b_data;
304 el = &eb->h_list;
305
306 if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
307 ret = -EROFS;
308 OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb);
309 goto out;
310 }
311
312 if (el->l_tree_depth) {
313 ocfs2_error(inode->i_sb,
314 "Inode %lu has non zero tree depth in "
315 "leaf block %llu\n", inode->i_ino,
316 (unsigned long long)eb_bh->b_blocknr);
317 ret = -EROFS;
318 goto out;
319 }
320
321 next_free = le16_to_cpu(el->l_next_free_rec);
322
323 if (next_free == 0 ||
324 (next_free == 1 && ocfs2_is_empty_extent(&el->l_recs[0])))
325 ret = 1;
326
327out:
328 brelse(eb_bh);
329 return ret;
330}
331
285/* 332/*
286 * Return the 1st index within el which contains an extent start 333 * Return the 1st index within el which contains an extent start
287 * larger than v_cluster. 334 * larger than v_cluster.
@@ -373,42 +420,28 @@ out:
373 return ret; 420 return ret;
374} 421}
375 422
376int ocfs2_get_clusters(struct inode *inode, u32 v_cluster, 423static int ocfs2_get_clusters_nocache(struct inode *inode,
377 u32 *p_cluster, u32 *num_clusters, 424 struct buffer_head *di_bh,
378 unsigned int *extent_flags) 425 u32 v_cluster, unsigned int *hole_len,
426 struct ocfs2_extent_rec *ret_rec,
427 unsigned int *is_last)
379{ 428{
380 int ret, i; 429 int i, ret, tree_height, len;
381 unsigned int flags = 0;
382 struct buffer_head *di_bh = NULL;
383 struct buffer_head *eb_bh = NULL;
384 struct ocfs2_dinode *di; 430 struct ocfs2_dinode *di;
385 struct ocfs2_extent_block *eb; 431 struct ocfs2_extent_block *uninitialized_var(eb);
386 struct ocfs2_extent_list *el; 432 struct ocfs2_extent_list *el;
387 struct ocfs2_extent_rec *rec; 433 struct ocfs2_extent_rec *rec;
388 u32 coff; 434 struct buffer_head *eb_bh = NULL;
389
390 if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
391 ret = -ERANGE;
392 mlog_errno(ret);
393 goto out;
394 }
395
396 ret = ocfs2_extent_map_lookup(inode, v_cluster, p_cluster,
397 num_clusters, extent_flags);
398 if (ret == 0)
399 goto out;
400 435
401 ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), OCFS2_I(inode)->ip_blkno, 436 memset(ret_rec, 0, sizeof(*ret_rec));
402 &di_bh, OCFS2_BH_CACHED, inode); 437 if (is_last)
403 if (ret) { 438 *is_last = 0;
404 mlog_errno(ret);
405 goto out;
406 }
407 439
408 di = (struct ocfs2_dinode *) di_bh->b_data; 440 di = (struct ocfs2_dinode *) di_bh->b_data;
409 el = &di->id2.i_list; 441 el = &di->id2.i_list;
442 tree_height = le16_to_cpu(el->l_tree_depth);
410 443
411 if (el->l_tree_depth) { 444 if (tree_height > 0) {
412 ret = ocfs2_find_leaf(inode, el, v_cluster, &eb_bh); 445 ret = ocfs2_find_leaf(inode, el, v_cluster, &eb_bh);
413 if (ret) { 446 if (ret) {
414 mlog_errno(ret); 447 mlog_errno(ret);
@@ -431,46 +464,143 @@ int ocfs2_get_clusters(struct inode *inode, u32 v_cluster,
431 i = ocfs2_search_extent_list(el, v_cluster); 464 i = ocfs2_search_extent_list(el, v_cluster);
432 if (i == -1) { 465 if (i == -1) {
433 /* 466 /*
434 * A hole was found. Return some canned values that 467 * Holes can be larger than the maximum size of an
435 * callers can key on. If asked for, num_clusters will 468 * extent, so we return their lengths in a seperate
436 * be populated with the size of the hole. 469 * field.
437 */ 470 */
438 *p_cluster = 0; 471 if (hole_len) {
439 if (num_clusters) {
440 ret = ocfs2_figure_hole_clusters(inode, el, eb_bh, 472 ret = ocfs2_figure_hole_clusters(inode, el, eb_bh,
441 v_cluster, 473 v_cluster, &len);
442 num_clusters);
443 if (ret) { 474 if (ret) {
444 mlog_errno(ret); 475 mlog_errno(ret);
445 goto out; 476 goto out;
446 } 477 }
478
479 *hole_len = len;
447 } 480 }
448 } else { 481 goto out_hole;
449 rec = &el->l_recs[i]; 482 }
450 483
451 BUG_ON(v_cluster < le32_to_cpu(rec->e_cpos)); 484 rec = &el->l_recs[i];
452 485
453 if (!rec->e_blkno) { 486 BUG_ON(v_cluster < le32_to_cpu(rec->e_cpos));
454 ocfs2_error(inode->i_sb, "Inode %lu has bad extent " 487
455 "record (%u, %u, 0)", inode->i_ino, 488 if (!rec->e_blkno) {
456 le32_to_cpu(rec->e_cpos), 489 ocfs2_error(inode->i_sb, "Inode %lu has bad extent "
457 ocfs2_rec_clusters(el, rec)); 490 "record (%u, %u, 0)", inode->i_ino,
458 ret = -EROFS; 491 le32_to_cpu(rec->e_cpos),
459 goto out; 492 ocfs2_rec_clusters(el, rec));
493 ret = -EROFS;
494 goto out;
495 }
496
497 *ret_rec = *rec;
498
499 /*
500 * Checking for last extent is potentially expensive - we
501 * might have to look at the next leaf over to see if it's
502 * empty.
503 *
504 * The first two checks are to see whether the caller even
505 * cares for this information, and if the extent is at least
506 * the last in it's list.
507 *
508 * If those hold true, then the extent is last if any of the
509 * additional conditions hold true:
510 * - Extent list is in-inode
511 * - Extent list is right-most
512 * - Extent list is 2nd to rightmost, with empty right-most
513 */
514 if (is_last) {
515 if (i == (le16_to_cpu(el->l_next_free_rec) - 1)) {
516 if (tree_height == 0)
517 *is_last = 1;
518 else if (eb->h_blkno == di->i_last_eb_blk)
519 *is_last = 1;
520 else if (eb->h_next_leaf_blk == di->i_last_eb_blk) {
521 ret = ocfs2_last_eb_is_empty(inode, di);
522 if (ret < 0) {
523 mlog_errno(ret);
524 goto out;
525 }
526 if (ret == 1)
527 *is_last = 1;
528 }
460 } 529 }
530 }
531
532out_hole:
533 ret = 0;
534out:
535 brelse(eb_bh);
536 return ret;
537}
538
539static void ocfs2_relative_extent_offsets(struct super_block *sb,
540 u32 v_cluster,
541 struct ocfs2_extent_rec *rec,
542 u32 *p_cluster, u32 *num_clusters)
543
544{
545 u32 coff = v_cluster - le32_to_cpu(rec->e_cpos);
546
547 *p_cluster = ocfs2_blocks_to_clusters(sb, le64_to_cpu(rec->e_blkno));
548 *p_cluster = *p_cluster + coff;
549
550 if (num_clusters)
551 *num_clusters = le16_to_cpu(rec->e_leaf_clusters) - coff;
552}
553
554int ocfs2_get_clusters(struct inode *inode, u32 v_cluster,
555 u32 *p_cluster, u32 *num_clusters,
556 unsigned int *extent_flags)
557{
558 int ret;
559 unsigned int uninitialized_var(hole_len), flags = 0;
560 struct buffer_head *di_bh = NULL;
561 struct ocfs2_extent_rec rec;
461 562
462 coff = v_cluster - le32_to_cpu(rec->e_cpos); 563 if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
564 ret = -ERANGE;
565 mlog_errno(ret);
566 goto out;
567 }
463 568
464 *p_cluster = ocfs2_blocks_to_clusters(inode->i_sb, 569 ret = ocfs2_extent_map_lookup(inode, v_cluster, p_cluster,
465 le64_to_cpu(rec->e_blkno)); 570 num_clusters, extent_flags);
466 *p_cluster = *p_cluster + coff; 571 if (ret == 0)
572 goto out;
467 573
468 if (num_clusters) 574 ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), OCFS2_I(inode)->ip_blkno,
469 *num_clusters = ocfs2_rec_clusters(el, rec) - coff; 575 &di_bh, OCFS2_BH_CACHED, inode);
576 if (ret) {
577 mlog_errno(ret);
578 goto out;
579 }
470 580
471 flags = rec->e_flags; 581 ret = ocfs2_get_clusters_nocache(inode, di_bh, v_cluster, &hole_len,
582 &rec, NULL);
583 if (ret) {
584 mlog_errno(ret);
585 goto out;
586 }
472 587
473 ocfs2_extent_map_insert_rec(inode, rec); 588 if (rec.e_blkno == 0ULL) {
589 /*
590 * A hole was found. Return some canned values that
591 * callers can key on. If asked for, num_clusters will
592 * be populated with the size of the hole.
593 */
594 *p_cluster = 0;
595 if (num_clusters) {
596 *num_clusters = hole_len;
597 }
598 } else {
599 ocfs2_relative_extent_offsets(inode->i_sb, v_cluster, &rec,
600 p_cluster, num_clusters);
601 flags = rec.e_flags;
602
603 ocfs2_extent_map_insert_rec(inode, &rec);
474 } 604 }
475 605
476 if (extent_flags) 606 if (extent_flags)
@@ -478,7 +608,6 @@ int ocfs2_get_clusters(struct inode *inode, u32 v_cluster,
478 608
479out: 609out:
480 brelse(di_bh); 610 brelse(di_bh);
481 brelse(eb_bh);
482 return ret; 611 return ret;
483} 612}
484 613
@@ -521,3 +650,114 @@ int ocfs2_extent_map_get_blocks(struct inode *inode, u64 v_blkno, u64 *p_blkno,
521out: 650out:
522 return ret; 651 return ret;
523} 652}
653
654static int ocfs2_fiemap_inline(struct inode *inode, struct buffer_head *di_bh,
655 struct fiemap_extent_info *fieinfo,
656 u64 map_start)
657{
658 int ret;
659 unsigned int id_count;
660 struct ocfs2_dinode *di;
661 u64 phys;
662 u32 flags = FIEMAP_EXTENT_DATA_INLINE|FIEMAP_EXTENT_LAST;
663 struct ocfs2_inode_info *oi = OCFS2_I(inode);
664
665 di = (struct ocfs2_dinode *)di_bh->b_data;
666 id_count = le16_to_cpu(di->id2.i_data.id_count);
667
668 if (map_start < id_count) {
669 phys = oi->ip_blkno << inode->i_sb->s_blocksize_bits;
670 phys += offsetof(struct ocfs2_dinode, id2.i_data.id_data);
671
672 ret = fiemap_fill_next_extent(fieinfo, 0, phys, id_count,
673 flags);
674 if (ret < 0)
675 return ret;
676 }
677
678 return 0;
679}
680
681#define OCFS2_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC)
682
683int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
684 u64 map_start, u64 map_len)
685{
686 int ret, is_last;
687 u32 mapping_end, cpos;
688 unsigned int hole_size;
689 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
690 u64 len_bytes, phys_bytes, virt_bytes;
691 struct buffer_head *di_bh = NULL;
692 struct ocfs2_extent_rec rec;
693
694 ret = fiemap_check_flags(fieinfo, OCFS2_FIEMAP_FLAGS);
695 if (ret)
696 return ret;
697
698 ret = ocfs2_inode_lock(inode, &di_bh, 0);
699 if (ret) {
700 mlog_errno(ret);
701 goto out;
702 }
703
704 down_read(&OCFS2_I(inode)->ip_alloc_sem);
705
706 /*
707 * Handle inline-data separately.
708 */
709 if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
710 ret = ocfs2_fiemap_inline(inode, di_bh, fieinfo, map_start);
711 goto out_unlock;
712 }
713
714 cpos = map_start >> osb->s_clustersize_bits;
715 mapping_end = ocfs2_clusters_for_bytes(inode->i_sb,
716 map_start + map_len);
717 mapping_end -= cpos;
718 is_last = 0;
719 while (cpos < mapping_end && !is_last) {
720 u32 fe_flags;
721
722 ret = ocfs2_get_clusters_nocache(inode, di_bh, cpos,
723 &hole_size, &rec, &is_last);
724 if (ret) {
725 mlog_errno(ret);
726 goto out;
727 }
728
729 if (rec.e_blkno == 0ULL) {
730 cpos += hole_size;
731 continue;
732 }
733
734 fe_flags = 0;
735 if (rec.e_flags & OCFS2_EXT_UNWRITTEN)
736 fe_flags |= FIEMAP_EXTENT_UNWRITTEN;
737 if (is_last)
738 fe_flags |= FIEMAP_EXTENT_LAST;
739 len_bytes = (u64)le16_to_cpu(rec.e_leaf_clusters) << osb->s_clustersize_bits;
740 phys_bytes = le64_to_cpu(rec.e_blkno) << osb->sb->s_blocksize_bits;
741 virt_bytes = (u64)le32_to_cpu(rec.e_cpos) << osb->s_clustersize_bits;
742
743 ret = fiemap_fill_next_extent(fieinfo, virt_bytes, phys_bytes,
744 len_bytes, fe_flags);
745 if (ret)
746 break;
747
748 cpos = le32_to_cpu(rec.e_cpos)+ le16_to_cpu(rec.e_leaf_clusters);
749 }
750
751 if (ret > 0)
752 ret = 0;
753
754out_unlock:
755 brelse(di_bh);
756
757 up_read(&OCFS2_I(inode)->ip_alloc_sem);
758
759 ocfs2_inode_unlock(inode, 0);
760out:
761
762 return ret;
763}
diff --git a/fs/ocfs2/extent_map.h b/fs/ocfs2/extent_map.h
index de91e3e41a22..1b97490e1ea8 100644
--- a/fs/ocfs2/extent_map.h
+++ b/fs/ocfs2/extent_map.h
@@ -50,4 +50,7 @@ int ocfs2_get_clusters(struct inode *inode, u32 v_cluster, u32 *p_cluster,
50int ocfs2_extent_map_get_blocks(struct inode *inode, u64 v_blkno, u64 *p_blkno, 50int ocfs2_extent_map_get_blocks(struct inode *inode, u64 v_blkno, u64 *p_blkno,
51 u64 *ret_count, unsigned int *extent_flags); 51 u64 *ret_count, unsigned int *extent_flags);
52 52
53int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
54 u64 map_start, u64 map_len);
55
53#endif /* _EXTENT_MAP_H */ 56#endif /* _EXTENT_MAP_H */
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index ec2ed15c3daa..ed38796052d2 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2228,6 +2228,7 @@ const struct inode_operations ocfs2_file_iops = {
2228 .getattr = ocfs2_getattr, 2228 .getattr = ocfs2_getattr,
2229 .permission = ocfs2_permission, 2229 .permission = ocfs2_permission,
2230 .fallocate = ocfs2_fallocate, 2230 .fallocate = ocfs2_fallocate,
2231 .fiemap = ocfs2_fiemap,
2231}; 2232};
2232 2233
2233const struct inode_operations ocfs2_special_file_iops = { 2234const struct inode_operations ocfs2_special_file_iops = {