aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/ctree.h
diff options
context:
space:
mode:
author Chris Mason <chris.mason@oracle.com> 2008-12-08 16:58:54 -0500
committer Chris Mason <chris.mason@oracle.com> 2008-12-08 16:58:54 -0500
commit d20f7043fa65659136c1a7c3c456eeeb5c6f431f (patch)
tree 05d1031cadec6d440a97221e3a32adb504a51699 /fs/btrfs/ctree.h
parent c99e905c945c462085c6d64646dc5af0c0a16815 (diff)
Btrfs: move data checksumming into a dedicated tree
Btrfs stores checksums for each data block. Until now, they have been stored in the subvolume trees, indexed by the inode that is referencing the data block. This means that when we read the inode, we've probably read in at least some checksums as well. But, this has a few problems: * The checksums are indexed by logical offset in the file. When compression is on, this means we have to do the expensive checksumming on the uncompressed data. It would be faster if we could checksum the compressed data instead. * If we implement encryption, we'll be checksumming the plain text and storing that on disk. This is significantly less secure. * For either compression or encryption, we have to get the plain text back before we can verify the checksum as correct. This makes the raid layer balancing and extent moving much more expensive. * It makes the front end caching code more complex, as we have to touch the subvolume and inodes as we cache extents. * There is potentially one copy of the checksum in each subvolume referencing an extent. The solution used here is to store the extent checksums in a dedicated tree. This allows us to index the checksums by physical extent start and length. It means: * The checksum is against the data stored on disk, after any compression or encryption is done. * The checksum is stored in a central location, and can be verified without following back references, or reading inodes. This makes compression significantly faster by reducing the amount of data that needs to be checksummed. It will also allow much faster raid management code in general. The checksums are indexed by a key with a fixed objectid (a magic value in ctree.h) and offset set to the starting byte of the extent. This allows us to copy the checksum items into the fsync log tree directly (or any other tree), without having to invent a second format for them. Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs/btrfs/ctree.h')
-rw-r--r--fs/btrfs/ctree.h30
1 files changed, 20 insertions, 10 deletions
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 96f2ec7ad5bd..242b961ae6de 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -73,6 +73,9 @@ struct btrfs_ordered_sum;
73/* directory objectid inside the root tree */ 73/* directory objectid inside the root tree */
74#define BTRFS_ROOT_TREE_DIR_OBJECTID 6ULL 74#define BTRFS_ROOT_TREE_DIR_OBJECTID 6ULL
75 75
76/* holds checksums of all the data extents */
77#define BTRFS_CSUM_TREE_OBJECTID 7ULL
78
76/* orhpan objectid for tracking unlinked/truncated files */ 79/* orhpan objectid for tracking unlinked/truncated files */
77#define BTRFS_ORPHAN_OBJECTID -5ULL 80#define BTRFS_ORPHAN_OBJECTID -5ULL
78 81
@@ -84,6 +87,13 @@ struct btrfs_ordered_sum;
84#define BTRFS_TREE_RELOC_OBJECTID -8ULL 87#define BTRFS_TREE_RELOC_OBJECTID -8ULL
85#define BTRFS_DATA_RELOC_TREE_OBJECTID -9ULL 88#define BTRFS_DATA_RELOC_TREE_OBJECTID -9ULL
86 89
90/*
91 * extent checksums all have this objectid
92 * this allows them to share the logging tree
93 * for fsyncs
94 */
95#define BTRFS_EXTENT_CSUM_OBJECTID -10ULL
96
87/* dummy objectid represents multiple objectids */ 97/* dummy objectid represents multiple objectids */
88#define BTRFS_MULTIPLE_OBJECTIDS -255ULL 98#define BTRFS_MULTIPLE_OBJECTIDS -255ULL
89 99
@@ -634,6 +644,7 @@ struct btrfs_fs_info {
634 struct btrfs_root *chunk_root; 644 struct btrfs_root *chunk_root;
635 struct btrfs_root *dev_root; 645 struct btrfs_root *dev_root;
636 struct btrfs_root *fs_root; 646 struct btrfs_root *fs_root;
647 struct btrfs_root *csum_root;
637 648
638 /* the log root tree is a directory of all the other log roots */ 649 /* the log root tree is a directory of all the other log roots */
639 struct btrfs_root *log_root_tree; 650 struct btrfs_root *log_root_tree;
@@ -716,6 +727,7 @@ struct btrfs_fs_info {
716 struct btrfs_workers workers; 727 struct btrfs_workers workers;
717 struct btrfs_workers delalloc_workers; 728 struct btrfs_workers delalloc_workers;
718 struct btrfs_workers endio_workers; 729 struct btrfs_workers endio_workers;
730 struct btrfs_workers endio_meta_workers;
719 struct btrfs_workers endio_write_workers; 731 struct btrfs_workers endio_write_workers;
720 struct btrfs_workers submit_workers; 732 struct btrfs_workers submit_workers;
721 /* 733 /*
@@ -858,13 +870,12 @@ struct btrfs_root {
858 * extent data is for file data 870 * extent data is for file data
859 */ 871 */
860#define BTRFS_EXTENT_DATA_KEY 108 872#define BTRFS_EXTENT_DATA_KEY 108
873
861/* 874/*
862 * csum items have the checksums for data in the extents 875 * extent csums are stored in a separate tree and hold csums for
876 * an entire extent on disk.
863 */ 877 */
864#define BTRFS_CSUM_ITEM_KEY 120 878#define BTRFS_EXTENT_CSUM_KEY 128
865
866
867/* reserve 21-31 for other file/dir stuff */
868 879
869/* 880/*
870 * root items point to tree roots. There are typically in the root 881 * root items point to tree roots. There are typically in the root
@@ -1917,7 +1928,7 @@ int btrfs_lookup_inode(struct btrfs_trans_handle *trans, struct btrfs_root
1917 1928
1918/* file-item.c */ 1929/* file-item.c */
1919int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode, 1930int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode,
1920 struct bio *bio); 1931 struct bio *bio, u32 *dst);
1921int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, 1932int btrfs_insert_file_extent(struct btrfs_trans_handle *trans,
1922 struct btrfs_root *root, 1933 struct btrfs_root *root,
1923 u64 objectid, u64 pos, 1934 u64 objectid, u64 pos,
@@ -1929,17 +1940,16 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans,
1929 struct btrfs_path *path, u64 objectid, 1940 struct btrfs_path *path, u64 objectid,
1930 u64 bytenr, int mod); 1941 u64 bytenr, int mod);
1931int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans, 1942int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
1932 struct btrfs_root *root, struct inode *inode, 1943 struct btrfs_root *root,
1933 struct btrfs_ordered_sum *sums); 1944 struct btrfs_ordered_sum *sums);
1934int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode, 1945int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
1935 struct bio *bio); 1946 struct bio *bio, u64 file_start, int contig);
1936int btrfs_csum_file_bytes(struct btrfs_root *root, struct inode *inode, 1947int btrfs_csum_file_bytes(struct btrfs_root *root, struct inode *inode,
1937 u64 start, unsigned long len); 1948 u64 start, unsigned long len);
1938struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans, 1949struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans,
1939 struct btrfs_root *root, 1950 struct btrfs_root *root,
1940 struct btrfs_path *path, 1951 struct btrfs_path *path,
1941 u64 objectid, u64 offset, 1952 u64 bytenr, int cow);
1942 int cow);
1943int btrfs_csum_truncate(struct btrfs_trans_handle *trans, 1953int btrfs_csum_truncate(struct btrfs_trans_handle *trans,
1944 struct btrfs_root *root, struct btrfs_path *path, 1954 struct btrfs_root *root, struct btrfs_path *path,
1945 u64 isize); 1955 u64 isize);