aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/ctree.h
diff options
context:
space:
mode:
authorChris Mason <chris.mason@oracle.com>2008-10-29 14:49:59 -0400
committerChris Mason <chris.mason@oracle.com>2008-10-29 14:49:59 -0400
commitc8b978188c9a0fd3d535c13debd19d522b726f1f (patch)
tree873628723fb82fe2a7c77adc65fa93eca1d61c0c /fs/btrfs/ctree.h
parent26ce34a9c47334ff7984769e4661b2f1883594ff (diff)
Btrfs: Add zlib compression support
This is a large change for adding compression on reading and writing, both for inline and regular extents. It does some fairly large surgery to the writeback paths. Compression is off by default and enabled by mount -o compress. Even when the -o compress mount option is not used, it is possible to read compressed extents off the disk. If compression for a given set of pages fails to make them smaller, the file is flagged to avoid future compression attempts later. * While finding delalloc extents, the pages are locked before being sent down to the delalloc handler. This allows the delalloc handler to do complex things such as cleaning the pages, marking them writeback and starting IO on their behalf. * Inline extents are inserted at delalloc time now. This allows us to compress the data before inserting the inline extent, and it allows us to insert an inline extent that spans multiple pages. * All of the in-memory extent representations (extent_map.c, ordered-data.c etc) are changed to record both an in-memory size and an on disk size, as well as a flag for compression. From a disk format point of view, the extent pointers in the file are changed to record the on disk size of a given extent and some encoding flags. Space in the disk format is allocated for compression encoding, as well as encryption and a generic 'other' field. Neither the encryption or the 'other' field are currently used. In order to limit the amount of data read for a single random read in the file, the size of a compressed extent is limited to 128k. This is a software only limit, the disk format supports u64 sized compressed extents. In order to limit the ram consumed while processing extents, the uncompressed size of a compressed extent is limited to 256k. This is a software only limit and will be subject to tuning later. Checksumming is still done on compressed extents, and it is done on the uncompressed version of the data. This way additional encodings can be layered on without having to figure out which encoding to checksum. Compression happens at delalloc time, which is basically singled threaded because it is usually done by a single pdflush thread. This makes it tricky to spread the compression load across all the cpus on the box. We'll have to look at parallel pdflush walks of dirty inodes at a later time. Decompression is hooked into readpages and it does spread across CPUs nicely. Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs/btrfs/ctree.h')
-rw-r--r--fs/btrfs/ctree.h99
1 files changed, 81 insertions, 18 deletions
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 8559f39fd47f..793d8fdda244 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -400,10 +400,18 @@ struct btrfs_timespec {
400 __le32 nsec; 400 __le32 nsec;
401} __attribute__ ((__packed__)); 401} __attribute__ ((__packed__));
402 402
403/* 403typedef enum {
404 * there is no padding here on purpose. If you want to extent the inode, 404 BTRFS_COMPRESS_NONE = 0,
405 * make a new item type 405 BTRFS_COMPRESS_ZLIB = 1,
406 */ 406 BTRFS_COMPRESS_LAST = 2,
407} btrfs_compression_type;
408
409/* we don't understand any encryption methods right now */
410typedef enum {
411 BTRFS_ENCRYPTION_NONE = 0,
412 BTRFS_ENCRYPTION_LAST = 1,
413} btrfs_encryption_type;
414
407struct btrfs_inode_item { 415struct btrfs_inode_item {
408 /* nfs style generation number */ 416 /* nfs style generation number */
409 __le64 generation; 417 __le64 generation;
@@ -419,6 +427,7 @@ struct btrfs_inode_item {
419 __le64 rdev; 427 __le64 rdev;
420 __le16 flags; 428 __le16 flags;
421 __le16 compat_flags; 429 __le16 compat_flags;
430
422 struct btrfs_timespec atime; 431 struct btrfs_timespec atime;
423 struct btrfs_timespec ctime; 432 struct btrfs_timespec ctime;
424 struct btrfs_timespec mtime; 433 struct btrfs_timespec mtime;
@@ -454,8 +463,33 @@ struct btrfs_root_item {
454#define BTRFS_FILE_EXTENT_INLINE 1 463#define BTRFS_FILE_EXTENT_INLINE 1
455 464
456struct btrfs_file_extent_item { 465struct btrfs_file_extent_item {
466 /*
467 * transaction id that created this extent
468 */
457 __le64 generation; 469 __le64 generation;
470 /*
471 * max number of bytes to hold this extent in ram
472 * when we split a compressed extent we can't know how big
473 * each of the resulting pieces will be. So, this is
474 * an upper limit on the size of the extent in ram instead of
475 * an exact limit.
476 */
477 __le64 ram_bytes;
478
479 /*
480 * 32 bits for the various ways we might encode the data,
481 * including compression and encryption. If any of these
482 * are set to something a given disk format doesn't understand
483 * it is treated like an incompat flag for reading and writing,
484 * but not for stat.
485 */
486 u8 compression;
487 u8 encryption;
488 __le16 other_encoding; /* spare for later use */
489
490 /* are we inline data or a real extent? */
458 u8 type; 491 u8 type;
492
459 /* 493 /*
460 * disk space consumed by the extent, checksum blocks are included 494 * disk space consumed by the extent, checksum blocks are included
461 * in these numbers 495 * in these numbers
@@ -471,9 +505,11 @@ struct btrfs_file_extent_item {
471 */ 505 */
472 __le64 offset; 506 __le64 offset;
473 /* 507 /*
474 * the logical number of file blocks (no csums included) 508 * the logical number of file blocks (no csums included). This
509 * always reflects the size uncompressed and without encoding.
475 */ 510 */
476 __le64 num_bytes; 511 __le64 num_bytes;
512
477} __attribute__ ((__packed__)); 513} __attribute__ ((__packed__));
478 514
479struct btrfs_csum_item { 515struct btrfs_csum_item {
@@ -814,6 +850,7 @@ struct btrfs_root {
814#define BTRFS_MOUNT_NOBARRIER (1 << 2) 850#define BTRFS_MOUNT_NOBARRIER (1 << 2)
815#define BTRFS_MOUNT_SSD (1 << 3) 851#define BTRFS_MOUNT_SSD (1 << 3)
816#define BTRFS_MOUNT_DEGRADED (1 << 4) 852#define BTRFS_MOUNT_DEGRADED (1 << 4)
853#define BTRFS_MOUNT_COMPRESS (1 << 5)
817 854
818#define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt) 855#define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt)
819#define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt) 856#define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt)
@@ -825,6 +862,7 @@ struct btrfs_root {
825#define BTRFS_INODE_NODATASUM (1 << 0) 862#define BTRFS_INODE_NODATASUM (1 << 0)
826#define BTRFS_INODE_NODATACOW (1 << 1) 863#define BTRFS_INODE_NODATACOW (1 << 1)
827#define BTRFS_INODE_READONLY (1 << 2) 864#define BTRFS_INODE_READONLY (1 << 2)
865#define BTRFS_INODE_NOCOMPRESS (1 << 3)
828#define btrfs_clear_flag(inode, flag) (BTRFS_I(inode)->flags &= \ 866#define btrfs_clear_flag(inode, flag) (BTRFS_I(inode)->flags &= \
829 ~BTRFS_INODE_##flag) 867 ~BTRFS_INODE_##flag)
830#define btrfs_set_flag(inode, flag) (BTRFS_I(inode)->flags |= \ 868#define btrfs_set_flag(inode, flag) (BTRFS_I(inode)->flags |= \
@@ -1424,14 +1462,6 @@ static inline u32 btrfs_file_extent_calc_inline_size(u32 datasize)
1424 return offsetof(struct btrfs_file_extent_item, disk_bytenr) + datasize; 1462 return offsetof(struct btrfs_file_extent_item, disk_bytenr) + datasize;
1425} 1463}
1426 1464
1427static inline u32 btrfs_file_extent_inline_len(struct extent_buffer *eb,
1428 struct btrfs_item *e)
1429{
1430 unsigned long offset;
1431 offset = offsetof(struct btrfs_file_extent_item, disk_bytenr);
1432 return btrfs_item_size(eb, e) - offset;
1433}
1434
1435BTRFS_SETGET_FUNCS(file_extent_disk_bytenr, struct btrfs_file_extent_item, 1465BTRFS_SETGET_FUNCS(file_extent_disk_bytenr, struct btrfs_file_extent_item,
1436 disk_bytenr, 64); 1466 disk_bytenr, 64);
1437BTRFS_SETGET_FUNCS(file_extent_generation, struct btrfs_file_extent_item, 1467BTRFS_SETGET_FUNCS(file_extent_generation, struct btrfs_file_extent_item,
@@ -1442,6 +1472,36 @@ BTRFS_SETGET_FUNCS(file_extent_offset, struct btrfs_file_extent_item,
1442 offset, 64); 1472 offset, 64);
1443BTRFS_SETGET_FUNCS(file_extent_num_bytes, struct btrfs_file_extent_item, 1473BTRFS_SETGET_FUNCS(file_extent_num_bytes, struct btrfs_file_extent_item,
1444 num_bytes, 64); 1474 num_bytes, 64);
1475BTRFS_SETGET_FUNCS(file_extent_ram_bytes, struct btrfs_file_extent_item,
1476 ram_bytes, 64);
1477BTRFS_SETGET_FUNCS(file_extent_compression, struct btrfs_file_extent_item,
1478 compression, 8);
1479BTRFS_SETGET_FUNCS(file_extent_encryption, struct btrfs_file_extent_item,
1480 encryption, 8);
1481BTRFS_SETGET_FUNCS(file_extent_other_encoding, struct btrfs_file_extent_item,
1482 other_encoding, 16);
1483
1484/* this returns the number of file bytes represented by the inline item.
1485 * If an item is compressed, this is the uncompressed size
1486 */
1487static inline u32 btrfs_file_extent_inline_len(struct extent_buffer *eb,
1488 struct btrfs_file_extent_item *e)
1489{
1490 return btrfs_file_extent_ram_bytes(eb, e);
1491}
1492
1493/*
1494 * this returns the number of bytes used by the item on disk, minus the
1495 * size of any extent headers. If a file is compressed on disk, this is
1496 * the compressed size
1497 */
1498static inline u32 btrfs_file_extent_inline_item_len(struct extent_buffer *eb,
1499 struct btrfs_item *e)
1500{
1501 unsigned long offset;
1502 offset = offsetof(struct btrfs_file_extent_item, disk_bytenr);
1503 return btrfs_item_size(eb, e) - offset;
1504}
1445 1505
1446static inline struct btrfs_root *btrfs_sb(struct super_block *sb) 1506static inline struct btrfs_root *btrfs_sb(struct super_block *sb)
1447{ 1507{
@@ -1745,10 +1805,11 @@ int btrfs_lookup_inode(struct btrfs_trans_handle *trans, struct btrfs_root
1745int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode, 1805int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode,
1746 struct bio *bio); 1806 struct bio *bio);
1747int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, 1807int btrfs_insert_file_extent(struct btrfs_trans_handle *trans,
1748 struct btrfs_root *root, 1808 struct btrfs_root *root,
1749 u64 objectid, u64 pos, u64 disk_offset, 1809 u64 objectid, u64 pos,
1750 u64 disk_num_bytes, 1810 u64 disk_offset, u64 disk_num_bytes,
1751 u64 num_bytes, u64 offset); 1811 u64 num_bytes, u64 offset, u64 ram_bytes,
1812 u8 compression, u8 encryption, u16 other_encoding);
1752int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, 1813int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans,
1753 struct btrfs_root *root, 1814 struct btrfs_root *root,
1754 struct btrfs_path *path, u64 objectid, 1815 struct btrfs_path *path, u64 objectid,
@@ -1758,6 +1819,8 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
1758 struct btrfs_ordered_sum *sums); 1819 struct btrfs_ordered_sum *sums);
1759int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode, 1820int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
1760 struct bio *bio); 1821 struct bio *bio);
1822int btrfs_csum_file_bytes(struct btrfs_root *root, struct inode *inode,
1823 u64 start, unsigned long len);
1761struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans, 1824struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans,
1762 struct btrfs_root *root, 1825 struct btrfs_root *root,
1763 struct btrfs_path *path, 1826 struct btrfs_path *path,
@@ -1799,7 +1862,7 @@ void btrfs_invalidate_dcache_root(struct btrfs_root *root, char *name,
1799 int namelen); 1862 int namelen);
1800 1863
1801int btrfs_merge_bio_hook(struct page *page, unsigned long offset, 1864int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
1802 size_t size, struct bio *bio); 1865 size_t size, struct bio *bio, unsigned long bio_flags);
1803 1866
1804unsigned long btrfs_force_ra(struct address_space *mapping, 1867unsigned long btrfs_force_ra(struct address_space *mapping,
1805 struct file_ra_state *ra, struct file *file, 1868 struct file_ra_state *ra, struct file *file,