diff options
author | Chris Mason <chris.mason@oracle.com> | 2008-10-29 14:49:59 -0400 |
---|---|---|
committer | Chris Mason <chris.mason@oracle.com> | 2008-10-29 14:49:59 -0400 |
commit | c8b978188c9a0fd3d535c13debd19d522b726f1f (patch) | |
tree | 873628723fb82fe2a7c77adc65fa93eca1d61c0c /fs/btrfs/ctree.h | |
parent | 26ce34a9c47334ff7984769e4661b2f1883594ff (diff) |
Btrfs: Add zlib compression support
This is a large change for adding compression on reading and writing,
both for inline and regular extents. It does some fairly large
surgery to the writeback paths.
Compression is off by default and enabled by mount -o compress. Even
when the -o compress mount option is not used, it is possible to read
compressed extents off the disk.
If compression for a given set of pages fails to make them smaller, the
file is flagged to avoid future compression attempts.
* While finding delalloc extents, the pages are locked before being sent down
to the delalloc handler. This allows the delalloc handler to do complex things
such as cleaning the pages, marking them writeback and starting IO on their
behalf.
* Inline extents are inserted at delalloc time now. This allows us to compress
the data before inserting the inline extent, and it allows us to insert
an inline extent that spans multiple pages.
* All of the in-memory extent representations (extent_map.c, ordered-data.c etc)
are changed to record both an in-memory size and an on disk size, as well
as a flag for compression.
From a disk format point of view, the extent pointers in the file are changed
to record the on disk size of a given extent and some encoding flags.
Space in the disk format is allocated for compression encoding, as well
as encryption and a generic 'other' field. Neither the encryption nor the
'other' field is currently used.
In order to limit the amount of data read for a single random read in the
file, the size of a compressed extent is limited to 128k. This is a
software only limit; the disk format supports u64 sized compressed extents.
In order to limit the ram consumed while processing extents, the uncompressed
size of a compressed extent is limited to 256k. This is a software only limit
and will be subject to tuning later.
Checksumming is still done on compressed extents, and it is done on the
uncompressed version of the data. This way additional encodings can be
layered on without having to figure out which encoding to checksum.
Compression happens at delalloc time, which is basically single threaded because
it is usually done by a single pdflush thread. This makes it tricky to
spread the compression load across all the cpus on the box. We'll have to
look at parallel pdflush walks of dirty inodes at a later time.
Decompression is hooked into readpages and it does spread across CPUs nicely.
Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs/btrfs/ctree.h')
-rw-r--r-- | fs/btrfs/ctree.h | 99 |
1 files changed, 81 insertions, 18 deletions
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 8559f39fd47f..793d8fdda244 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h | |||
@@ -400,10 +400,18 @@ struct btrfs_timespec { | |||
400 | __le32 nsec; | 400 | __le32 nsec; |
401 | } __attribute__ ((__packed__)); | 401 | } __attribute__ ((__packed__)); |
402 | 402 | ||
403 | /* | 403 | typedef enum { |
404 | * there is no padding here on purpose. If you want to extent the inode, | 404 | BTRFS_COMPRESS_NONE = 0, |
405 | * make a new item type | 405 | BTRFS_COMPRESS_ZLIB = 1, |
406 | */ | 406 | BTRFS_COMPRESS_LAST = 2, |
407 | } btrfs_compression_type; | ||
408 | |||
409 | /* we don't understand any encryption methods right now */ | ||
410 | typedef enum { | ||
411 | BTRFS_ENCRYPTION_NONE = 0, | ||
412 | BTRFS_ENCRYPTION_LAST = 1, | ||
413 | } btrfs_encryption_type; | ||
414 | |||
407 | struct btrfs_inode_item { | 415 | struct btrfs_inode_item { |
408 | /* nfs style generation number */ | 416 | /* nfs style generation number */ |
409 | __le64 generation; | 417 | __le64 generation; |
@@ -419,6 +427,7 @@ struct btrfs_inode_item { | |||
419 | __le64 rdev; | 427 | __le64 rdev; |
420 | __le16 flags; | 428 | __le16 flags; |
421 | __le16 compat_flags; | 429 | __le16 compat_flags; |
430 | |||
422 | struct btrfs_timespec atime; | 431 | struct btrfs_timespec atime; |
423 | struct btrfs_timespec ctime; | 432 | struct btrfs_timespec ctime; |
424 | struct btrfs_timespec mtime; | 433 | struct btrfs_timespec mtime; |
@@ -454,8 +463,33 @@ struct btrfs_root_item { | |||
454 | #define BTRFS_FILE_EXTENT_INLINE 1 | 463 | #define BTRFS_FILE_EXTENT_INLINE 1 |
455 | 464 | ||
456 | struct btrfs_file_extent_item { | 465 | struct btrfs_file_extent_item { |
466 | /* | ||
467 | * transaction id that created this extent | ||
468 | */ | ||
457 | __le64 generation; | 469 | __le64 generation; |
470 | /* | ||
471 | * max number of bytes to hold this extent in ram | ||
472 | * when we split a compressed extent we can't know how big | ||
473 | * each of the resulting pieces will be. So, this is | ||
474 | * an upper limit on the size of the extent in ram instead of | ||
475 | * an exact limit. | ||
476 | */ | ||
477 | __le64 ram_bytes; | ||
478 | |||
479 | /* | ||
480 | * 32 bits for the various ways we might encode the data, | ||
481 | * including compression and encryption. If any of these | ||
482 | * are set to something a given disk format doesn't understand | ||
483 | * it is treated like an incompat flag for reading and writing, | ||
484 | * but not for stat. | ||
485 | */ | ||
486 | u8 compression; | ||
487 | u8 encryption; | ||
488 | __le16 other_encoding; /* spare for later use */ | ||
489 | |||
490 | /* are we inline data or a real extent? */ | ||
458 | u8 type; | 491 | u8 type; |
492 | |||
459 | /* | 493 | /* |
460 | * disk space consumed by the extent, checksum blocks are included | 494 | * disk space consumed by the extent, checksum blocks are included |
461 | * in these numbers | 495 | * in these numbers |
@@ -471,9 +505,11 @@ struct btrfs_file_extent_item { | |||
471 | */ | 505 | */ |
472 | __le64 offset; | 506 | __le64 offset; |
473 | /* | 507 | /* |
474 | * the logical number of file blocks (no csums included) | 508 | * the logical number of file blocks (no csums included). This |
509 | * always reflects the size uncompressed and without encoding. | ||
475 | */ | 510 | */ |
476 | __le64 num_bytes; | 511 | __le64 num_bytes; |
512 | |||
477 | } __attribute__ ((__packed__)); | 513 | } __attribute__ ((__packed__)); |
478 | 514 | ||
479 | struct btrfs_csum_item { | 515 | struct btrfs_csum_item { |
@@ -814,6 +850,7 @@ struct btrfs_root { | |||
814 | #define BTRFS_MOUNT_NOBARRIER (1 << 2) | 850 | #define BTRFS_MOUNT_NOBARRIER (1 << 2) |
815 | #define BTRFS_MOUNT_SSD (1 << 3) | 851 | #define BTRFS_MOUNT_SSD (1 << 3) |
816 | #define BTRFS_MOUNT_DEGRADED (1 << 4) | 852 | #define BTRFS_MOUNT_DEGRADED (1 << 4) |
853 | #define BTRFS_MOUNT_COMPRESS (1 << 5) | ||
817 | 854 | ||
818 | #define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt) | 855 | #define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt) |
819 | #define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt) | 856 | #define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt) |
@@ -825,6 +862,7 @@ struct btrfs_root { | |||
825 | #define BTRFS_INODE_NODATASUM (1 << 0) | 862 | #define BTRFS_INODE_NODATASUM (1 << 0) |
826 | #define BTRFS_INODE_NODATACOW (1 << 1) | 863 | #define BTRFS_INODE_NODATACOW (1 << 1) |
827 | #define BTRFS_INODE_READONLY (1 << 2) | 864 | #define BTRFS_INODE_READONLY (1 << 2) |
865 | #define BTRFS_INODE_NOCOMPRESS (1 << 3) | ||
828 | #define btrfs_clear_flag(inode, flag) (BTRFS_I(inode)->flags &= \ | 866 | #define btrfs_clear_flag(inode, flag) (BTRFS_I(inode)->flags &= \ |
829 | ~BTRFS_INODE_##flag) | 867 | ~BTRFS_INODE_##flag) |
830 | #define btrfs_set_flag(inode, flag) (BTRFS_I(inode)->flags |= \ | 868 | #define btrfs_set_flag(inode, flag) (BTRFS_I(inode)->flags |= \ |
@@ -1424,14 +1462,6 @@ static inline u32 btrfs_file_extent_calc_inline_size(u32 datasize) | |||
1424 | return offsetof(struct btrfs_file_extent_item, disk_bytenr) + datasize; | 1462 | return offsetof(struct btrfs_file_extent_item, disk_bytenr) + datasize; |
1425 | } | 1463 | } |
1426 | 1464 | ||
1427 | static inline u32 btrfs_file_extent_inline_len(struct extent_buffer *eb, | ||
1428 | struct btrfs_item *e) | ||
1429 | { | ||
1430 | unsigned long offset; | ||
1431 | offset = offsetof(struct btrfs_file_extent_item, disk_bytenr); | ||
1432 | return btrfs_item_size(eb, e) - offset; | ||
1433 | } | ||
1434 | |||
1435 | BTRFS_SETGET_FUNCS(file_extent_disk_bytenr, struct btrfs_file_extent_item, | 1465 | BTRFS_SETGET_FUNCS(file_extent_disk_bytenr, struct btrfs_file_extent_item, |
1436 | disk_bytenr, 64); | 1466 | disk_bytenr, 64); |
1437 | BTRFS_SETGET_FUNCS(file_extent_generation, struct btrfs_file_extent_item, | 1467 | BTRFS_SETGET_FUNCS(file_extent_generation, struct btrfs_file_extent_item, |
@@ -1442,6 +1472,36 @@ BTRFS_SETGET_FUNCS(file_extent_offset, struct btrfs_file_extent_item, | |||
1442 | offset, 64); | 1472 | offset, 64); |
1443 | BTRFS_SETGET_FUNCS(file_extent_num_bytes, struct btrfs_file_extent_item, | 1473 | BTRFS_SETGET_FUNCS(file_extent_num_bytes, struct btrfs_file_extent_item, |
1444 | num_bytes, 64); | 1474 | num_bytes, 64); |
1475 | BTRFS_SETGET_FUNCS(file_extent_ram_bytes, struct btrfs_file_extent_item, | ||
1476 | ram_bytes, 64); | ||
1477 | BTRFS_SETGET_FUNCS(file_extent_compression, struct btrfs_file_extent_item, | ||
1478 | compression, 8); | ||
1479 | BTRFS_SETGET_FUNCS(file_extent_encryption, struct btrfs_file_extent_item, | ||
1480 | encryption, 8); | ||
1481 | BTRFS_SETGET_FUNCS(file_extent_other_encoding, struct btrfs_file_extent_item, | ||
1482 | other_encoding, 16); | ||
1483 | |||
1484 | /* this returns the number of file bytes represented by the inline item. | ||
1485 | * If an item is compressed, this is the uncompressed size | ||
1486 | */ | ||
1487 | static inline u32 btrfs_file_extent_inline_len(struct extent_buffer *eb, | ||
1488 | struct btrfs_file_extent_item *e) | ||
1489 | { | ||
1490 | return btrfs_file_extent_ram_bytes(eb, e); | ||
1491 | } | ||
1492 | |||
1493 | /* | ||
1494 | * this returns the number of bytes used by the item on disk, minus the | ||
1495 | * size of any extent headers. If a file is compressed on disk, this is | ||
1496 | * the compressed size | ||
1497 | */ | ||
1498 | static inline u32 btrfs_file_extent_inline_item_len(struct extent_buffer *eb, | ||
1499 | struct btrfs_item *e) | ||
1500 | { | ||
1501 | unsigned long offset; | ||
1502 | offset = offsetof(struct btrfs_file_extent_item, disk_bytenr); | ||
1503 | return btrfs_item_size(eb, e) - offset; | ||
1504 | } | ||
1445 | 1505 | ||
1446 | static inline struct btrfs_root *btrfs_sb(struct super_block *sb) | 1506 | static inline struct btrfs_root *btrfs_sb(struct super_block *sb) |
1447 | { | 1507 | { |
@@ -1745,10 +1805,11 @@ int btrfs_lookup_inode(struct btrfs_trans_handle *trans, struct btrfs_root | |||
1745 | int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode, | 1805 | int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode, |
1746 | struct bio *bio); | 1806 | struct bio *bio); |
1747 | int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, | 1807 | int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, |
1748 | struct btrfs_root *root, | 1808 | struct btrfs_root *root, |
1749 | u64 objectid, u64 pos, u64 disk_offset, | 1809 | u64 objectid, u64 pos, |
1750 | u64 disk_num_bytes, | 1810 | u64 disk_offset, u64 disk_num_bytes, |
1751 | u64 num_bytes, u64 offset); | 1811 | u64 num_bytes, u64 offset, u64 ram_bytes, |
1812 | u8 compression, u8 encryption, u16 other_encoding); | ||
1752 | int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, | 1813 | int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, |
1753 | struct btrfs_root *root, | 1814 | struct btrfs_root *root, |
1754 | struct btrfs_path *path, u64 objectid, | 1815 | struct btrfs_path *path, u64 objectid, |
@@ -1758,6 +1819,8 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans, | |||
1758 | struct btrfs_ordered_sum *sums); | 1819 | struct btrfs_ordered_sum *sums); |
1759 | int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode, | 1820 | int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode, |
1760 | struct bio *bio); | 1821 | struct bio *bio); |
1822 | int btrfs_csum_file_bytes(struct btrfs_root *root, struct inode *inode, | ||
1823 | u64 start, unsigned long len); | ||
1761 | struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans, | 1824 | struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans, |
1762 | struct btrfs_root *root, | 1825 | struct btrfs_root *root, |
1763 | struct btrfs_path *path, | 1826 | struct btrfs_path *path, |
@@ -1799,7 +1862,7 @@ void btrfs_invalidate_dcache_root(struct btrfs_root *root, char *name, | |||
1799 | int namelen); | 1862 | int namelen); |
1800 | 1863 | ||
1801 | int btrfs_merge_bio_hook(struct page *page, unsigned long offset, | 1864 | int btrfs_merge_bio_hook(struct page *page, unsigned long offset, |
1802 | size_t size, struct bio *bio); | 1865 | size_t size, struct bio *bio, unsigned long bio_flags); |
1803 | 1866 | ||
1804 | unsigned long btrfs_force_ra(struct address_space *mapping, | 1867 | unsigned long btrfs_force_ra(struct address_space *mapping, |
1805 | struct file_ra_state *ra, struct file *file, | 1868 | struct file_ra_state *ra, struct file *file, |