diff options
Diffstat (limited to 'fs')
| -rw-r--r-- | fs/Kconfig | 2 | ||||
| -rw-r--r-- | fs/btrfs/Makefile | 3 | ||||
| -rw-r--r-- | fs/btrfs/compression.c | 454 | ||||
| -rw-r--r-- | fs/btrfs/compression.h | 47 | ||||
| -rw-r--r-- | fs/btrfs/ctree.h | 99 | ||||
| -rw-r--r-- | fs/btrfs/disk-io.c | 18 | ||||
| -rw-r--r-- | fs/btrfs/disk-io.h | 1 | ||||
| -rw-r--r-- | fs/btrfs/extent-tree.c | 27 | ||||
| -rw-r--r-- | fs/btrfs/extent_io.c | 411 | ||||
| -rw-r--r-- | fs/btrfs/extent_io.h | 17 | ||||
| -rw-r--r-- | fs/btrfs/extent_map.c | 9 | ||||
| -rw-r--r-- | fs/btrfs/extent_map.h | 6 | ||||
| -rw-r--r-- | fs/btrfs/file-item.c | 75 | ||||
| -rw-r--r-- | fs/btrfs/file.c | 263 | ||||
| -rw-r--r-- | fs/btrfs/inode.c | 584 | ||||
| -rw-r--r-- | fs/btrfs/ordered-data.c | 9 | ||||
| -rw-r--r-- | fs/btrfs/ordered-data.h | 10 | ||||
| -rw-r--r-- | fs/btrfs/print-tree.c | 7 | ||||
| -rw-r--r-- | fs/btrfs/super.c | 10 | ||||
| -rw-r--r-- | fs/btrfs/tree-log.c | 3 | ||||
| -rw-r--r-- | fs/btrfs/volumes.c | 2 | ||||
| -rw-r--r-- | fs/btrfs/zlib.c | 637 |
22 files changed, 2315 insertions, 379 deletions
diff --git a/fs/Kconfig b/fs/Kconfig index 18f5a85b47c..31cce5d88b1 100644 --- a/fs/Kconfig +++ b/fs/Kconfig | |||
| @@ -501,6 +501,8 @@ config BTRFS_FS | |||
| 501 | tristate "Btrfs filesystem (EXPERIMENTAL) Unstable disk format" | 501 | tristate "Btrfs filesystem (EXPERIMENTAL) Unstable disk format" |
| 502 | depends on EXPERIMENTAL | 502 | depends on EXPERIMENTAL |
| 503 | select LIBCRC32C | 503 | select LIBCRC32C |
| 504 | select ZLIB_INFLATE | ||
| 505 | select ZLIB_DEFLATE | ||
| 504 | help | 506 | help |
| 505 | Btrfs is a new filesystem with extents, writable snapshotting, | 507 | Btrfs is a new filesystem with extents, writable snapshotting, |
| 506 | support for multiple devices and many more features. | 508 | support for multiple devices and many more features. |
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index 7125716e142..d2cf5a54a4b 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile | |||
| @@ -7,7 +7,8 @@ btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ | |||
| 7 | transaction.o inode.o file.o tree-defrag.o \ | 7 | transaction.o inode.o file.o tree-defrag.o \ |
| 8 | extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \ | 8 | extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \ |
| 9 | extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \ | 9 | extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \ |
| 10 | ref-cache.o export.o tree-log.o acl.o free-space-cache.o | 10 | ref-cache.o export.o tree-log.o acl.o free-space-cache.o zlib.o \ |
| 11 | compression.o | ||
| 11 | else | 12 | else |
| 12 | 13 | ||
| 13 | # Normal Makefile | 14 | # Normal Makefile |
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c new file mode 100644 index 00000000000..c5470367ca5 --- /dev/null +++ b/fs/btrfs/compression.c | |||
| @@ -0,0 +1,454 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 2008 Oracle. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or | ||
| 5 | * modify it under the terms of the GNU General Public | ||
| 6 | * License v2 as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope that it will be useful, | ||
| 9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 11 | * General Public License for more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public | ||
| 14 | * License along with this program; if not, write to the | ||
| 15 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
| 16 | * Boston, MA 021110-1307, USA. | ||
| 17 | */ | ||
| 18 | |||
| 19 | #include <linux/kernel.h> | ||
| 20 | #include <linux/bio.h> | ||
| 21 | #include <linux/buffer_head.h> | ||
| 22 | #include <linux/file.h> | ||
| 23 | #include <linux/fs.h> | ||
| 24 | #include <linux/pagemap.h> | ||
| 25 | #include <linux/highmem.h> | ||
| 26 | #include <linux/time.h> | ||
| 27 | #include <linux/init.h> | ||
| 28 | #include <linux/string.h> | ||
| 29 | #include <linux/smp_lock.h> | ||
| 30 | #include <linux/backing-dev.h> | ||
| 31 | #include <linux/mpage.h> | ||
| 32 | #include <linux/swap.h> | ||
| 33 | #include <linux/writeback.h> | ||
| 34 | #include <linux/bit_spinlock.h> | ||
| 35 | #include <linux/version.h> | ||
| 36 | #include "ctree.h" | ||
| 37 | #include "disk-io.h" | ||
| 38 | #include "transaction.h" | ||
| 39 | #include "btrfs_inode.h" | ||
| 40 | #include "volumes.h" | ||
| 41 | #include "ordered-data.h" | ||
| 42 | #include "compat.h" | ||
| 43 | #include "compression.h" | ||
| 44 | #include "extent_io.h" | ||
| 45 | #include "extent_map.h" | ||
| 46 | |||
| 47 | struct compressed_bio { | ||
| 48 | /* number of bios pending for this compressed extent */ | ||
| 49 | atomic_t pending_bios; | ||
| 50 | |||
| 51 | /* the pages with the compressed data on them */ | ||
| 52 | struct page **compressed_pages; | ||
| 53 | |||
| 54 | /* inode that owns this data */ | ||
| 55 | struct inode *inode; | ||
| 56 | |||
| 57 | /* starting offset in the inode for our pages */ | ||
| 58 | u64 start; | ||
| 59 | |||
| 60 | /* number of bytes in the inode we're working on */ | ||
| 61 | unsigned long len; | ||
| 62 | |||
| 63 | /* number of bytes on disk */ | ||
| 64 | unsigned long compressed_len; | ||
| 65 | |||
| 66 | /* number of compressed pages in the array */ | ||
| 67 | unsigned long nr_pages; | ||
| 68 | |||
| 69 | /* IO errors */ | ||
| 70 | int errors; | ||
| 71 | |||
| 72 | /* for reads, this is the bio we are copying the data into */ | ||
| 73 | struct bio *orig_bio; | ||
| 74 | }; | ||
| 75 | |||
| 76 | static struct bio *compressed_bio_alloc(struct block_device *bdev, | ||
| 77 | u64 first_byte, gfp_t gfp_flags) | ||
| 78 | { | ||
| 79 | struct bio *bio; | ||
| 80 | int nr_vecs; | ||
| 81 | |||
| 82 | nr_vecs = bio_get_nr_vecs(bdev); | ||
| 83 | bio = bio_alloc(gfp_flags, nr_vecs); | ||
| 84 | |||
| 85 | if (bio == NULL && (current->flags & PF_MEMALLOC)) { | ||
| 86 | while (!bio && (nr_vecs /= 2)) | ||
| 87 | bio = bio_alloc(gfp_flags, nr_vecs); | ||
| 88 | } | ||
| 89 | |||
| 90 | if (bio) { | ||
| 91 | bio->bi_size = 0; | ||
| 92 | bio->bi_bdev = bdev; | ||
| 93 | bio->bi_sector = first_byte >> 9; | ||
| 94 | } | ||
| 95 | return bio; | ||
| 96 | } | ||
| 97 | |||
| 98 | /* when we finish reading compressed pages from the disk, we | ||
| 99 | * decompress them and then run the bio end_io routines on the | ||
| 100 | * decompressed pages (in the inode address space). | ||
| 101 | * | ||
| 102 | * This allows the checksumming and other IO error handling routines | ||
| 103 | * to work normally | ||
| 104 | * | ||
| 105 | * The compressed pages are freed here, and it must be run | ||
| 106 | * in process context | ||
| 107 | */ | ||
| 108 | static void end_compressed_bio_read(struct bio *bio, int err) | ||
| 109 | { | ||
| 110 | struct extent_io_tree *tree; | ||
| 111 | struct compressed_bio *cb = bio->bi_private; | ||
| 112 | struct inode *inode; | ||
| 113 | struct page *page; | ||
| 114 | unsigned long index; | ||
| 115 | int ret; | ||
| 116 | |||
| 117 | if (err) | ||
| 118 | cb->errors = 1; | ||
| 119 | |||
| 120 | /* if there are more bios still pending for this compressed | ||
| 121 | * extent, just exit | ||
| 122 | */ | ||
| 123 | if (!atomic_dec_and_test(&cb->pending_bios)) | ||
| 124 | goto out; | ||
| 125 | |||
| 126 | /* ok, we're the last bio for this extent, lets start | ||
| 127 | * the decompression. | ||
| 128 | */ | ||
| 129 | inode = cb->inode; | ||
| 130 | tree = &BTRFS_I(inode)->io_tree; | ||
| 131 | ret = btrfs_zlib_decompress_biovec(cb->compressed_pages, | ||
| 132 | cb->start, | ||
| 133 | cb->orig_bio->bi_io_vec, | ||
| 134 | cb->orig_bio->bi_vcnt, | ||
| 135 | cb->compressed_len); | ||
| 136 | if (ret) | ||
| 137 | cb->errors = 1; | ||
| 138 | |||
| 139 | /* release the compressed pages */ | ||
| 140 | index = 0; | ||
| 141 | for (index = 0; index < cb->nr_pages; index++) { | ||
| 142 | page = cb->compressed_pages[index]; | ||
| 143 | page->mapping = NULL; | ||
| 144 | page_cache_release(page); | ||
| 145 | } | ||
| 146 | |||
| 147 | /* do io completion on the original bio */ | ||
| 148 | if (cb->errors) | ||
| 149 | bio_io_error(cb->orig_bio); | ||
| 150 | else | ||
| 151 | bio_endio(cb->orig_bio, 0); | ||
| 152 | |||
| 153 | /* finally free the cb struct */ | ||
| 154 | kfree(cb->compressed_pages); | ||
| 155 | kfree(cb); | ||
| 156 | out: | ||
| 157 | bio_put(bio); | ||
| 158 | } | ||
| 159 | |||
| 160 | /* | ||
| 161 | * Clear the writeback bits on all of the file | ||
| 162 | * pages for a compressed write | ||
| 163 | */ | ||
| 164 | static noinline int end_compressed_writeback(struct inode *inode, u64 start, | ||
| 165 | unsigned long ram_size) | ||
| 166 | { | ||
| 167 | unsigned long index = start >> PAGE_CACHE_SHIFT; | ||
| 168 | unsigned long end_index = (start + ram_size - 1) >> PAGE_CACHE_SHIFT; | ||
| 169 | struct page *pages[16]; | ||
| 170 | unsigned long nr_pages = end_index - index + 1; | ||
| 171 | int i; | ||
| 172 | int ret; | ||
| 173 | |||
| 174 | while(nr_pages > 0) { | ||
| 175 | ret = find_get_pages_contig(inode->i_mapping, index, | ||
| 176 | min(nr_pages, ARRAY_SIZE(pages)), pages); | ||
| 177 | if (ret == 0) { | ||
| 178 | nr_pages -= 1; | ||
| 179 | index += 1; | ||
| 180 | continue; | ||
| 181 | } | ||
| 182 | for (i = 0; i < ret; i++) { | ||
| 183 | end_page_writeback(pages[i]); | ||
| 184 | page_cache_release(pages[i]); | ||
| 185 | } | ||
| 186 | nr_pages -= ret; | ||
| 187 | index += ret; | ||
| 188 | } | ||
| 189 | /* the inode may be gone now */ | ||
| 190 | return 0; | ||
| 191 | } | ||
| 192 | |||
| 193 | /* | ||
| 194 | * do the cleanup once all the compressed pages hit the disk. | ||
| 195 | * This will clear writeback on the file pages and free the compressed | ||
| 196 | * pages. | ||
| 197 | * | ||
| 198 | * This also calls the writeback end hooks for the file pages so that | ||
| 199 | * metadata and checksums can be updated in the file. | ||
| 200 | */ | ||
| 201 | static void end_compressed_bio_write(struct bio *bio, int err) | ||
| 202 | { | ||
| 203 | struct extent_io_tree *tree; | ||
| 204 | struct compressed_bio *cb = bio->bi_private; | ||
| 205 | struct inode *inode; | ||
| 206 | struct page *page; | ||
| 207 | unsigned long index; | ||
| 208 | |||
| 209 | if (err) | ||
| 210 | cb->errors = 1; | ||
| 211 | |||
| 212 | /* if there are more bios still pending for this compressed | ||
| 213 | * extent, just exit | ||
| 214 | */ | ||
| 215 | if (!atomic_dec_and_test(&cb->pending_bios)) | ||
| 216 | goto out; | ||
| 217 | |||
| 218 | /* ok, we're the last bio for this extent, step one is to | ||
| 219 | * call back into the FS and do all the end_io operations | ||
| 220 | */ | ||
| 221 | inode = cb->inode; | ||
| 222 | tree = &BTRFS_I(inode)->io_tree; | ||
| 223 | tree->ops->writepage_end_io_hook(cb->compressed_pages[0], | ||
| 224 | cb->start, | ||
| 225 | cb->start + cb->len - 1, | ||
| 226 | NULL, 1); | ||
| 227 | |||
| 228 | end_compressed_writeback(inode, cb->start, cb->len); | ||
| 229 | /* note, our inode could be gone now */ | ||
| 230 | |||
| 231 | /* | ||
| 232 | * release the compressed pages, these came from alloc_page and | ||
| 233 | * are not attached to the inode at all | ||
| 234 | */ | ||
| 235 | index = 0; | ||
| 236 | for (index = 0; index < cb->nr_pages; index++) { | ||
| 237 | page = cb->compressed_pages[index]; | ||
| 238 | page->mapping = NULL; | ||
| 239 | page_cache_release(page); | ||
| 240 | } | ||
| 241 | |||
| 242 | /* finally free the cb struct */ | ||
| 243 | kfree(cb->compressed_pages); | ||
| 244 | kfree(cb); | ||
| 245 | out: | ||
| 246 | bio_put(bio); | ||
| 247 | } | ||
| 248 | |||
| 249 | /* | ||
| 250 | * worker function to build and submit bios for previously compressed pages. | ||
| 251 | * The corresponding pages in the inode should be marked for writeback | ||
| 252 | * and the compressed pages should have a reference on them for dropping | ||
| 253 | * when the IO is complete. | ||
| 254 | * | ||
| 255 | * This also checksums the file bytes and gets things ready for | ||
| 256 | * the end io hooks. | ||
| 257 | */ | ||
| 258 | int btrfs_submit_compressed_write(struct inode *inode, u64 start, | ||
| 259 | unsigned long len, u64 disk_start, | ||
| 260 | unsigned long compressed_len, | ||
| 261 | struct page **compressed_pages, | ||
| 262 | unsigned long nr_pages) | ||
| 263 | { | ||
| 264 | struct bio *bio = NULL; | ||
| 265 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
| 266 | struct compressed_bio *cb; | ||
| 267 | unsigned long bytes_left; | ||
| 268 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | ||
| 269 | int page_index = 0; | ||
| 270 | struct page *page; | ||
| 271 | u64 first_byte = disk_start; | ||
| 272 | struct block_device *bdev; | ||
| 273 | int ret; | ||
| 274 | |||
| 275 | WARN_ON(start & ((u64)PAGE_CACHE_SIZE - 1)); | ||
| 276 | cb = kmalloc(sizeof(*cb), GFP_NOFS); | ||
| 277 | atomic_set(&cb->pending_bios, 0); | ||
| 278 | cb->errors = 0; | ||
| 279 | cb->inode = inode; | ||
| 280 | cb->start = start; | ||
| 281 | cb->len = len; | ||
| 282 | cb->compressed_pages = compressed_pages; | ||
| 283 | cb->compressed_len = compressed_len; | ||
| 284 | cb->orig_bio = NULL; | ||
| 285 | cb->nr_pages = nr_pages; | ||
| 286 | |||
| 287 | bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev; | ||
| 288 | |||
| 289 | ret = btrfs_csum_file_bytes(root, inode, start, len); | ||
| 290 | BUG_ON(ret); | ||
| 291 | |||
| 292 | bio = compressed_bio_alloc(bdev, first_byte, GFP_NOFS); | ||
| 293 | bio->bi_private = cb; | ||
| 294 | bio->bi_end_io = end_compressed_bio_write; | ||
| 295 | atomic_inc(&cb->pending_bios); | ||
| 296 | |||
| 297 | /* create and submit bios for the compressed pages */ | ||
| 298 | bytes_left = compressed_len; | ||
| 299 | while(bytes_left > 0) { | ||
| 300 | page = compressed_pages[page_index]; | ||
| 301 | page->mapping = inode->i_mapping; | ||
| 302 | if (bio->bi_size) | ||
| 303 | ret = io_tree->ops->merge_bio_hook(page, 0, | ||
| 304 | PAGE_CACHE_SIZE, | ||
| 305 | bio, 0); | ||
| 306 | else | ||
| 307 | ret = 0; | ||
| 308 | |||
| 309 | if (ret || bio_add_page(bio, page, PAGE_CACHE_SIZE, 0) < | ||
| 310 | PAGE_CACHE_SIZE) { | ||
| 311 | bio_get(bio); | ||
| 312 | |||
| 313 | ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); | ||
| 314 | BUG_ON(ret); | ||
| 315 | |||
| 316 | ret = btrfs_map_bio(root, WRITE, bio, 0, 1); | ||
| 317 | BUG_ON(ret); | ||
| 318 | |||
| 319 | bio_put(bio); | ||
| 320 | |||
| 321 | bio = compressed_bio_alloc(bdev, first_byte, GFP_NOFS); | ||
| 322 | atomic_inc(&cb->pending_bios); | ||
| 323 | bio->bi_private = cb; | ||
| 324 | bio->bi_end_io = end_compressed_bio_write; | ||
| 325 | bio_add_page(bio, page, PAGE_CACHE_SIZE, 0); | ||
| 326 | } | ||
| 327 | page_index++; | ||
| 328 | bytes_left -= PAGE_CACHE_SIZE; | ||
| 329 | first_byte += PAGE_CACHE_SIZE; | ||
| 330 | } | ||
| 331 | bio_get(bio); | ||
| 332 | |||
| 333 | ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); | ||
| 334 | BUG_ON(ret); | ||
| 335 | |||
| 336 | ret = btrfs_map_bio(root, WRITE, bio, 0, 1); | ||
| 337 | BUG_ON(ret); | ||
| 338 | |||
| 339 | bio_put(bio); | ||
| 340 | return 0; | ||
| 341 | } | ||
| 342 | |||
| 343 | /* | ||
| 344 | * for a compressed read, the bio we get passed has all the inode pages | ||
| 345 | * in it. We don't actually do IO on those pages but allocate new ones | ||
| 346 | * to hold the compressed pages on disk. | ||
| 347 | * | ||
| 348 | * bio->bi_sector points to the compressed extent on disk | ||
| 349 | * bio->bi_io_vec points to all of the inode pages | ||
| 350 | * bio->bi_vcnt is a count of pages | ||
| 351 | * | ||
| 352 | * After the compressed pages are read, we copy the bytes into the | ||
| 353 | * bio we were passed and then call the bio end_io calls | ||
| 354 | */ | ||
| 355 | int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, | ||
| 356 | int mirror_num, unsigned long bio_flags) | ||
| 357 | { | ||
| 358 | struct extent_io_tree *tree; | ||
| 359 | struct extent_map_tree *em_tree; | ||
| 360 | struct compressed_bio *cb; | ||
| 361 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
| 362 | unsigned long uncompressed_len = bio->bi_vcnt * PAGE_CACHE_SIZE; | ||
| 363 | unsigned long compressed_len; | ||
| 364 | unsigned long nr_pages; | ||
| 365 | unsigned long page_index; | ||
| 366 | struct page *page; | ||
| 367 | struct block_device *bdev; | ||
| 368 | struct bio *comp_bio; | ||
| 369 | u64 cur_disk_byte = (u64)bio->bi_sector << 9; | ||
| 370 | struct extent_map *em; | ||
| 371 | int ret; | ||
| 372 | |||
| 373 | tree = &BTRFS_I(inode)->io_tree; | ||
| 374 | em_tree = &BTRFS_I(inode)->extent_tree; | ||
| 375 | |||
| 376 | /* we need the actual starting offset of this extent in the file */ | ||
| 377 | spin_lock(&em_tree->lock); | ||
| 378 | em = lookup_extent_mapping(em_tree, | ||
| 379 | page_offset(bio->bi_io_vec->bv_page), | ||
| 380 | PAGE_CACHE_SIZE); | ||
| 381 | spin_unlock(&em_tree->lock); | ||
| 382 | |||
| 383 | cb = kmalloc(sizeof(*cb), GFP_NOFS); | ||
| 384 | atomic_set(&cb->pending_bios, 0); | ||
| 385 | cb->errors = 0; | ||
| 386 | cb->inode = inode; | ||
| 387 | |||
| 388 | cb->start = em->start; | ||
| 389 | compressed_len = em->block_len; | ||
| 390 | free_extent_map(em); | ||
| 391 | |||
| 392 | cb->len = uncompressed_len; | ||
| 393 | cb->compressed_len = compressed_len; | ||
| 394 | cb->orig_bio = bio; | ||
| 395 | |||
| 396 | nr_pages = (compressed_len + PAGE_CACHE_SIZE - 1) / | ||
| 397 | PAGE_CACHE_SIZE; | ||
| 398 | cb->compressed_pages = kmalloc(sizeof(struct page *) * nr_pages, | ||
| 399 | GFP_NOFS); | ||
| 400 | bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev; | ||
| 401 | |||
| 402 | for (page_index = 0; page_index < nr_pages; page_index++) { | ||
| 403 | cb->compressed_pages[page_index] = alloc_page(GFP_NOFS | | ||
| 404 | __GFP_HIGHMEM); | ||
| 405 | } | ||
| 406 | cb->nr_pages = nr_pages; | ||
| 407 | |||
| 408 | comp_bio = compressed_bio_alloc(bdev, cur_disk_byte, GFP_NOFS); | ||
| 409 | comp_bio->bi_private = cb; | ||
| 410 | comp_bio->bi_end_io = end_compressed_bio_read; | ||
| 411 | atomic_inc(&cb->pending_bios); | ||
| 412 | |||
| 413 | for (page_index = 0; page_index < nr_pages; page_index++) { | ||
| 414 | page = cb->compressed_pages[page_index]; | ||
| 415 | page->mapping = inode->i_mapping; | ||
| 416 | if (comp_bio->bi_size) | ||
| 417 | ret = tree->ops->merge_bio_hook(page, 0, | ||
| 418 | PAGE_CACHE_SIZE, | ||
| 419 | comp_bio, 0); | ||
| 420 | else | ||
| 421 | ret = 0; | ||
| 422 | |||
| 423 | if (ret || bio_add_page(comp_bio, page, PAGE_CACHE_SIZE, 0) < | ||
| 424 | PAGE_CACHE_SIZE) { | ||
| 425 | bio_get(comp_bio); | ||
| 426 | |||
| 427 | ret = btrfs_bio_wq_end_io(root->fs_info, comp_bio, 0); | ||
| 428 | BUG_ON(ret); | ||
| 429 | |||
| 430 | ret = btrfs_map_bio(root, READ, comp_bio, 0, 0); | ||
| 431 | BUG_ON(ret); | ||
| 432 | |||
| 433 | bio_put(comp_bio); | ||
| 434 | |||
| 435 | comp_bio = compressed_bio_alloc(bdev, cur_disk_byte, | ||
| 436 | GFP_NOFS); | ||
| 437 | atomic_inc(&cb->pending_bios); | ||
| 438 | bio->bi_private = cb; | ||
| 439 | bio->bi_end_io = end_compressed_bio_write; | ||
| 440 | bio_add_page(bio, page, PAGE_CACHE_SIZE, 0); | ||
| 441 | } | ||
| 442 | cur_disk_byte += PAGE_CACHE_SIZE; | ||
| 443 | } | ||
| 444 | bio_get(comp_bio); | ||
| 445 | |||
| 446 | ret = btrfs_bio_wq_end_io(root->fs_info, comp_bio, 0); | ||
| 447 | BUG_ON(ret); | ||
| 448 | |||
| 449 | ret = btrfs_map_bio(root, READ, comp_bio, 0, 0); | ||
| 450 | BUG_ON(ret); | ||
| 451 | |||
| 452 | bio_put(comp_bio); | ||
| 453 | return 0; | ||
| 454 | } | ||
diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h new file mode 100644 index 00000000000..421f5b4aa71 --- /dev/null +++ b/fs/btrfs/compression.h | |||
| @@ -0,0 +1,47 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 2008 Oracle. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or | ||
| 5 | * modify it under the terms of the GNU General Public | ||
| 6 | * License v2 as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope that it will be useful, | ||
| 9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 11 | * General Public License for more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public | ||
| 14 | * License along with this program; if not, write to the | ||
| 15 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
| 16 | * Boston, MA 021110-1307, USA. | ||
| 17 | */ | ||
| 18 | |||
| 19 | #ifndef __BTRFS_COMPRESSION_ | ||
| 20 | #define __BTRFS_COMPRESSION_ | ||
| 21 | |||
| 22 | int btrfs_zlib_decompress(unsigned char *data_in, | ||
| 23 | struct page *dest_page, | ||
| 24 | unsigned long start_byte, | ||
| 25 | size_t srclen, size_t destlen); | ||
| 26 | int btrfs_zlib_compress_pages(struct address_space *mapping, | ||
| 27 | u64 start, unsigned long len, | ||
| 28 | struct page **pages, | ||
| 29 | unsigned long nr_dest_pages, | ||
| 30 | unsigned long *out_pages, | ||
| 31 | unsigned long *total_in, | ||
| 32 | unsigned long *total_out, | ||
| 33 | unsigned long max_out); | ||
| 34 | int btrfs_zlib_decompress_biovec(struct page **pages_in, | ||
| 35 | u64 disk_start, | ||
| 36 | struct bio_vec *bvec, | ||
| 37 | int vcnt, | ||
| 38 | size_t srclen); | ||
| 39 | void btrfs_zlib_exit(void); | ||
| 40 | int btrfs_submit_compressed_write(struct inode *inode, u64 start, | ||
| 41 | unsigned long len, u64 disk_start, | ||
| 42 | unsigned long compressed_len, | ||
| 43 | struct page **compressed_pages, | ||
| 44 | unsigned long nr_pages); | ||
| 45 | int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, | ||
| 46 | int mirror_num, unsigned long bio_flags); | ||
| 47 | #endif | ||
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 8559f39fd47..793d8fdda24 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h | |||
| @@ -400,10 +400,18 @@ struct btrfs_timespec { | |||
| 400 | __le32 nsec; | 400 | __le32 nsec; |
| 401 | } __attribute__ ((__packed__)); | 401 | } __attribute__ ((__packed__)); |
| 402 | 402 | ||
| 403 | /* | 403 | typedef enum { |
| 404 | * there is no padding here on purpose. If you want to extent the inode, | 404 | BTRFS_COMPRESS_NONE = 0, |
| 405 | * make a new item type | 405 | BTRFS_COMPRESS_ZLIB = 1, |
| 406 | */ | 406 | BTRFS_COMPRESS_LAST = 2, |
| 407 | } btrfs_compression_type; | ||
| 408 | |||
| 409 | /* we don't understand any encryption methods right now */ | ||
| 410 | typedef enum { | ||
| 411 | BTRFS_ENCRYPTION_NONE = 0, | ||
| 412 | BTRFS_ENCRYPTION_LAST = 1, | ||
| 413 | } btrfs_encryption_type; | ||
| 414 | |||
| 407 | struct btrfs_inode_item { | 415 | struct btrfs_inode_item { |
| 408 | /* nfs style generation number */ | 416 | /* nfs style generation number */ |
| 409 | __le64 generation; | 417 | __le64 generation; |
| @@ -419,6 +427,7 @@ struct btrfs_inode_item { | |||
| 419 | __le64 rdev; | 427 | __le64 rdev; |
| 420 | __le16 flags; | 428 | __le16 flags; |
| 421 | __le16 compat_flags; | 429 | __le16 compat_flags; |
| 430 | |||
| 422 | struct btrfs_timespec atime; | 431 | struct btrfs_timespec atime; |
| 423 | struct btrfs_timespec ctime; | 432 | struct btrfs_timespec ctime; |
| 424 | struct btrfs_timespec mtime; | 433 | struct btrfs_timespec mtime; |
| @@ -454,8 +463,33 @@ struct btrfs_root_item { | |||
| 454 | #define BTRFS_FILE_EXTENT_INLINE 1 | 463 | #define BTRFS_FILE_EXTENT_INLINE 1 |
| 455 | 464 | ||
| 456 | struct btrfs_file_extent_item { | 465 | struct btrfs_file_extent_item { |
| 466 | /* | ||
| 467 | * transaction id that created this extent | ||
| 468 | */ | ||
| 457 | __le64 generation; | 469 | __le64 generation; |
| 470 | /* | ||
| 471 | * max number of bytes to hold this extent in ram | ||
| 472 | * when we split a compressed extent we can't know how big | ||
| 473 | * each of the resulting pieces will be. So, this is | ||
| 474 | * an upper limit on the size of the extent in ram instead of | ||
| 475 | * an exact limit. | ||
| 476 | */ | ||
| 477 | __le64 ram_bytes; | ||
| 478 | |||
| 479 | /* | ||
| 480 | * 32 bits for the various ways we might encode the data, | ||
| 481 | * including compression and encryption. If any of these | ||
| 482 | * are set to something a given disk format doesn't understand | ||
| 483 | * it is treated like an incompat flag for reading and writing, | ||
| 484 | * but not for stat. | ||
| 485 | */ | ||
| 486 | u8 compression; | ||
| 487 | u8 encryption; | ||
| 488 | __le16 other_encoding; /* spare for later use */ | ||
| 489 | |||
| 490 | /* are we inline data or a real extent? */ | ||
| 458 | u8 type; | 491 | u8 type; |
| 492 | |||
| 459 | /* | 493 | /* |
| 460 | * disk space consumed by the extent, checksum blocks are included | 494 | * disk space consumed by the extent, checksum blocks are included |
| 461 | * in these numbers | 495 | * in these numbers |
| @@ -471,9 +505,11 @@ struct btrfs_file_extent_item { | |||
| 471 | */ | 505 | */ |
| 472 | __le64 offset; | 506 | __le64 offset; |
| 473 | /* | 507 | /* |
| 474 | * the logical number of file blocks (no csums included) | 508 | * the logical number of file blocks (no csums included). This |
| 509 | * always reflects the size uncompressed and without encoding. | ||
| 475 | */ | 510 | */ |
| 476 | __le64 num_bytes; | 511 | __le64 num_bytes; |
| 512 | |||
| 477 | } __attribute__ ((__packed__)); | 513 | } __attribute__ ((__packed__)); |
| 478 | 514 | ||
| 479 | struct btrfs_csum_item { | 515 | struct btrfs_csum_item { |
| @@ -814,6 +850,7 @@ struct btrfs_root { | |||
| 814 | #define BTRFS_MOUNT_NOBARRIER (1 << 2) | 850 | #define BTRFS_MOUNT_NOBARRIER (1 << 2) |
| 815 | #define BTRFS_MOUNT_SSD (1 << 3) | 851 | #define BTRFS_MOUNT_SSD (1 << 3) |
| 816 | #define BTRFS_MOUNT_DEGRADED (1 << 4) | 852 | #define BTRFS_MOUNT_DEGRADED (1 << 4) |
| 853 | #define BTRFS_MOUNT_COMPRESS (1 << 5) | ||
| 817 | 854 | ||
| 818 | #define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt) | 855 | #define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt) |
| 819 | #define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt) | 856 | #define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt) |
| @@ -825,6 +862,7 @@ struct btrfs_root { | |||
| 825 | #define BTRFS_INODE_NODATASUM (1 << 0) | 862 | #define BTRFS_INODE_NODATASUM (1 << 0) |
| 826 | #define BTRFS_INODE_NODATACOW (1 << 1) | 863 | #define BTRFS_INODE_NODATACOW (1 << 1) |
| 827 | #define BTRFS_INODE_READONLY (1 << 2) | 864 | #define BTRFS_INODE_READONLY (1 << 2) |
| 865 | #define BTRFS_INODE_NOCOMPRESS (1 << 3) | ||
| 828 | #define btrfs_clear_flag(inode, flag) (BTRFS_I(inode)->flags &= \ | 866 | #define btrfs_clear_flag(inode, flag) (BTRFS_I(inode)->flags &= \ |
| 829 | ~BTRFS_INODE_##flag) | 867 | ~BTRFS_INODE_##flag) |
| 830 | #define btrfs_set_flag(inode, flag) (BTRFS_I(inode)->flags |= \ | 868 | #define btrfs_set_flag(inode, flag) (BTRFS_I(inode)->flags |= \ |
| @@ -1424,14 +1462,6 @@ static inline u32 btrfs_file_extent_calc_inline_size(u32 datasize) | |||
| 1424 | return offsetof(struct btrfs_file_extent_item, disk_bytenr) + datasize; | 1462 | return offsetof(struct btrfs_file_extent_item, disk_bytenr) + datasize; |
| 1425 | } | 1463 | } |
| 1426 | 1464 | ||
| 1427 | static inline u32 btrfs_file_extent_inline_len(struct extent_buffer *eb, | ||
| 1428 | struct btrfs_item *e) | ||
| 1429 | { | ||
| 1430 | unsigned long offset; | ||
| 1431 | offset = offsetof(struct btrfs_file_extent_item, disk_bytenr); | ||
| 1432 | return btrfs_item_size(eb, e) - offset; | ||
| 1433 | } | ||
| 1434 | |||
| 1435 | BTRFS_SETGET_FUNCS(file_extent_disk_bytenr, struct btrfs_file_extent_item, | 1465 | BTRFS_SETGET_FUNCS(file_extent_disk_bytenr, struct btrfs_file_extent_item, |
| 1436 | disk_bytenr, 64); | 1466 | disk_bytenr, 64); |
| 1437 | BTRFS_SETGET_FUNCS(file_extent_generation, struct btrfs_file_extent_item, | 1467 | BTRFS_SETGET_FUNCS(file_extent_generation, struct btrfs_file_extent_item, |
| @@ -1442,6 +1472,36 @@ BTRFS_SETGET_FUNCS(file_extent_offset, struct btrfs_file_extent_item, | |||
| 1442 | offset, 64); | 1472 | offset, 64); |
| 1443 | BTRFS_SETGET_FUNCS(file_extent_num_bytes, struct btrfs_file_extent_item, | 1473 | BTRFS_SETGET_FUNCS(file_extent_num_bytes, struct btrfs_file_extent_item, |
| 1444 | num_bytes, 64); | 1474 | num_bytes, 64); |
| 1475 | BTRFS_SETGET_FUNCS(file_extent_ram_bytes, struct btrfs_file_extent_item, | ||
| 1476 | ram_bytes, 64); | ||
| 1477 | BTRFS_SETGET_FUNCS(file_extent_compression, struct btrfs_file_extent_item, | ||
| 1478 | compression, 8); | ||
| 1479 | BTRFS_SETGET_FUNCS(file_extent_encryption, struct btrfs_file_extent_item, | ||
| 1480 | encryption, 8); | ||
| 1481 | BTRFS_SETGET_FUNCS(file_extent_other_encoding, struct btrfs_file_extent_item, | ||
| 1482 | other_encoding, 16); | ||
| 1483 | |||
| 1484 | /* this returns the number of file bytes represented by the inline item. | ||
| 1485 | * If an item is compressed, this is the uncompressed size | ||
| 1486 | */ | ||
| 1487 | static inline u32 btrfs_file_extent_inline_len(struct extent_buffer *eb, | ||
| 1488 | struct btrfs_file_extent_item *e) | ||
| 1489 | { | ||
| 1490 | return btrfs_file_extent_ram_bytes(eb, e); | ||
| 1491 | } | ||
| 1492 | |||
| 1493 | /* | ||
| 1494 | * this returns the number of bytes used by the item on disk, minus the | ||
| 1495 | * size of any extent headers. If a file is compressed on disk, this is | ||
| 1496 | * the compressed size | ||
| 1497 | */ | ||
| 1498 | static inline u32 btrfs_file_extent_inline_item_len(struct extent_buffer *eb, | ||
| 1499 | struct btrfs_item *e) | ||
| 1500 | { | ||
| 1501 | unsigned long offset; | ||
| 1502 | offset = offsetof(struct btrfs_file_extent_item, disk_bytenr); | ||
| 1503 | return btrfs_item_size(eb, e) - offset; | ||
| 1504 | } | ||
| 1445 | 1505 | ||
| 1446 | static inline struct btrfs_root *btrfs_sb(struct super_block *sb) | 1506 | static inline struct btrfs_root *btrfs_sb(struct super_block *sb) |
| 1447 | { | 1507 | { |
| @@ -1745,10 +1805,11 @@ int btrfs_lookup_inode(struct btrfs_trans_handle *trans, struct btrfs_root | |||
| 1745 | int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode, | 1805 | int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode, |
| 1746 | struct bio *bio); | 1806 | struct bio *bio); |
| 1747 | int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, | 1807 | int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, |
| 1748 | struct btrfs_root *root, | 1808 | struct btrfs_root *root, |
| 1749 | u64 objectid, u64 pos, u64 disk_offset, | 1809 | u64 objectid, u64 pos, |
| 1750 | u64 disk_num_bytes, | 1810 | u64 disk_offset, u64 disk_num_bytes, |
| 1751 | u64 num_bytes, u64 offset); | 1811 | u64 num_bytes, u64 offset, u64 ram_bytes, |
| 1812 | u8 compression, u8 encryption, u16 other_encoding); | ||
| 1752 | int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, | 1813 | int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, |
| 1753 | struct btrfs_root *root, | 1814 | struct btrfs_root *root, |
| 1754 | struct btrfs_path *path, u64 objectid, | 1815 | struct btrfs_path *path, u64 objectid, |
| @@ -1758,6 +1819,8 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans, | |||
| 1758 | struct btrfs_ordered_sum *sums); | 1819 | struct btrfs_ordered_sum *sums); |
| 1759 | int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode, | 1820 | int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode, |
| 1760 | struct bio *bio); | 1821 | struct bio *bio); |
| 1822 | int btrfs_csum_file_bytes(struct btrfs_root *root, struct inode *inode, | ||
| 1823 | u64 start, unsigned long len); | ||
| 1761 | struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans, | 1824 | struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans, |
| 1762 | struct btrfs_root *root, | 1825 | struct btrfs_root *root, |
| 1763 | struct btrfs_path *path, | 1826 | struct btrfs_path *path, |
| @@ -1799,7 +1862,7 @@ void btrfs_invalidate_dcache_root(struct btrfs_root *root, char *name, | |||
| 1799 | int namelen); | 1862 | int namelen); |
| 1800 | 1863 | ||
| 1801 | int btrfs_merge_bio_hook(struct page *page, unsigned long offset, | 1864 | int btrfs_merge_bio_hook(struct page *page, unsigned long offset, |
| 1802 | size_t size, struct bio *bio); | 1865 | size_t size, struct bio *bio, unsigned long bio_flags); |
| 1803 | 1866 | ||
| 1804 | unsigned long btrfs_force_ra(struct address_space *mapping, | 1867 | unsigned long btrfs_force_ra(struct address_space *mapping, |
| 1805 | struct file_ra_state *ra, struct file *file, | 1868 | struct file_ra_state *ra, struct file *file, |
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 0be044bb619..dc95f636a11 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
| @@ -83,6 +83,7 @@ struct async_submit_bio { | |||
| 83 | extent_submit_bio_hook_t *submit_bio_hook; | 83 | extent_submit_bio_hook_t *submit_bio_hook; |
| 84 | int rw; | 84 | int rw; |
| 85 | int mirror_num; | 85 | int mirror_num; |
| 86 | unsigned long bio_flags; | ||
| 86 | struct btrfs_work work; | 87 | struct btrfs_work work; |
| 87 | }; | 88 | }; |
| 88 | 89 | ||
| @@ -115,6 +116,7 @@ struct extent_map *btree_get_extent(struct inode *inode, struct page *page, | |||
| 115 | } | 116 | } |
| 116 | em->start = 0; | 117 | em->start = 0; |
| 117 | em->len = (u64)-1; | 118 | em->len = (u64)-1; |
| 119 | em->block_len = (u64)-1; | ||
| 118 | em->block_start = 0; | 120 | em->block_start = 0; |
| 119 | em->bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev; | 121 | em->bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev; |
| 120 | 122 | ||
| @@ -469,12 +471,13 @@ static void run_one_async_submit(struct btrfs_work *work) | |||
| 469 | wake_up(&fs_info->async_submit_wait); | 471 | wake_up(&fs_info->async_submit_wait); |
| 470 | 472 | ||
| 471 | async->submit_bio_hook(async->inode, async->rw, async->bio, | 473 | async->submit_bio_hook(async->inode, async->rw, async->bio, |
| 472 | async->mirror_num); | 474 | async->mirror_num, async->bio_flags); |
| 473 | kfree(async); | 475 | kfree(async); |
| 474 | } | 476 | } |
| 475 | 477 | ||
| 476 | int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, | 478 | int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, |
| 477 | int rw, struct bio *bio, int mirror_num, | 479 | int rw, struct bio *bio, int mirror_num, |
| 480 | unsigned long bio_flags, | ||
| 478 | extent_submit_bio_hook_t *submit_bio_hook) | 481 | extent_submit_bio_hook_t *submit_bio_hook) |
| 479 | { | 482 | { |
| 480 | struct async_submit_bio *async; | 483 | struct async_submit_bio *async; |
| @@ -491,6 +494,7 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, | |||
| 491 | async->submit_bio_hook = submit_bio_hook; | 494 | async->submit_bio_hook = submit_bio_hook; |
| 492 | async->work.func = run_one_async_submit; | 495 | async->work.func = run_one_async_submit; |
| 493 | async->work.flags = 0; | 496 | async->work.flags = 0; |
| 497 | async->bio_flags = bio_flags; | ||
| 494 | 498 | ||
| 495 | while(atomic_read(&fs_info->async_submit_draining) && | 499 | while(atomic_read(&fs_info->async_submit_draining) && |
| 496 | atomic_read(&fs_info->nr_async_submits)) { | 500 | atomic_read(&fs_info->nr_async_submits)) { |
| @@ -530,7 +534,7 @@ static int btree_csum_one_bio(struct bio *bio) | |||
| 530 | } | 534 | } |
| 531 | 535 | ||
| 532 | static int __btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, | 536 | static int __btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, |
| 533 | int mirror_num) | 537 | int mirror_num, unsigned long bio_flags) |
| 534 | { | 538 | { |
| 535 | struct btrfs_root *root = BTRFS_I(inode)->root; | 539 | struct btrfs_root *root = BTRFS_I(inode)->root; |
| 536 | int ret; | 540 | int ret; |
| @@ -556,17 +560,17 @@ static int __btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, | |||
| 556 | } | 560 | } |
| 557 | 561 | ||
| 558 | static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, | 562 | static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, |
| 559 | int mirror_num) | 563 | int mirror_num, unsigned long bio_flags) |
| 560 | { | 564 | { |
| 561 | /* | 565 | /* |
| 562 | * kthread helpers are used to submit writes so that checksumming | 566 | * kthread helpers are used to submit writes so that checksumming |
| 563 | * can happen in parallel across all CPUs | 567 | * can happen in parallel across all CPUs |
| 564 | */ | 568 | */ |
| 565 | if (!(rw & (1 << BIO_RW))) { | 569 | if (!(rw & (1 << BIO_RW))) { |
| 566 | return __btree_submit_bio_hook(inode, rw, bio, mirror_num); | 570 | return __btree_submit_bio_hook(inode, rw, bio, mirror_num, 0); |
| 567 | } | 571 | } |
| 568 | return btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info, | 572 | return btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info, |
| 569 | inode, rw, bio, mirror_num, | 573 | inode, rw, bio, mirror_num, 0, |
| 570 | __btree_submit_bio_hook); | 574 | __btree_submit_bio_hook); |
| 571 | } | 575 | } |
| 572 | 576 | ||
| @@ -1407,6 +1411,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
| 1407 | fs_info->btree_inode = new_inode(sb); | 1411 | fs_info->btree_inode = new_inode(sb); |
| 1408 | fs_info->btree_inode->i_ino = 1; | 1412 | fs_info->btree_inode->i_ino = 1; |
| 1409 | fs_info->btree_inode->i_nlink = 1; | 1413 | fs_info->btree_inode->i_nlink = 1; |
| 1414 | |||
| 1410 | fs_info->thread_pool_size = min(num_online_cpus() + 2, 8); | 1415 | fs_info->thread_pool_size = min(num_online_cpus() + 2, 8); |
| 1411 | 1416 | ||
| 1412 | INIT_LIST_HEAD(&fs_info->ordered_extents); | 1417 | INIT_LIST_HEAD(&fs_info->ordered_extents); |
| @@ -1508,6 +1513,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
| 1508 | */ | 1513 | */ |
| 1509 | btrfs_init_workers(&fs_info->workers, "worker", | 1514 | btrfs_init_workers(&fs_info->workers, "worker", |
| 1510 | fs_info->thread_pool_size); | 1515 | fs_info->thread_pool_size); |
| 1516 | |||
| 1511 | btrfs_init_workers(&fs_info->submit_workers, "submit", | 1517 | btrfs_init_workers(&fs_info->submit_workers, "submit", |
| 1512 | min_t(u64, fs_devices->num_devices, | 1518 | min_t(u64, fs_devices->num_devices, |
| 1513 | fs_info->thread_pool_size)); | 1519 | fs_info->thread_pool_size)); |
| @@ -1559,6 +1565,8 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
| 1559 | } | 1565 | } |
| 1560 | 1566 | ||
| 1561 | fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super); | 1567 | fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super); |
| 1568 | fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages, | ||
| 1569 | 4 * 1024 * 1024 / PAGE_CACHE_SIZE); | ||
| 1562 | 1570 | ||
| 1563 | nodesize = btrfs_super_nodesize(disk_super); | 1571 | nodesize = btrfs_super_nodesize(disk_super); |
| 1564 | leafsize = btrfs_super_leafsize(disk_super); | 1572 | leafsize = btrfs_super_leafsize(disk_super); |
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index f84f5058dbb..4eb1f1408d2 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h | |||
| @@ -71,6 +71,7 @@ int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio, | |||
| 71 | int metadata); | 71 | int metadata); |
| 72 | int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, | 72 | int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, |
| 73 | int rw, struct bio *bio, int mirror_num, | 73 | int rw, struct bio *bio, int mirror_num, |
| 74 | unsigned long bio_flags, | ||
| 74 | extent_submit_bio_hook_t *submit_bio_hook); | 75 | extent_submit_bio_hook_t *submit_bio_hook); |
| 75 | int btrfs_congested_async(struct btrfs_fs_info *info, int iodone); | 76 | int btrfs_congested_async(struct btrfs_fs_info *info, int iodone); |
| 76 | unsigned long btrfs_async_submit_limit(struct btrfs_fs_info *info); | 77 | unsigned long btrfs_async_submit_limit(struct btrfs_fs_info *info); |
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 280ac1aa9b6..bbf04e80a1a 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
| @@ -3278,6 +3278,7 @@ static int noinline relocate_data_extent(struct inode *reloc_inode, | |||
| 3278 | 3278 | ||
| 3279 | em->start = extent_key->objectid - offset; | 3279 | em->start = extent_key->objectid - offset; |
| 3280 | em->len = extent_key->offset; | 3280 | em->len = extent_key->offset; |
| 3281 | em->block_len = extent_key->offset; | ||
| 3281 | em->block_start = extent_key->objectid; | 3282 | em->block_start = extent_key->objectid; |
| 3282 | em->bdev = root->fs_info->fs_devices->latest_bdev; | 3283 | em->bdev = root->fs_info->fs_devices->latest_bdev; |
| 3283 | set_bit(EXTENT_FLAG_PINNED, &em->flags); | 3284 | set_bit(EXTENT_FLAG_PINNED, &em->flags); |
| @@ -3314,10 +3315,14 @@ struct btrfs_ref_path { | |||
| 3314 | }; | 3315 | }; |
| 3315 | 3316 | ||
| 3316 | struct disk_extent { | 3317 | struct disk_extent { |
| 3318 | u64 ram_bytes; | ||
| 3317 | u64 disk_bytenr; | 3319 | u64 disk_bytenr; |
| 3318 | u64 disk_num_bytes; | 3320 | u64 disk_num_bytes; |
| 3319 | u64 offset; | 3321 | u64 offset; |
| 3320 | u64 num_bytes; | 3322 | u64 num_bytes; |
| 3323 | u8 compression; | ||
| 3324 | u8 encryption; | ||
| 3325 | u16 other_encoding; | ||
| 3321 | }; | 3326 | }; |
| 3322 | 3327 | ||
| 3323 | static int is_cowonly_root(u64 root_objectid) | 3328 | static int is_cowonly_root(u64 root_objectid) |
| @@ -3631,6 +3636,11 @@ static int noinline get_new_locations(struct inode *reloc_inode, | |||
| 3631 | btrfs_file_extent_disk_num_bytes(leaf, fi); | 3636 | btrfs_file_extent_disk_num_bytes(leaf, fi); |
| 3632 | exts[nr].offset = btrfs_file_extent_offset(leaf, fi); | 3637 | exts[nr].offset = btrfs_file_extent_offset(leaf, fi); |
| 3633 | exts[nr].num_bytes = btrfs_file_extent_num_bytes(leaf, fi); | 3638 | exts[nr].num_bytes = btrfs_file_extent_num_bytes(leaf, fi); |
| 3639 | exts[nr].ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi); | ||
| 3640 | exts[nr].compression = btrfs_file_extent_compression(leaf, fi); | ||
| 3641 | exts[nr].encryption = btrfs_file_extent_encryption(leaf, fi); | ||
| 3642 | exts[nr].other_encoding = btrfs_file_extent_other_encoding(leaf, | ||
| 3643 | fi); | ||
| 3634 | WARN_ON(exts[nr].offset > 0); | 3644 | WARN_ON(exts[nr].offset > 0); |
| 3635 | WARN_ON(exts[nr].num_bytes != exts[nr].disk_num_bytes); | 3645 | WARN_ON(exts[nr].num_bytes != exts[nr].disk_num_bytes); |
| 3636 | 3646 | ||
| @@ -3846,6 +3856,8 @@ next: | |||
| 3846 | new_extents[0].disk_bytenr); | 3856 | new_extents[0].disk_bytenr); |
| 3847 | btrfs_set_file_extent_disk_num_bytes(leaf, fi, | 3857 | btrfs_set_file_extent_disk_num_bytes(leaf, fi, |
| 3848 | new_extents[0].disk_num_bytes); | 3858 | new_extents[0].disk_num_bytes); |
| 3859 | btrfs_set_file_extent_ram_bytes(leaf, fi, | ||
| 3860 | new_extents[0].ram_bytes); | ||
| 3849 | ext_offset += new_extents[0].offset; | 3861 | ext_offset += new_extents[0].offset; |
| 3850 | btrfs_set_file_extent_offset(leaf, fi, ext_offset); | 3862 | btrfs_set_file_extent_offset(leaf, fi, ext_offset); |
| 3851 | btrfs_mark_buffer_dirty(leaf); | 3863 | btrfs_mark_buffer_dirty(leaf); |
| @@ -3911,6 +3923,16 @@ next: | |||
| 3911 | new_extents[i].disk_bytenr); | 3923 | new_extents[i].disk_bytenr); |
| 3912 | btrfs_set_file_extent_disk_num_bytes(leaf, fi, | 3924 | btrfs_set_file_extent_disk_num_bytes(leaf, fi, |
| 3913 | new_extents[i].disk_num_bytes); | 3925 | new_extents[i].disk_num_bytes); |
| 3926 | btrfs_set_file_extent_ram_bytes(leaf, fi, | ||
| 3927 | new_extents[i].ram_bytes); | ||
| 3928 | |||
| 3929 | btrfs_set_file_extent_compression(leaf, fi, | ||
| 3930 | new_extents[i].compression); | ||
| 3931 | btrfs_set_file_extent_encryption(leaf, fi, | ||
| 3932 | new_extents[i].encryption); | ||
| 3933 | btrfs_set_file_extent_other_encoding(leaf, fi, | ||
| 3934 | new_extents[i].other_encoding); | ||
| 3935 | |||
| 3914 | btrfs_set_file_extent_num_bytes(leaf, fi, | 3936 | btrfs_set_file_extent_num_bytes(leaf, fi, |
| 3915 | extent_len); | 3937 | extent_len); |
| 3916 | ext_offset += new_extents[i].offset; | 3938 | ext_offset += new_extents[i].offset; |
| @@ -4169,6 +4191,8 @@ static int noinline replace_extents_in_leaf(struct btrfs_trans_handle *trans, | |||
| 4169 | ref->extents[ext_index].num_bytes = new_extent->disk_num_bytes; | 4191 | ref->extents[ext_index].num_bytes = new_extent->disk_num_bytes; |
| 4170 | 4192 | ||
| 4171 | btrfs_set_file_extent_generation(leaf, fi, trans->transid); | 4193 | btrfs_set_file_extent_generation(leaf, fi, trans->transid); |
| 4194 | btrfs_set_file_extent_ram_bytes(leaf, fi, | ||
| 4195 | new_extent->ram_bytes); | ||
| 4172 | btrfs_set_file_extent_disk_bytenr(leaf, fi, | 4196 | btrfs_set_file_extent_disk_bytenr(leaf, fi, |
| 4173 | new_extent->disk_bytenr); | 4197 | new_extent->disk_bytenr); |
| 4174 | btrfs_set_file_extent_disk_num_bytes(leaf, fi, | 4198 | btrfs_set_file_extent_disk_num_bytes(leaf, fi, |
| @@ -4847,7 +4871,8 @@ static struct inode noinline *create_reloc_inode(struct btrfs_fs_info *fs_info, | |||
| 4847 | BUG_ON(err); | 4871 | BUG_ON(err); |
| 4848 | 4872 | ||
| 4849 | err = btrfs_insert_file_extent(trans, root, objectid, 0, 0, 0, | 4873 | err = btrfs_insert_file_extent(trans, root, objectid, 0, 0, 0, |
| 4850 | group->key.offset, 0); | 4874 | group->key.offset, 0, group->key.offset, |
| 4875 | 0, 0, 0); | ||
| 4851 | BUG_ON(err); | 4876 | BUG_ON(err); |
| 4852 | 4877 | ||
| 4853 | inode = btrfs_iget_locked(root->fs_info->sb, objectid, root); | 4878 | inode = btrfs_iget_locked(root->fs_info->sb, objectid, root); |
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 563b2d12f4f..314041fdfa4 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c | |||
| @@ -30,6 +30,7 @@ static struct kmem_cache *extent_buffer_cache; | |||
| 30 | static LIST_HEAD(buffers); | 30 | static LIST_HEAD(buffers); |
| 31 | static LIST_HEAD(states); | 31 | static LIST_HEAD(states); |
| 32 | 32 | ||
| 33 | #define LEAK_DEBUG 1 | ||
| 33 | #ifdef LEAK_DEBUG | 34 | #ifdef LEAK_DEBUG |
| 34 | static spinlock_t leak_lock = SPIN_LOCK_UNLOCKED; | 35 | static spinlock_t leak_lock = SPIN_LOCK_UNLOCKED; |
| 35 | #endif | 36 | #endif |
| @@ -1067,8 +1068,8 @@ EXPORT_SYMBOL(find_first_extent_bit_state); | |||
| 1067 | * | 1068 | * |
| 1068 | * 1 is returned if we find something, 0 if nothing was in the tree | 1069 | * 1 is returned if we find something, 0 if nothing was in the tree |
| 1069 | */ | 1070 | */ |
| 1070 | static noinline u64 find_lock_delalloc_range(struct extent_io_tree *tree, | 1071 | static noinline u64 find_delalloc_range(struct extent_io_tree *tree, |
| 1071 | u64 *start, u64 *end, u64 max_bytes) | 1072 | u64 *start, u64 *end, u64 max_bytes) |
| 1072 | { | 1073 | { |
| 1073 | struct rb_node *node; | 1074 | struct rb_node *node; |
| 1074 | struct extent_state *state; | 1075 | struct extent_state *state; |
| @@ -1077,11 +1078,11 @@ static noinline u64 find_lock_delalloc_range(struct extent_io_tree *tree, | |||
| 1077 | u64 total_bytes = 0; | 1078 | u64 total_bytes = 0; |
| 1078 | 1079 | ||
| 1079 | spin_lock_irq(&tree->lock); | 1080 | spin_lock_irq(&tree->lock); |
| 1081 | |||
| 1080 | /* | 1082 | /* |
| 1081 | * this search will find all the extents that end after | 1083 | * this search will find all the extents that end after |
| 1082 | * our range starts. | 1084 | * our range starts. |
| 1083 | */ | 1085 | */ |
| 1084 | search_again: | ||
| 1085 | node = tree_search(tree, cur_start); | 1086 | node = tree_search(tree, cur_start); |
| 1086 | if (!node) { | 1087 | if (!node) { |
| 1087 | if (!found) | 1088 | if (!found) |
| @@ -1100,40 +1101,6 @@ search_again: | |||
| 1100 | *end = state->end; | 1101 | *end = state->end; |
| 1101 | goto out; | 1102 | goto out; |
| 1102 | } | 1103 | } |
| 1103 | if (!found && !(state->state & EXTENT_BOUNDARY)) { | ||
| 1104 | struct extent_state *prev_state; | ||
| 1105 | struct rb_node *prev_node = node; | ||
| 1106 | while(1) { | ||
| 1107 | prev_node = rb_prev(prev_node); | ||
| 1108 | if (!prev_node) | ||
| 1109 | break; | ||
| 1110 | prev_state = rb_entry(prev_node, | ||
| 1111 | struct extent_state, | ||
| 1112 | rb_node); | ||
| 1113 | if ((prev_state->end + 1 != state->start) || | ||
| 1114 | !(prev_state->state & EXTENT_DELALLOC)) | ||
| 1115 | break; | ||
| 1116 | if ((cur_start - prev_state->start) * 2 > | ||
| 1117 | max_bytes) | ||
| 1118 | break; | ||
| 1119 | state = prev_state; | ||
| 1120 | node = prev_node; | ||
| 1121 | } | ||
| 1122 | } | ||
| 1123 | if (state->state & EXTENT_LOCKED) { | ||
| 1124 | DEFINE_WAIT(wait); | ||
| 1125 | atomic_inc(&state->refs); | ||
| 1126 | prepare_to_wait(&state->wq, &wait, | ||
| 1127 | TASK_UNINTERRUPTIBLE); | ||
| 1128 | spin_unlock_irq(&tree->lock); | ||
| 1129 | schedule(); | ||
| 1130 | spin_lock_irq(&tree->lock); | ||
| 1131 | finish_wait(&state->wq, &wait); | ||
| 1132 | free_extent_state(state); | ||
| 1133 | goto search_again; | ||
| 1134 | } | ||
| 1135 | set_state_cb(tree, state, EXTENT_LOCKED); | ||
| 1136 | state->state |= EXTENT_LOCKED; | ||
| 1137 | if (!found) | 1104 | if (!found) |
| 1138 | *start = state->start; | 1105 | *start = state->start; |
| 1139 | found++; | 1106 | found++; |
| @@ -1151,6 +1118,208 @@ out: | |||
| 1151 | return found; | 1118 | return found; |
| 1152 | } | 1119 | } |
| 1153 | 1120 | ||
| 1121 | static noinline int __unlock_for_delalloc(struct inode *inode, | ||
| 1122 | struct page *locked_page, | ||
| 1123 | u64 start, u64 end) | ||
| 1124 | { | ||
| 1125 | int ret; | ||
| 1126 | struct page *pages[16]; | ||
| 1127 | unsigned long index = start >> PAGE_CACHE_SHIFT; | ||
| 1128 | unsigned long end_index = end >> PAGE_CACHE_SHIFT; | ||
| 1129 | unsigned long nr_pages = end_index - index + 1; | ||
| 1130 | int i; | ||
| 1131 | |||
| 1132 | if (index == locked_page->index && end_index == index) | ||
| 1133 | return 0; | ||
| 1134 | |||
| 1135 | while(nr_pages > 0) { | ||
| 1136 | ret = find_get_pages_contig(inode->i_mapping, index, | ||
| 1137 | min(nr_pages, ARRAY_SIZE(pages)), pages); | ||
| 1138 | for (i = 0; i < ret; i++) { | ||
| 1139 | if (pages[i] != locked_page) | ||
| 1140 | unlock_page(pages[i]); | ||
| 1141 | page_cache_release(pages[i]); | ||
| 1142 | } | ||
| 1143 | nr_pages -= ret; | ||
| 1144 | index += ret; | ||
| 1145 | cond_resched(); | ||
| 1146 | } | ||
| 1147 | return 0; | ||
| 1148 | } | ||
| 1149 | |||
| 1150 | static noinline int lock_delalloc_pages(struct inode *inode, | ||
| 1151 | struct page *locked_page, | ||
| 1152 | u64 delalloc_start, | ||
| 1153 | u64 delalloc_end) | ||
| 1154 | { | ||
| 1155 | unsigned long index = delalloc_start >> PAGE_CACHE_SHIFT; | ||
| 1156 | unsigned long start_index = index; | ||
| 1157 | unsigned long end_index = delalloc_end >> PAGE_CACHE_SHIFT; | ||
| 1158 | unsigned long pages_locked = 0; | ||
| 1159 | struct page *pages[16]; | ||
| 1160 | unsigned long nrpages; | ||
| 1161 | int ret; | ||
| 1162 | int i; | ||
| 1163 | |||
| 1164 | /* the caller is responsible for locking the start index */ | ||
| 1165 | if (index == locked_page->index && index == end_index) | ||
| 1166 | return 0; | ||
| 1167 | |||
| 1168 | /* skip the page at the start index */ | ||
| 1169 | nrpages = end_index - index + 1; | ||
| 1170 | while(nrpages > 0) { | ||
| 1171 | ret = find_get_pages_contig(inode->i_mapping, index, | ||
| 1172 | min(nrpages, ARRAY_SIZE(pages)), pages); | ||
| 1173 | if (ret == 0) { | ||
| 1174 | ret = -EAGAIN; | ||
| 1175 | goto done; | ||
| 1176 | } | ||
| 1177 | /* now we have an array of pages, lock them all */ | ||
| 1178 | for (i = 0; i < ret; i++) { | ||
| 1179 | /* | ||
| 1180 | * the caller is taking responsibility for | ||
| 1181 | * locked_page | ||
| 1182 | */ | ||
| 1183 | if (pages[i] != locked_page) | ||
| 1184 | lock_page(pages[i]); | ||
| 1185 | page_cache_release(pages[i]); | ||
| 1186 | } | ||
| 1187 | pages_locked += ret; | ||
| 1188 | nrpages -= ret; | ||
| 1189 | index += ret; | ||
| 1190 | cond_resched(); | ||
| 1191 | } | ||
| 1192 | ret = 0; | ||
| 1193 | done: | ||
| 1194 | if (ret && pages_locked) { | ||
| 1195 | __unlock_for_delalloc(inode, locked_page, | ||
| 1196 | delalloc_start, | ||
| 1197 | ((u64)(start_index + pages_locked - 1)) << | ||
| 1198 | PAGE_CACHE_SHIFT); | ||
| 1199 | } | ||
| 1200 | return ret; | ||
| 1201 | } | ||
| 1202 | |||
| 1203 | /* | ||
| 1204 | * find a contiguous range of bytes in the file marked as delalloc, not | ||
| 1205 | * more than 'max_bytes'. start and end are used to return the range, | ||
| 1206 | * | ||
| 1207 | * 1 is returned if we find something, 0 if nothing was in the tree | ||
| 1208 | */ | ||
| 1209 | static noinline u64 find_lock_delalloc_range(struct inode *inode, | ||
| 1210 | struct extent_io_tree *tree, | ||
| 1211 | struct page *locked_page, | ||
| 1212 | u64 *start, u64 *end, | ||
| 1213 | u64 max_bytes) | ||
| 1214 | { | ||
| 1215 | u64 delalloc_start; | ||
| 1216 | u64 delalloc_end; | ||
| 1217 | u64 found; | ||
| 1218 | int ret; | ||
| 1219 | int loops = 0; | ||
| 1220 | |||
| 1221 | again: | ||
| 1222 | /* step one, find a bunch of delalloc bytes starting at start */ | ||
| 1223 | delalloc_start = *start; | ||
| 1224 | delalloc_end = 0; | ||
| 1225 | found = find_delalloc_range(tree, &delalloc_start, &delalloc_end, | ||
| 1226 | max_bytes); | ||
| 1227 | if (!found) { | ||
| 1228 | *start = delalloc_start; | ||
| 1229 | *end = delalloc_end; | ||
| 1230 | return found; | ||
| 1231 | } | ||
| 1232 | |||
| 1233 | /* | ||
| 1234 | * make sure to limit the number of pages we try to lock down | ||
| 1235 | * if we're looping. | ||
| 1236 | */ | ||
| 1237 | if (delalloc_end + 1 - delalloc_start > max_bytes && loops) { | ||
| 1238 | delalloc_end = (delalloc_start + PAGE_CACHE_SIZE - 1) & | ||
| 1239 | ~((u64)PAGE_CACHE_SIZE - 1); | ||
| 1240 | } | ||
| 1241 | /* step two, lock all the pages after the page that has start */ | ||
| 1242 | ret = lock_delalloc_pages(inode, locked_page, | ||
| 1243 | delalloc_start, delalloc_end); | ||
| 1244 | if (ret == -EAGAIN) { | ||
| 1245 | /* some of the pages are gone, lets avoid looping by | ||
| 1246 | * shortening the size of the delalloc range we're searching | ||
| 1247 | */ | ||
| 1248 | if (!loops) { | ||
| 1249 | unsigned long offset = (*start) & (PAGE_CACHE_SIZE - 1); | ||
| 1250 | max_bytes = PAGE_CACHE_SIZE - offset; | ||
| 1251 | loops = 1; | ||
| 1252 | goto again; | ||
| 1253 | } else { | ||
| 1254 | found = 0; | ||
| 1255 | goto out_failed; | ||
| 1256 | } | ||
| 1257 | } | ||
| 1258 | BUG_ON(ret); | ||
| 1259 | |||
| 1260 | /* step three, lock the state bits for the whole range */ | ||
| 1261 | lock_extent(tree, delalloc_start, delalloc_end, GFP_NOFS); | ||
| 1262 | |||
| 1263 | /* then test to make sure it is all still delalloc */ | ||
| 1264 | ret = test_range_bit(tree, delalloc_start, delalloc_end, | ||
| 1265 | EXTENT_DELALLOC, 1); | ||
| 1266 | if (!ret) { | ||
| 1267 | unlock_extent(tree, delalloc_start, delalloc_end, GFP_NOFS); | ||
| 1268 | __unlock_for_delalloc(inode, locked_page, | ||
| 1269 | delalloc_start, delalloc_end); | ||
| 1270 | cond_resched(); | ||
| 1271 | goto again; | ||
| 1272 | } | ||
| 1273 | *start = delalloc_start; | ||
| 1274 | *end = delalloc_end; | ||
| 1275 | out_failed: | ||
| 1276 | return found; | ||
| 1277 | } | ||
| 1278 | |||
| 1279 | int extent_clear_unlock_delalloc(struct inode *inode, | ||
| 1280 | struct extent_io_tree *tree, | ||
| 1281 | u64 start, u64 end, struct page *locked_page, | ||
| 1282 | int clear_dirty, int set_writeback, | ||
| 1283 | int end_writeback) | ||
| 1284 | { | ||
| 1285 | int ret; | ||
| 1286 | struct page *pages[16]; | ||
| 1287 | unsigned long index = start >> PAGE_CACHE_SHIFT; | ||
| 1288 | unsigned long end_index = end >> PAGE_CACHE_SHIFT; | ||
| 1289 | unsigned long nr_pages = end_index - index + 1; | ||
| 1290 | int i; | ||
| 1291 | int clear_bits = EXTENT_LOCKED | EXTENT_DELALLOC; | ||
| 1292 | |||
| 1293 | if (clear_dirty) | ||
| 1294 | clear_bits |= EXTENT_DIRTY; | ||
| 1295 | |||
| 1296 | clear_extent_bit(tree, start, end, clear_bits, 1, 0, GFP_NOFS); | ||
| 1297 | |||
| 1298 | while(nr_pages > 0) { | ||
| 1299 | ret = find_get_pages_contig(inode->i_mapping, index, | ||
| 1300 | min(nr_pages, ARRAY_SIZE(pages)), pages); | ||
| 1301 | for (i = 0; i < ret; i++) { | ||
| 1302 | if (pages[i] == locked_page) { | ||
| 1303 | page_cache_release(pages[i]); | ||
| 1304 | continue; | ||
| 1305 | } | ||
| 1306 | if (clear_dirty) | ||
| 1307 | clear_page_dirty_for_io(pages[i]); | ||
| 1308 | if (set_writeback) | ||
| 1309 | set_page_writeback(pages[i]); | ||
| 1310 | if (end_writeback) | ||
| 1311 | end_page_writeback(pages[i]); | ||
| 1312 | unlock_page(pages[i]); | ||
| 1313 | page_cache_release(pages[i]); | ||
| 1314 | } | ||
| 1315 | nr_pages -= ret; | ||
| 1316 | index += ret; | ||
| 1317 | cond_resched(); | ||
| 1318 | } | ||
| 1319 | return 0; | ||
| 1320 | } | ||
| 1321 | EXPORT_SYMBOL(extent_clear_unlock_delalloc); | ||
| 1322 | |||
| 1154 | /* | 1323 | /* |
| 1155 | * count the number of bytes in the tree that have a given bit(s) | 1324 | * count the number of bytes in the tree that have a given bit(s) |
| 1156 | * set. This can be fairly slow, except for EXTENT_DIRTY which is | 1325 | * set. This can be fairly slow, except for EXTENT_DIRTY which is |
| @@ -1631,38 +1800,26 @@ extent_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs, | |||
| 1631 | return bio; | 1800 | return bio; |
| 1632 | } | 1801 | } |
| 1633 | 1802 | ||
| 1634 | static int submit_one_bio(int rw, struct bio *bio, int mirror_num) | 1803 | static int submit_one_bio(int rw, struct bio *bio, int mirror_num, |
| 1804 | unsigned long bio_flags) | ||
| 1635 | { | 1805 | { |
| 1636 | int ret = 0; | 1806 | int ret = 0; |
| 1637 | struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; | 1807 | struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; |
| 1638 | struct page *page = bvec->bv_page; | 1808 | struct page *page = bvec->bv_page; |
| 1639 | struct extent_io_tree *tree = bio->bi_private; | 1809 | struct extent_io_tree *tree = bio->bi_private; |
| 1640 | struct rb_node *node; | ||
| 1641 | struct extent_state *state; | ||
| 1642 | u64 start; | 1810 | u64 start; |
| 1643 | u64 end; | 1811 | u64 end; |
| 1644 | 1812 | ||
| 1645 | start = ((u64)page->index << PAGE_CACHE_SHIFT) + bvec->bv_offset; | 1813 | start = ((u64)page->index << PAGE_CACHE_SHIFT) + bvec->bv_offset; |
| 1646 | end = start + bvec->bv_len - 1; | 1814 | end = start + bvec->bv_len - 1; |
| 1647 | 1815 | ||
| 1648 | spin_lock_irq(&tree->lock); | ||
| 1649 | node = __etree_search(tree, start, NULL, NULL); | ||
| 1650 | BUG_ON(!node); | ||
| 1651 | state = rb_entry(node, struct extent_state, rb_node); | ||
| 1652 | while(state->end < end) { | ||
| 1653 | node = rb_next(node); | ||
| 1654 | state = rb_entry(node, struct extent_state, rb_node); | ||
| 1655 | } | ||
| 1656 | BUG_ON(state->end != end); | ||
| 1657 | spin_unlock_irq(&tree->lock); | ||
| 1658 | |||
| 1659 | bio->bi_private = NULL; | 1816 | bio->bi_private = NULL; |
| 1660 | 1817 | ||
| 1661 | bio_get(bio); | 1818 | bio_get(bio); |
| 1662 | 1819 | ||
| 1663 | if (tree->ops && tree->ops->submit_bio_hook) | 1820 | if (tree->ops && tree->ops->submit_bio_hook) |
| 1664 | tree->ops->submit_bio_hook(page->mapping->host, rw, bio, | 1821 | tree->ops->submit_bio_hook(page->mapping->host, rw, bio, |
| 1665 | mirror_num); | 1822 | mirror_num, bio_flags); |
| 1666 | else | 1823 | else |
| 1667 | submit_bio(rw, bio); | 1824 | submit_bio(rw, bio); |
| 1668 | if (bio_flagged(bio, BIO_EOPNOTSUPP)) | 1825 | if (bio_flagged(bio, BIO_EOPNOTSUPP)) |
| @@ -1678,39 +1835,56 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree, | |||
| 1678 | struct bio **bio_ret, | 1835 | struct bio **bio_ret, |
| 1679 | unsigned long max_pages, | 1836 | unsigned long max_pages, |
| 1680 | bio_end_io_t end_io_func, | 1837 | bio_end_io_t end_io_func, |
| 1681 | int mirror_num) | 1838 | int mirror_num, |
| 1839 | unsigned long prev_bio_flags, | ||
| 1840 | unsigned long bio_flags) | ||
| 1682 | { | 1841 | { |
| 1683 | int ret = 0; | 1842 | int ret = 0; |
| 1684 | struct bio *bio; | 1843 | struct bio *bio; |
| 1685 | int nr; | 1844 | int nr; |
| 1845 | int contig = 0; | ||
| 1846 | int this_compressed = bio_flags & EXTENT_BIO_COMPRESSED; | ||
| 1847 | int old_compressed = prev_bio_flags & EXTENT_BIO_COMPRESSED; | ||
| 1848 | size_t page_size = min(size, PAGE_CACHE_SIZE); | ||
| 1686 | 1849 | ||
| 1687 | if (bio_ret && *bio_ret) { | 1850 | if (bio_ret && *bio_ret) { |
| 1688 | bio = *bio_ret; | 1851 | bio = *bio_ret; |
| 1689 | if (bio->bi_sector + (bio->bi_size >> 9) != sector || | 1852 | if (old_compressed) |
| 1853 | contig = bio->bi_sector == sector; | ||
| 1854 | else | ||
| 1855 | contig = bio->bi_sector + (bio->bi_size >> 9) == | ||
| 1856 | sector; | ||
| 1857 | |||
| 1858 | if (prev_bio_flags != bio_flags || !contig || | ||
| 1690 | (tree->ops && tree->ops->merge_bio_hook && | 1859 | (tree->ops && tree->ops->merge_bio_hook && |
| 1691 | tree->ops->merge_bio_hook(page, offset, size, bio)) || | 1860 | tree->ops->merge_bio_hook(page, offset, page_size, bio, |
| 1692 | bio_add_page(bio, page, size, offset) < size) { | 1861 | bio_flags)) || |
| 1693 | ret = submit_one_bio(rw, bio, mirror_num); | 1862 | bio_add_page(bio, page, page_size, offset) < page_size) { |
| 1863 | ret = submit_one_bio(rw, bio, mirror_num, | ||
| 1864 | prev_bio_flags); | ||
| 1694 | bio = NULL; | 1865 | bio = NULL; |
| 1695 | } else { | 1866 | } else { |
| 1696 | return 0; | 1867 | return 0; |
| 1697 | } | 1868 | } |
| 1698 | } | 1869 | } |
| 1699 | nr = bio_get_nr_vecs(bdev); | 1870 | if (this_compressed) |
| 1871 | nr = BIO_MAX_PAGES; | ||
| 1872 | else | ||
| 1873 | nr = bio_get_nr_vecs(bdev); | ||
| 1874 | |||
| 1700 | bio = extent_bio_alloc(bdev, sector, nr, GFP_NOFS | __GFP_HIGH); | 1875 | bio = extent_bio_alloc(bdev, sector, nr, GFP_NOFS | __GFP_HIGH); |
| 1701 | if (!bio) { | 1876 | if (!bio) { |
| 1702 | printk("failed to allocate bio nr %d\n", nr); | 1877 | printk("failed to allocate bio nr %d\n", nr); |
| 1703 | } | 1878 | } |
| 1704 | 1879 | ||
| 1705 | 1880 | bio_add_page(bio, page, page_size, offset); | |
| 1706 | bio_add_page(bio, page, size, offset); | ||
| 1707 | bio->bi_end_io = end_io_func; | 1881 | bio->bi_end_io = end_io_func; |
| 1708 | bio->bi_private = tree; | 1882 | bio->bi_private = tree; |
| 1709 | 1883 | ||
| 1710 | if (bio_ret) { | 1884 | if (bio_ret) { |
| 1711 | *bio_ret = bio; | 1885 | *bio_ret = bio; |
| 1712 | } else { | 1886 | } else { |
| 1713 | ret = submit_one_bio(rw, bio, mirror_num); | 1887 | ret = submit_one_bio(rw, bio, mirror_num, bio_flags); |
| 1714 | } | 1888 | } |
| 1715 | 1889 | ||
| 1716 | return ret; | 1890 | return ret; |
| @@ -1738,7 +1912,8 @@ void set_page_extent_head(struct page *page, unsigned long len) | |||
| 1738 | static int __extent_read_full_page(struct extent_io_tree *tree, | 1912 | static int __extent_read_full_page(struct extent_io_tree *tree, |
| 1739 | struct page *page, | 1913 | struct page *page, |
| 1740 | get_extent_t *get_extent, | 1914 | get_extent_t *get_extent, |
| 1741 | struct bio **bio, int mirror_num) | 1915 | struct bio **bio, int mirror_num, |
| 1916 | unsigned long *bio_flags) | ||
| 1742 | { | 1917 | { |
| 1743 | struct inode *inode = page->mapping->host; | 1918 | struct inode *inode = page->mapping->host; |
| 1744 | u64 start = (u64)page->index << PAGE_CACHE_SHIFT; | 1919 | u64 start = (u64)page->index << PAGE_CACHE_SHIFT; |
| @@ -1756,13 +1931,27 @@ static int __extent_read_full_page(struct extent_io_tree *tree, | |||
| 1756 | int nr = 0; | 1931 | int nr = 0; |
| 1757 | size_t page_offset = 0; | 1932 | size_t page_offset = 0; |
| 1758 | size_t iosize; | 1933 | size_t iosize; |
| 1934 | size_t disk_io_size; | ||
| 1759 | size_t blocksize = inode->i_sb->s_blocksize; | 1935 | size_t blocksize = inode->i_sb->s_blocksize; |
| 1936 | unsigned long this_bio_flag = 0; | ||
| 1760 | 1937 | ||
| 1761 | set_page_extent_mapped(page); | 1938 | set_page_extent_mapped(page); |
| 1762 | 1939 | ||
| 1763 | end = page_end; | 1940 | end = page_end; |
| 1764 | lock_extent(tree, start, end, GFP_NOFS); | 1941 | lock_extent(tree, start, end, GFP_NOFS); |
| 1765 | 1942 | ||
| 1943 | if (page->index == last_byte >> PAGE_CACHE_SHIFT) { | ||
| 1944 | char *userpage; | ||
| 1945 | size_t zero_offset = last_byte & (PAGE_CACHE_SIZE - 1); | ||
| 1946 | |||
| 1947 | if (zero_offset) { | ||
| 1948 | iosize = PAGE_CACHE_SIZE - zero_offset; | ||
| 1949 | userpage = kmap_atomic(page, KM_USER0); | ||
| 1950 | memset(userpage + zero_offset, 0, iosize); | ||
| 1951 | flush_dcache_page(page); | ||
| 1952 | kunmap_atomic(userpage, KM_USER0); | ||
| 1953 | } | ||
| 1954 | } | ||
| 1766 | while (cur <= end) { | 1955 | while (cur <= end) { |
| 1767 | if (cur >= last_byte) { | 1956 | if (cur >= last_byte) { |
| 1768 | char *userpage; | 1957 | char *userpage; |
| @@ -1793,10 +1982,19 @@ printk("2bad mapping end %Lu cur %Lu\n", end, cur); | |||
| 1793 | } | 1982 | } |
| 1794 | BUG_ON(end < cur); | 1983 | BUG_ON(end < cur); |
| 1795 | 1984 | ||
| 1985 | if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) | ||
| 1986 | this_bio_flag = EXTENT_BIO_COMPRESSED; | ||
| 1987 | |||
| 1796 | iosize = min(extent_map_end(em) - cur, end - cur + 1); | 1988 | iosize = min(extent_map_end(em) - cur, end - cur + 1); |
| 1797 | cur_end = min(extent_map_end(em) - 1, end); | 1989 | cur_end = min(extent_map_end(em) - 1, end); |
| 1798 | iosize = (iosize + blocksize - 1) & ~((u64)blocksize - 1); | 1990 | iosize = (iosize + blocksize - 1) & ~((u64)blocksize - 1); |
| 1799 | sector = (em->block_start + extent_offset) >> 9; | 1991 | if (this_bio_flag & EXTENT_BIO_COMPRESSED) { |
| 1992 | disk_io_size = em->block_len; | ||
| 1993 | sector = em->block_start >> 9; | ||
| 1994 | } else { | ||
| 1995 | sector = (em->block_start + extent_offset) >> 9; | ||
| 1996 | disk_io_size = iosize; | ||
| 1997 | } | ||
| 1800 | bdev = em->bdev; | 1998 | bdev = em->bdev; |
| 1801 | block_start = em->block_start; | 1999 | block_start = em->block_start; |
| 1802 | free_extent_map(em); | 2000 | free_extent_map(em); |
| @@ -1845,10 +2043,13 @@ printk("2bad mapping end %Lu cur %Lu\n", end, cur); | |||
| 1845 | unsigned long pnr = (last_byte >> PAGE_CACHE_SHIFT) + 1; | 2043 | unsigned long pnr = (last_byte >> PAGE_CACHE_SHIFT) + 1; |
| 1846 | pnr -= page->index; | 2044 | pnr -= page->index; |
| 1847 | ret = submit_extent_page(READ, tree, page, | 2045 | ret = submit_extent_page(READ, tree, page, |
| 1848 | sector, iosize, page_offset, | 2046 | sector, disk_io_size, page_offset, |
| 1849 | bdev, bio, pnr, | 2047 | bdev, bio, pnr, |
| 1850 | end_bio_extent_readpage, mirror_num); | 2048 | end_bio_extent_readpage, mirror_num, |
| 2049 | *bio_flags, | ||
| 2050 | this_bio_flag); | ||
| 1851 | nr++; | 2051 | nr++; |
| 2052 | *bio_flags = this_bio_flag; | ||
| 1852 | } | 2053 | } |
| 1853 | if (ret) | 2054 | if (ret) |
| 1854 | SetPageError(page); | 2055 | SetPageError(page); |
| @@ -1867,11 +2068,13 @@ int extent_read_full_page(struct extent_io_tree *tree, struct page *page, | |||
| 1867 | get_extent_t *get_extent) | 2068 | get_extent_t *get_extent) |
| 1868 | { | 2069 | { |
| 1869 | struct bio *bio = NULL; | 2070 | struct bio *bio = NULL; |
| 2071 | unsigned long bio_flags = 0; | ||
| 1870 | int ret; | 2072 | int ret; |
| 1871 | 2073 | ||
| 1872 | ret = __extent_read_full_page(tree, page, get_extent, &bio, 0); | 2074 | ret = __extent_read_full_page(tree, page, get_extent, &bio, 0, |
| 2075 | &bio_flags); | ||
| 1873 | if (bio) | 2076 | if (bio) |
| 1874 | submit_one_bio(READ, bio, 0); | 2077 | submit_one_bio(READ, bio, 0, bio_flags); |
| 1875 | return ret; | 2078 | return ret; |
| 1876 | } | 2079 | } |
| 1877 | EXPORT_SYMBOL(extent_read_full_page); | 2080 | EXPORT_SYMBOL(extent_read_full_page); |
| @@ -1909,6 +2112,8 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, | |||
| 1909 | unsigned long end_index = i_size >> PAGE_CACHE_SHIFT; | 2112 | unsigned long end_index = i_size >> PAGE_CACHE_SHIFT; |
| 1910 | u64 nr_delalloc; | 2113 | u64 nr_delalloc; |
| 1911 | u64 delalloc_end; | 2114 | u64 delalloc_end; |
| 2115 | int page_started; | ||
| 2116 | int compressed; | ||
| 1912 | 2117 | ||
| 1913 | WARN_ON(!PageLocked(page)); | 2118 | WARN_ON(!PageLocked(page)); |
| 1914 | pg_offset = i_size & (PAGE_CACHE_SIZE - 1); | 2119 | pg_offset = i_size & (PAGE_CACHE_SIZE - 1); |
| @@ -1934,27 +2139,33 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, | |||
| 1934 | 2139 | ||
| 1935 | delalloc_start = start; | 2140 | delalloc_start = start; |
| 1936 | delalloc_end = 0; | 2141 | delalloc_end = 0; |
| 2142 | page_started = 0; | ||
| 1937 | while(delalloc_end < page_end) { | 2143 | while(delalloc_end < page_end) { |
| 1938 | nr_delalloc = find_lock_delalloc_range(tree, &delalloc_start, | 2144 | nr_delalloc = find_lock_delalloc_range(inode, tree, |
| 2145 | page, | ||
| 2146 | &delalloc_start, | ||
| 1939 | &delalloc_end, | 2147 | &delalloc_end, |
| 1940 | 128 * 1024 * 1024); | 2148 | 128 * 1024 * 1024); |
| 1941 | if (nr_delalloc == 0) { | 2149 | if (nr_delalloc == 0) { |
| 1942 | delalloc_start = delalloc_end + 1; | 2150 | delalloc_start = delalloc_end + 1; |
| 1943 | continue; | 2151 | continue; |
| 1944 | } | 2152 | } |
| 1945 | tree->ops->fill_delalloc(inode, delalloc_start, | 2153 | tree->ops->fill_delalloc(inode, page, delalloc_start, |
| 1946 | delalloc_end); | 2154 | delalloc_end, &page_started); |
| 1947 | clear_extent_bit(tree, delalloc_start, | ||
| 1948 | delalloc_end, | ||
| 1949 | EXTENT_LOCKED | EXTENT_DELALLOC, | ||
| 1950 | 1, 0, GFP_NOFS); | ||
| 1951 | delalloc_start = delalloc_end + 1; | 2155 | delalloc_start = delalloc_end + 1; |
| 1952 | } | 2156 | } |
| 2157 | |||
| 2158 | /* did the fill delalloc function already unlock and start the IO? */ | ||
| 2159 | if (page_started) { | ||
| 2160 | return 0; | ||
| 2161 | } | ||
| 2162 | |||
| 1953 | lock_extent(tree, start, page_end, GFP_NOFS); | 2163 | lock_extent(tree, start, page_end, GFP_NOFS); |
| 1954 | unlock_start = start; | 2164 | unlock_start = start; |
| 1955 | 2165 | ||
| 1956 | if (tree->ops && tree->ops->writepage_start_hook) { | 2166 | if (tree->ops && tree->ops->writepage_start_hook) { |
| 1957 | ret = tree->ops->writepage_start_hook(page, start, page_end); | 2167 | ret = tree->ops->writepage_start_hook(page, start, |
| 2168 | page_end); | ||
| 1958 | if (ret == -EAGAIN) { | 2169 | if (ret == -EAGAIN) { |
| 1959 | unlock_extent(tree, start, page_end, GFP_NOFS); | 2170 | unlock_extent(tree, start, page_end, GFP_NOFS); |
| 1960 | redirty_page_for_writepage(wbc, page); | 2171 | redirty_page_for_writepage(wbc, page); |
| @@ -2006,10 +2217,15 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, | |||
| 2006 | sector = (em->block_start + extent_offset) >> 9; | 2217 | sector = (em->block_start + extent_offset) >> 9; |
| 2007 | bdev = em->bdev; | 2218 | bdev = em->bdev; |
| 2008 | block_start = em->block_start; | 2219 | block_start = em->block_start; |
| 2220 | compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags); | ||
| 2009 | free_extent_map(em); | 2221 | free_extent_map(em); |
| 2010 | em = NULL; | 2222 | em = NULL; |
| 2011 | 2223 | ||
| 2012 | if (block_start == EXTENT_MAP_HOLE || | 2224 | /* |
| 2225 | * compressed and inline extents are written through other | ||
| 2226 | * paths in the FS | ||
| 2227 | */ | ||
| 2228 | if (compressed || block_start == EXTENT_MAP_HOLE || | ||
| 2013 | block_start == EXTENT_MAP_INLINE) { | 2229 | block_start == EXTENT_MAP_INLINE) { |
| 2014 | clear_extent_dirty(tree, cur, | 2230 | clear_extent_dirty(tree, cur, |
| 2015 | cur + iosize - 1, GFP_NOFS); | 2231 | cur + iosize - 1, GFP_NOFS); |
| @@ -2017,16 +2233,28 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, | |||
| 2017 | unlock_extent(tree, unlock_start, cur + iosize -1, | 2233 | unlock_extent(tree, unlock_start, cur + iosize -1, |
| 2018 | GFP_NOFS); | 2234 | GFP_NOFS); |
| 2019 | 2235 | ||
| 2020 | if (tree->ops && tree->ops->writepage_end_io_hook) | 2236 | /* |
| 2237 | * end_io notification does not happen here for | ||
| 2238 | * compressed extents | ||
| 2239 | */ | ||
| 2240 | if (!compressed && tree->ops && | ||
| 2241 | tree->ops->writepage_end_io_hook) | ||
| 2021 | tree->ops->writepage_end_io_hook(page, cur, | 2242 | tree->ops->writepage_end_io_hook(page, cur, |
| 2022 | cur + iosize - 1, | 2243 | cur + iosize - 1, |
| 2023 | NULL, 1); | 2244 | NULL, 1); |
| 2024 | cur = cur + iosize; | 2245 | else if (compressed) { |
| 2246 | /* we don't want to end_page_writeback on | ||
| 2247 | * a compressed extent. this happens | ||
| 2248 | * elsewhere | ||
| 2249 | */ | ||
| 2250 | nr++; | ||
| 2251 | } | ||
| 2252 | |||
| 2253 | cur += iosize; | ||
| 2025 | pg_offset += iosize; | 2254 | pg_offset += iosize; |
| 2026 | unlock_start = cur; | 2255 | unlock_start = cur; |
| 2027 | continue; | 2256 | continue; |
| 2028 | } | 2257 | } |
| 2029 | |||
| 2030 | /* leave this out until we have a page_mkwrite call */ | 2258 | /* leave this out until we have a page_mkwrite call */ |
| 2031 | if (0 && !test_range_bit(tree, cur, cur + iosize - 1, | 2259 | if (0 && !test_range_bit(tree, cur, cur + iosize - 1, |
| 2032 | EXTENT_DIRTY, 0)) { | 2260 | EXTENT_DIRTY, 0)) { |
| @@ -2034,6 +2262,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, | |||
| 2034 | pg_offset += iosize; | 2262 | pg_offset += iosize; |
| 2035 | continue; | 2263 | continue; |
| 2036 | } | 2264 | } |
| 2265 | |||
| 2037 | clear_extent_dirty(tree, cur, cur + iosize - 1, GFP_NOFS); | 2266 | clear_extent_dirty(tree, cur, cur + iosize - 1, GFP_NOFS); |
| 2038 | if (tree->ops && tree->ops->writepage_io_hook) { | 2267 | if (tree->ops && tree->ops->writepage_io_hook) { |
| 2039 | ret = tree->ops->writepage_io_hook(page, cur, | 2268 | ret = tree->ops->writepage_io_hook(page, cur, |
| @@ -2057,7 +2286,8 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, | |||
| 2057 | ret = submit_extent_page(WRITE, tree, page, sector, | 2286 | ret = submit_extent_page(WRITE, tree, page, sector, |
| 2058 | iosize, pg_offset, bdev, | 2287 | iosize, pg_offset, bdev, |
| 2059 | &epd->bio, max_nr, | 2288 | &epd->bio, max_nr, |
| 2060 | end_bio_extent_writepage, 0); | 2289 | end_bio_extent_writepage, |
| 2290 | 0, 0, 0); | ||
| 2061 | if (ret) | 2291 | if (ret) |
| 2062 | SetPageError(page); | 2292 | SetPageError(page); |
| 2063 | } | 2293 | } |
| @@ -2226,7 +2456,7 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page, | |||
| 2226 | extent_write_cache_pages(tree, mapping, &wbc_writepages, | 2456 | extent_write_cache_pages(tree, mapping, &wbc_writepages, |
| 2227 | __extent_writepage, &epd); | 2457 | __extent_writepage, &epd); |
| 2228 | if (epd.bio) { | 2458 | if (epd.bio) { |
| 2229 | submit_one_bio(WRITE, epd.bio, 0); | 2459 | submit_one_bio(WRITE, epd.bio, 0, 0); |
| 2230 | } | 2460 | } |
| 2231 | return ret; | 2461 | return ret; |
| 2232 | } | 2462 | } |
| @@ -2248,7 +2478,7 @@ int extent_writepages(struct extent_io_tree *tree, | |||
| 2248 | ret = extent_write_cache_pages(tree, mapping, wbc, | 2478 | ret = extent_write_cache_pages(tree, mapping, wbc, |
| 2249 | __extent_writepage, &epd); | 2479 | __extent_writepage, &epd); |
| 2250 | if (epd.bio) { | 2480 | if (epd.bio) { |
| 2251 | submit_one_bio(WRITE, epd.bio, 0); | 2481 | submit_one_bio(WRITE, epd.bio, 0, 0); |
| 2252 | } | 2482 | } |
| 2253 | return ret; | 2483 | return ret; |
| 2254 | } | 2484 | } |
| @@ -2262,6 +2492,7 @@ int extent_readpages(struct extent_io_tree *tree, | |||
| 2262 | struct bio *bio = NULL; | 2492 | struct bio *bio = NULL; |
| 2263 | unsigned page_idx; | 2493 | unsigned page_idx; |
| 2264 | struct pagevec pvec; | 2494 | struct pagevec pvec; |
| 2495 | unsigned long bio_flags = 0; | ||
| 2265 | 2496 | ||
| 2266 | pagevec_init(&pvec, 0); | 2497 | pagevec_init(&pvec, 0); |
| 2267 | for (page_idx = 0; page_idx < nr_pages; page_idx++) { | 2498 | for (page_idx = 0; page_idx < nr_pages; page_idx++) { |
| @@ -2281,7 +2512,7 @@ int extent_readpages(struct extent_io_tree *tree, | |||
| 2281 | if (!pagevec_add(&pvec, page)) | 2512 | if (!pagevec_add(&pvec, page)) |
| 2282 | __pagevec_lru_add(&pvec); | 2513 | __pagevec_lru_add(&pvec); |
| 2283 | __extent_read_full_page(tree, page, get_extent, | 2514 | __extent_read_full_page(tree, page, get_extent, |
| 2284 | &bio, 0); | 2515 | &bio, 0, &bio_flags); |
| 2285 | } | 2516 | } |
| 2286 | page_cache_release(page); | 2517 | page_cache_release(page); |
| 2287 | } | 2518 | } |
| @@ -2289,7 +2520,7 @@ int extent_readpages(struct extent_io_tree *tree, | |||
| 2289 | __pagevec_lru_add(&pvec); | 2520 | __pagevec_lru_add(&pvec); |
| 2290 | BUG_ON(!list_empty(pages)); | 2521 | BUG_ON(!list_empty(pages)); |
| 2291 | if (bio) | 2522 | if (bio) |
| 2292 | submit_one_bio(READ, bio, 0); | 2523 | submit_one_bio(READ, bio, 0, bio_flags); |
| 2293 | return 0; | 2524 | return 0; |
| 2294 | } | 2525 | } |
| 2295 | EXPORT_SYMBOL(extent_readpages); | 2526 | EXPORT_SYMBOL(extent_readpages); |
| @@ -2414,7 +2645,8 @@ int extent_prepare_write(struct extent_io_tree *tree, | |||
| 2414 | ret = submit_extent_page(READ, tree, page, | 2645 | ret = submit_extent_page(READ, tree, page, |
| 2415 | sector, iosize, page_offset, em->bdev, | 2646 | sector, iosize, page_offset, em->bdev, |
| 2416 | NULL, 1, | 2647 | NULL, 1, |
| 2417 | end_bio_extent_preparewrite, 0); | 2648 | end_bio_extent_preparewrite, 0, |
| 2649 | 0, 0); | ||
| 2418 | iocount++; | 2650 | iocount++; |
| 2419 | block_start = block_start + iosize; | 2651 | block_start = block_start + iosize; |
| 2420 | } else { | 2652 | } else { |
| @@ -2495,7 +2727,9 @@ int try_release_extent_mapping(struct extent_map_tree *map, | |||
| 2495 | } | 2727 | } |
| 2496 | if (!test_range_bit(tree, em->start, | 2728 | if (!test_range_bit(tree, em->start, |
| 2497 | extent_map_end(em) - 1, | 2729 | extent_map_end(em) - 1, |
| 2498 | EXTENT_LOCKED, 0)) { | 2730 | EXTENT_LOCKED | EXTENT_WRITEBACK | |
| 2731 | EXTENT_ORDERED, | ||
| 2732 | 0)) { | ||
| 2499 | remove_extent_mapping(map, em); | 2733 | remove_extent_mapping(map, em); |
| 2500 | /* once for the rb tree */ | 2734 | /* once for the rb tree */ |
| 2501 | free_extent_map(em); | 2735 | free_extent_map(em); |
| @@ -2923,6 +3157,7 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, | |||
| 2923 | int inc_all_pages = 0; | 3157 | int inc_all_pages = 0; |
| 2924 | unsigned long num_pages; | 3158 | unsigned long num_pages; |
| 2925 | struct bio *bio = NULL; | 3159 | struct bio *bio = NULL; |
| 3160 | unsigned long bio_flags = 0; | ||
| 2926 | 3161 | ||
| 2927 | if (eb->flags & EXTENT_UPTODATE) | 3162 | if (eb->flags & EXTENT_UPTODATE) |
| 2928 | return 0; | 3163 | return 0; |
| @@ -2973,7 +3208,7 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, | |||
| 2973 | ClearPageError(page); | 3208 | ClearPageError(page); |
| 2974 | err = __extent_read_full_page(tree, page, | 3209 | err = __extent_read_full_page(tree, page, |
| 2975 | get_extent, &bio, | 3210 | get_extent, &bio, |
| 2976 | mirror_num); | 3211 | mirror_num, &bio_flags); |
| 2977 | if (err) { | 3212 | if (err) { |
| 2978 | ret = err; | 3213 | ret = err; |
| 2979 | printk("err %d from __extent_read_full_page\n", ret); | 3214 | printk("err %d from __extent_read_full_page\n", ret); |
| @@ -2984,7 +3219,7 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, | |||
| 2984 | } | 3219 | } |
| 2985 | 3220 | ||
| 2986 | if (bio) | 3221 | if (bio) |
| 2987 | submit_one_bio(READ, bio, mirror_num); | 3222 | submit_one_bio(READ, bio, mirror_num, bio_flags); |
| 2988 | 3223 | ||
| 2989 | if (ret || !wait) { | 3224 | if (ret || !wait) { |
| 2990 | if (ret) | 3225 | if (ret) |
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index c9d1908a1ae..86f859b87a6 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h | |||
| @@ -18,6 +18,9 @@ | |||
| 18 | #define EXTENT_BOUNDARY (1 << 11) | 18 | #define EXTENT_BOUNDARY (1 << 11) |
| 19 | #define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) | 19 | #define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) |
| 20 | 20 | ||
| 21 | /* flags for bio submission */ | ||
| 22 | #define EXTENT_BIO_COMPRESSED 1 | ||
| 23 | |||
| 21 | /* | 24 | /* |
| 22 | * page->private values. Every page that is controlled by the extent | 25 | * page->private values. Every page that is controlled by the extent |
| 23 | * map has page->private set to one. | 26 | * map has page->private set to one. |
| @@ -28,14 +31,17 @@ | |||
| 28 | struct extent_state; | 31 | struct extent_state; |
| 29 | 32 | ||
| 30 | typedef int (extent_submit_bio_hook_t)(struct inode *inode, int rw, | 33 | typedef int (extent_submit_bio_hook_t)(struct inode *inode, int rw, |
| 31 | struct bio *bio, int mirror_num); | 34 | struct bio *bio, int mirror_num, |
| 35 | unsigned long bio_flags); | ||
| 32 | struct extent_io_ops { | 36 | struct extent_io_ops { |
| 33 | int (*fill_delalloc)(struct inode *inode, u64 start, u64 end); | 37 | int (*fill_delalloc)(struct inode *inode, struct page *locked_page, |
| 38 | u64 start, u64 end, int *page_started); | ||
| 34 | int (*writepage_start_hook)(struct page *page, u64 start, u64 end); | 39 | int (*writepage_start_hook)(struct page *page, u64 start, u64 end); |
| 35 | int (*writepage_io_hook)(struct page *page, u64 start, u64 end); | 40 | int (*writepage_io_hook)(struct page *page, u64 start, u64 end); |
| 36 | extent_submit_bio_hook_t *submit_bio_hook; | 41 | extent_submit_bio_hook_t *submit_bio_hook; |
| 37 | int (*merge_bio_hook)(struct page *page, unsigned long offset, | 42 | int (*merge_bio_hook)(struct page *page, unsigned long offset, |
| 38 | size_t size, struct bio *bio); | 43 | size_t size, struct bio *bio, |
| 44 | unsigned long bio_flags); | ||
| 39 | int (*readpage_io_hook)(struct page *page, u64 start, u64 end); | 45 | int (*readpage_io_hook)(struct page *page, u64 start, u64 end); |
| 40 | int (*readpage_io_failed_hook)(struct bio *bio, struct page *page, | 46 | int (*readpage_io_failed_hook)(struct bio *bio, struct page *page, |
| 41 | u64 start, u64 end, | 47 | u64 start, u64 end, |
| @@ -245,4 +251,9 @@ void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km); | |||
| 245 | int release_extent_buffer_tail_pages(struct extent_buffer *eb); | 251 | int release_extent_buffer_tail_pages(struct extent_buffer *eb); |
| 246 | int extent_range_uptodate(struct extent_io_tree *tree, | 252 | int extent_range_uptodate(struct extent_io_tree *tree, |
| 247 | u64 start, u64 end); | 253 | u64 start, u64 end); |
| 254 | int extent_clear_unlock_delalloc(struct inode *inode, | ||
| 255 | struct extent_io_tree *tree, | ||
| 256 | u64 start, u64 end, struct page *locked_page, | ||
| 257 | int clear_dirty, int set_writeback, | ||
| 258 | int clear_writeback); | ||
| 248 | #endif | 259 | #endif |
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 74b2a29880d..fd3ebfb8c3c 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c | |||
| @@ -184,6 +184,13 @@ static int mergable_maps(struct extent_map *prev, struct extent_map *next) | |||
| 184 | if (test_bit(EXTENT_FLAG_PINNED, &prev->flags)) | 184 | if (test_bit(EXTENT_FLAG_PINNED, &prev->flags)) |
| 185 | return 0; | 185 | return 0; |
| 186 | 186 | ||
| 187 | /* | ||
| 188 | * don't merge compressed extents, we need to know their | ||
| 189 | * actual size | ||
| 190 | */ | ||
| 191 | if (test_bit(EXTENT_FLAG_COMPRESSED, &prev->flags)) | ||
| 192 | return 0; | ||
| 193 | |||
| 187 | if (extent_map_end(prev) == next->start && | 194 | if (extent_map_end(prev) == next->start && |
| 188 | prev->flags == next->flags && | 195 | prev->flags == next->flags && |
| 189 | prev->bdev == next->bdev && | 196 | prev->bdev == next->bdev && |
| @@ -239,6 +246,7 @@ int add_extent_mapping(struct extent_map_tree *tree, | |||
| 239 | if (rb && mergable_maps(merge, em)) { | 246 | if (rb && mergable_maps(merge, em)) { |
| 240 | em->start = merge->start; | 247 | em->start = merge->start; |
| 241 | em->len += merge->len; | 248 | em->len += merge->len; |
| 249 | em->block_len += merge->block_len; | ||
| 242 | em->block_start = merge->block_start; | 250 | em->block_start = merge->block_start; |
| 243 | merge->in_tree = 0; | 251 | merge->in_tree = 0; |
| 244 | rb_erase(&merge->rb_node, &tree->map); | 252 | rb_erase(&merge->rb_node, &tree->map); |
| @@ -250,6 +258,7 @@ int add_extent_mapping(struct extent_map_tree *tree, | |||
| 250 | merge = rb_entry(rb, struct extent_map, rb_node); | 258 | merge = rb_entry(rb, struct extent_map, rb_node); |
| 251 | if (rb && mergable_maps(em, merge)) { | 259 | if (rb && mergable_maps(em, merge)) { |
| 252 | em->len += merge->len; | 260 | em->len += merge->len; |
| 261 | em->block_len += merge->len; | ||
| 253 | rb_erase(&merge->rb_node, &tree->map); | 262 | rb_erase(&merge->rb_node, &tree->map); |
| 254 | merge->in_tree = 0; | 263 | merge->in_tree = 0; |
| 255 | free_extent_map(merge); | 264 | free_extent_map(merge); |
diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index 26ac6fe0b26..abbcbeb28c7 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h | |||
| @@ -10,6 +10,7 @@ | |||
| 10 | 10 | ||
| 11 | /* bits for the flags field */ | 11 | /* bits for the flags field */ |
| 12 | #define EXTENT_FLAG_PINNED 0 /* this entry not yet on disk, don't free it */ | 12 | #define EXTENT_FLAG_PINNED 0 /* this entry not yet on disk, don't free it */ |
| 13 | #define EXTENT_FLAG_COMPRESSED 1 | ||
| 13 | 14 | ||
| 14 | struct extent_map { | 15 | struct extent_map { |
| 15 | struct rb_node rb_node; | 16 | struct rb_node rb_node; |
| @@ -18,6 +19,7 @@ struct extent_map { | |||
| 18 | u64 start; | 19 | u64 start; |
| 19 | u64 len; | 20 | u64 len; |
| 20 | u64 block_start; | 21 | u64 block_start; |
| 22 | u64 block_len; | ||
| 21 | unsigned long flags; | 23 | unsigned long flags; |
| 22 | struct block_device *bdev; | 24 | struct block_device *bdev; |
| 23 | atomic_t refs; | 25 | atomic_t refs; |
| @@ -38,9 +40,9 @@ static inline u64 extent_map_end(struct extent_map *em) | |||
| 38 | 40 | ||
| 39 | static inline u64 extent_map_block_end(struct extent_map *em) | 41 | static inline u64 extent_map_block_end(struct extent_map *em) |
| 40 | { | 42 | { |
| 41 | if (em->block_start + em->len < em->block_start) | 43 | if (em->block_start + em->block_len < em->block_start) |
| 42 | return (u64)-1; | 44 | return (u64)-1; |
| 43 | return em->block_start + em->len; | 45 | return em->block_start + em->block_len; |
| 44 | } | 46 | } |
| 45 | 47 | ||
| 46 | void extent_map_tree_init(struct extent_map_tree *tree, gfp_t mask); | 48 | void extent_map_tree_init(struct extent_map_tree *tree, gfp_t mask); |
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 6dbe88b9d7d..f4d3fa71bc4 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c | |||
| @@ -31,7 +31,8 @@ int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, | |||
| 31 | struct btrfs_root *root, | 31 | struct btrfs_root *root, |
| 32 | u64 objectid, u64 pos, | 32 | u64 objectid, u64 pos, |
| 33 | u64 disk_offset, u64 disk_num_bytes, | 33 | u64 disk_offset, u64 disk_num_bytes, |
| 34 | u64 num_bytes, u64 offset) | 34 | u64 num_bytes, u64 offset, u64 ram_bytes, |
| 35 | u8 compression, u8 encryption, u16 other_encoding) | ||
| 35 | { | 36 | { |
| 36 | int ret = 0; | 37 | int ret = 0; |
| 37 | struct btrfs_file_extent_item *item; | 38 | struct btrfs_file_extent_item *item; |
| @@ -57,8 +58,13 @@ int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, | |||
| 57 | btrfs_set_file_extent_disk_num_bytes(leaf, item, disk_num_bytes); | 58 | btrfs_set_file_extent_disk_num_bytes(leaf, item, disk_num_bytes); |
| 58 | btrfs_set_file_extent_offset(leaf, item, offset); | 59 | btrfs_set_file_extent_offset(leaf, item, offset); |
| 59 | btrfs_set_file_extent_num_bytes(leaf, item, num_bytes); | 60 | btrfs_set_file_extent_num_bytes(leaf, item, num_bytes); |
| 61 | btrfs_set_file_extent_ram_bytes(leaf, item, ram_bytes); | ||
| 60 | btrfs_set_file_extent_generation(leaf, item, trans->transid); | 62 | btrfs_set_file_extent_generation(leaf, item, trans->transid); |
| 61 | btrfs_set_file_extent_type(leaf, item, BTRFS_FILE_EXTENT_REG); | 63 | btrfs_set_file_extent_type(leaf, item, BTRFS_FILE_EXTENT_REG); |
| 64 | btrfs_set_file_extent_compression(leaf, item, compression); | ||
| 65 | btrfs_set_file_extent_encryption(leaf, item, encryption); | ||
| 66 | btrfs_set_file_extent_other_encoding(leaf, item, other_encoding); | ||
| 67 | |||
| 62 | btrfs_mark_buffer_dirty(leaf); | 68 | btrfs_mark_buffer_dirty(leaf); |
| 63 | out: | 69 | out: |
| 64 | btrfs_free_path(path); | 70 | btrfs_free_path(path); |
| @@ -213,6 +219,73 @@ found: | |||
| 213 | return 0; | 219 | return 0; |
| 214 | } | 220 | } |
| 215 | 221 | ||
| 222 | int btrfs_csum_file_bytes(struct btrfs_root *root, struct inode *inode, | ||
| 223 | u64 start, unsigned long len) | ||
| 224 | { | ||
| 225 | struct btrfs_ordered_sum *sums; | ||
| 226 | struct btrfs_sector_sum *sector_sum; | ||
| 227 | struct btrfs_ordered_extent *ordered; | ||
| 228 | char *data; | ||
| 229 | struct page *page; | ||
| 230 | unsigned long total_bytes = 0; | ||
| 231 | unsigned long this_sum_bytes = 0; | ||
| 232 | |||
| 233 | sums = kzalloc(btrfs_ordered_sum_size(root, len), GFP_NOFS); | ||
| 234 | if (!sums) | ||
| 235 | return -ENOMEM; | ||
| 236 | |||
| 237 | sector_sum = sums->sums; | ||
| 238 | sums->file_offset = start; | ||
| 239 | sums->len = len; | ||
| 240 | INIT_LIST_HEAD(&sums->list); | ||
| 241 | ordered = btrfs_lookup_ordered_extent(inode, sums->file_offset); | ||
| 242 | BUG_ON(!ordered); | ||
| 243 | |||
| 244 | while(len > 0) { | ||
| 245 | if (start >= ordered->file_offset + ordered->len || | ||
| 246 | start < ordered->file_offset) { | ||
| 247 | sums->len = this_sum_bytes; | ||
| 248 | this_sum_bytes = 0; | ||
| 249 | btrfs_add_ordered_sum(inode, ordered, sums); | ||
| 250 | btrfs_put_ordered_extent(ordered); | ||
| 251 | |||
| 252 | sums = kzalloc(btrfs_ordered_sum_size(root, len), | ||
| 253 | GFP_NOFS); | ||
| 254 | BUG_ON(!sums); | ||
| 255 | sector_sum = sums->sums; | ||
| 256 | sums->len = len; | ||
| 257 | sums->file_offset = start; | ||
| 258 | ordered = btrfs_lookup_ordered_extent(inode, | ||
| 259 | sums->file_offset); | ||
| 260 | BUG_ON(!ordered); | ||
| 261 | } | ||
| 262 | |||
| 263 | page = find_get_page(inode->i_mapping, | ||
| 264 | start >> PAGE_CACHE_SHIFT); | ||
| 265 | |||
| 266 | data = kmap_atomic(page, KM_USER0); | ||
| 267 | sector_sum->sum = ~(u32)0; | ||
| 268 | sector_sum->sum = btrfs_csum_data(root, data, sector_sum->sum, | ||
| 269 | PAGE_CACHE_SIZE); | ||
| 270 | kunmap_atomic(data, KM_USER0); | ||
| 271 | btrfs_csum_final(sector_sum->sum, | ||
| 272 | (char *)§or_sum->sum); | ||
| 273 | sector_sum->offset = page_offset(page); | ||
| 274 | page_cache_release(page); | ||
| 275 | |||
| 276 | sector_sum++; | ||
| 277 | total_bytes += PAGE_CACHE_SIZE; | ||
| 278 | this_sum_bytes += PAGE_CACHE_SIZE; | ||
| 279 | start += PAGE_CACHE_SIZE; | ||
| 280 | |||
| 281 | WARN_ON(len < PAGE_CACHE_SIZE); | ||
| 282 | len -= PAGE_CACHE_SIZE; | ||
| 283 | } | ||
| 284 | btrfs_add_ordered_sum(inode, ordered, sums); | ||
| 285 | btrfs_put_ordered_extent(ordered); | ||
| 286 | return 0; | ||
| 287 | } | ||
| 288 | |||
| 216 | int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode, | 289 | int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode, |
| 217 | struct bio *bio) | 290 | struct bio *bio) |
| 218 | { | 291 | { |
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 69abbe19add..0aa15436590 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c | |||
| @@ -95,153 +95,6 @@ static void noinline btrfs_drop_pages(struct page **pages, size_t num_pages) | |||
| 95 | } | 95 | } |
| 96 | } | 96 | } |
| 97 | 97 | ||
| 98 | /* this does all the hard work for inserting an inline extent into | ||
| 99 | * the btree. Any existing inline extent is extended as required to make room, | ||
| 100 | * otherwise things are inserted as required into the btree | ||
| 101 | */ | ||
| 102 | static int noinline insert_inline_extent(struct btrfs_trans_handle *trans, | ||
| 103 | struct btrfs_root *root, struct inode *inode, | ||
| 104 | u64 offset, size_t size, | ||
| 105 | struct page **pages, size_t page_offset, | ||
| 106 | int num_pages) | ||
| 107 | { | ||
| 108 | struct btrfs_key key; | ||
| 109 | struct btrfs_path *path; | ||
| 110 | struct extent_buffer *leaf; | ||
| 111 | char *kaddr; | ||
| 112 | unsigned long ptr; | ||
| 113 | struct btrfs_file_extent_item *ei; | ||
| 114 | struct page *page; | ||
| 115 | u32 datasize; | ||
| 116 | int err = 0; | ||
| 117 | int ret; | ||
| 118 | int i; | ||
| 119 | ssize_t cur_size; | ||
| 120 | |||
| 121 | path = btrfs_alloc_path(); | ||
| 122 | if (!path) | ||
| 123 | return -ENOMEM; | ||
| 124 | |||
| 125 | btrfs_set_trans_block_group(trans, inode); | ||
| 126 | |||
| 127 | key.objectid = inode->i_ino; | ||
| 128 | key.offset = offset; | ||
| 129 | btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY); | ||
| 130 | |||
| 131 | ret = btrfs_search_slot(trans, root, &key, path, 0, 1); | ||
| 132 | if (ret < 0) { | ||
| 133 | err = ret; | ||
| 134 | goto fail; | ||
| 135 | } | ||
| 136 | if (ret == 1) { | ||
| 137 | struct btrfs_key found_key; | ||
| 138 | |||
| 139 | if (path->slots[0] == 0) | ||
| 140 | goto insert; | ||
| 141 | |||
| 142 | path->slots[0]--; | ||
| 143 | leaf = path->nodes[0]; | ||
| 144 | btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); | ||
| 145 | |||
| 146 | if (found_key.objectid != inode->i_ino) | ||
| 147 | goto insert; | ||
| 148 | |||
| 149 | if (found_key.type != BTRFS_EXTENT_DATA_KEY) | ||
| 150 | goto insert; | ||
| 151 | ei = btrfs_item_ptr(leaf, path->slots[0], | ||
| 152 | struct btrfs_file_extent_item); | ||
| 153 | |||
| 154 | if (btrfs_file_extent_type(leaf, ei) != | ||
| 155 | BTRFS_FILE_EXTENT_INLINE) { | ||
| 156 | goto insert; | ||
| 157 | } | ||
| 158 | btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); | ||
| 159 | ret = 0; | ||
| 160 | } | ||
| 161 | if (ret == 0) { | ||
| 162 | u32 found_size; | ||
| 163 | u64 found_end; | ||
| 164 | |||
| 165 | leaf = path->nodes[0]; | ||
| 166 | ei = btrfs_item_ptr(leaf, path->slots[0], | ||
| 167 | struct btrfs_file_extent_item); | ||
| 168 | |||
| 169 | if (btrfs_file_extent_type(leaf, ei) != | ||
| 170 | BTRFS_FILE_EXTENT_INLINE) { | ||
| 171 | err = ret; | ||
| 172 | btrfs_print_leaf(root, leaf); | ||
| 173 | printk("found wasn't inline offset %Lu inode %lu\n", | ||
| 174 | offset, inode->i_ino); | ||
| 175 | goto fail; | ||
| 176 | } | ||
| 177 | found_size = btrfs_file_extent_inline_len(leaf, | ||
| 178 | btrfs_item_nr(leaf, path->slots[0])); | ||
| 179 | found_end = key.offset + found_size; | ||
| 180 | |||
| 181 | if (found_end < offset + size) { | ||
| 182 | btrfs_release_path(root, path); | ||
| 183 | ret = btrfs_search_slot(trans, root, &key, path, | ||
| 184 | offset + size - found_end, 1); | ||
| 185 | BUG_ON(ret != 0); | ||
| 186 | |||
| 187 | ret = btrfs_extend_item(trans, root, path, | ||
| 188 | offset + size - found_end); | ||
| 189 | if (ret) { | ||
| 190 | err = ret; | ||
| 191 | goto fail; | ||
| 192 | } | ||
| 193 | leaf = path->nodes[0]; | ||
| 194 | ei = btrfs_item_ptr(leaf, path->slots[0], | ||
| 195 | struct btrfs_file_extent_item); | ||
| 196 | inode_add_bytes(inode, offset + size - found_end); | ||
| 197 | } | ||
| 198 | if (found_end < offset) { | ||
| 199 | ptr = btrfs_file_extent_inline_start(ei) + found_size; | ||
| 200 | memset_extent_buffer(leaf, 0, ptr, offset - found_end); | ||
| 201 | } | ||
| 202 | } else { | ||
| 203 | insert: | ||
| 204 | btrfs_release_path(root, path); | ||
| 205 | datasize = offset + size - key.offset; | ||
| 206 | inode_add_bytes(inode, datasize); | ||
| 207 | datasize = btrfs_file_extent_calc_inline_size(datasize); | ||
| 208 | ret = btrfs_insert_empty_item(trans, root, path, &key, | ||
| 209 | datasize); | ||
| 210 | if (ret) { | ||
| 211 | err = ret; | ||
| 212 | printk("got bad ret %d\n", ret); | ||
| 213 | goto fail; | ||
| 214 | } | ||
| 215 | leaf = path->nodes[0]; | ||
| 216 | ei = btrfs_item_ptr(leaf, path->slots[0], | ||
| 217 | struct btrfs_file_extent_item); | ||
| 218 | btrfs_set_file_extent_generation(leaf, ei, trans->transid); | ||
| 219 | btrfs_set_file_extent_type(leaf, ei, BTRFS_FILE_EXTENT_INLINE); | ||
| 220 | } | ||
| 221 | ptr = btrfs_file_extent_inline_start(ei) + offset - key.offset; | ||
| 222 | |||
| 223 | cur_size = size; | ||
| 224 | i = 0; | ||
| 225 | while (size > 0) { | ||
| 226 | page = pages[i]; | ||
| 227 | kaddr = kmap_atomic(page, KM_USER0); | ||
| 228 | cur_size = min_t(size_t, PAGE_CACHE_SIZE - page_offset, size); | ||
| 229 | write_extent_buffer(leaf, kaddr + page_offset, ptr, cur_size); | ||
| 230 | kunmap_atomic(kaddr, KM_USER0); | ||
| 231 | page_offset = 0; | ||
| 232 | ptr += cur_size; | ||
| 233 | size -= cur_size; | ||
| 234 | if (i >= num_pages) { | ||
| 235 | printk("i %d num_pages %d\n", i, num_pages); | ||
| 236 | } | ||
| 237 | i++; | ||
| 238 | } | ||
| 239 | btrfs_mark_buffer_dirty(leaf); | ||
| 240 | fail: | ||
| 241 | btrfs_free_path(path); | ||
| 242 | return err; | ||
| 243 | } | ||
| 244 | |||
| 245 | /* | 98 | /* |
| 246 | * after copy_from_user, pages need to be dirtied and we need to make | 99 | * after copy_from_user, pages need to be dirtied and we need to make |
| 247 | * sure holes are created between the current EOF and the start of | 100 | * sure holes are created between the current EOF and the start of |
| @@ -267,8 +120,6 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, | |||
| 267 | u64 start_pos; | 120 | u64 start_pos; |
| 268 | u64 end_of_last_block; | 121 | u64 end_of_last_block; |
| 269 | u64 end_pos = pos + write_bytes; | 122 | u64 end_pos = pos + write_bytes; |
| 270 | u64 inline_size; | ||
| 271 | int did_inline = 0; | ||
| 272 | loff_t isize = i_size_read(inode); | 123 | loff_t isize = i_size_read(inode); |
| 273 | 124 | ||
| 274 | start_pos = pos & ~((u64)root->sectorsize - 1); | 125 | start_pos = pos & ~((u64)root->sectorsize - 1); |
| @@ -314,7 +165,8 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, | |||
| 314 | err = btrfs_insert_file_extent(trans, root, | 165 | err = btrfs_insert_file_extent(trans, root, |
| 315 | inode->i_ino, | 166 | inode->i_ino, |
| 316 | last_pos_in_file, | 167 | last_pos_in_file, |
| 317 | 0, 0, hole_size, 0); | 168 | 0, 0, hole_size, 0, |
| 169 | hole_size, 0, 0, 0); | ||
| 318 | btrfs_drop_extent_cache(inode, last_pos_in_file, | 170 | btrfs_drop_extent_cache(inode, last_pos_in_file, |
| 319 | last_pos_in_file + hole_size - 1, 0); | 171 | last_pos_in_file + hole_size - 1, 0); |
| 320 | mutex_unlock(&BTRFS_I(inode)->extent_mutex); | 172 | mutex_unlock(&BTRFS_I(inode)->extent_mutex); |
| @@ -324,57 +176,19 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, | |||
| 324 | goto failed; | 176 | goto failed; |
| 325 | } | 177 | } |
| 326 | 178 | ||
| 327 | /* | 179 | /* check for reserved extents on each page, we don't want |
| 328 | * either allocate an extent for the new bytes or setup the key | 180 | * to reset the delalloc bit on things that already have |
| 329 | * to show we are doing inline data in the extent | 181 | * extents reserved. |
| 330 | */ | 182 | */ |
| 331 | inline_size = end_pos; | 183 | btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block); |
| 332 | if (isize >= BTRFS_MAX_INLINE_DATA_SIZE(root) || | 184 | for (i = 0; i < num_pages; i++) { |
| 333 | inline_size > root->fs_info->max_inline || | 185 | struct page *p = pages[i]; |
| 334 | (inline_size & (root->sectorsize -1)) == 0 || | 186 | SetPageUptodate(p); |
| 335 | inline_size >= BTRFS_MAX_INLINE_DATA_SIZE(root)) { | 187 | ClearPageChecked(p); |
| 336 | /* check for reserved extents on each page, we don't want | 188 | set_page_dirty(p); |
| 337 | * to reset the delalloc bit on things that already have | ||
| 338 | * extents reserved. | ||
| 339 | */ | ||
| 340 | btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block); | ||
| 341 | for (i = 0; i < num_pages; i++) { | ||
| 342 | struct page *p = pages[i]; | ||
| 343 | SetPageUptodate(p); | ||
| 344 | ClearPageChecked(p); | ||
| 345 | set_page_dirty(p); | ||
| 346 | } | ||
| 347 | } else { | ||
| 348 | u64 aligned_end; | ||
| 349 | /* step one, delete the existing extents in this range */ | ||
| 350 | aligned_end = (pos + write_bytes + root->sectorsize - 1) & | ||
| 351 | ~((u64)root->sectorsize - 1); | ||
| 352 | mutex_lock(&BTRFS_I(inode)->extent_mutex); | ||
| 353 | err = btrfs_drop_extents(trans, root, inode, start_pos, | ||
| 354 | aligned_end, aligned_end, &hint_byte); | ||
| 355 | if (err) | ||
| 356 | goto failed; | ||
| 357 | if (isize > inline_size) | ||
| 358 | inline_size = min_t(u64, isize, aligned_end); | ||
| 359 | inline_size -= start_pos; | ||
| 360 | err = insert_inline_extent(trans, root, inode, start_pos, | ||
| 361 | inline_size, pages, 0, num_pages); | ||
| 362 | btrfs_drop_extent_cache(inode, start_pos, aligned_end - 1, 0); | ||
| 363 | BUG_ON(err); | ||
| 364 | mutex_unlock(&BTRFS_I(inode)->extent_mutex); | ||
| 365 | |||
| 366 | /* | ||
| 367 | * an ugly way to do all the prop accounting around | ||
| 368 | * the page bits and mapping tags | ||
| 369 | */ | ||
| 370 | set_page_writeback(pages[0]); | ||
| 371 | end_page_writeback(pages[0]); | ||
| 372 | did_inline = 1; | ||
| 373 | } | 189 | } |
| 374 | if (end_pos > isize) { | 190 | if (end_pos > isize) { |
| 375 | i_size_write(inode, end_pos); | 191 | i_size_write(inode, end_pos); |
| 376 | if (did_inline) | ||
| 377 | BTRFS_I(inode)->disk_i_size = end_pos; | ||
| 378 | btrfs_update_inode(trans, root, inode); | 192 | btrfs_update_inode(trans, root, inode); |
| 379 | } | 193 | } |
| 380 | failed: | 194 | failed: |
| @@ -399,6 +213,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | |||
| 399 | int ret; | 213 | int ret; |
| 400 | int testend = 1; | 214 | int testend = 1; |
| 401 | unsigned long flags; | 215 | unsigned long flags; |
| 216 | int compressed = 0; | ||
| 402 | 217 | ||
| 403 | WARN_ON(end < start); | 218 | WARN_ON(end < start); |
| 404 | if (end == (u64)-1) { | 219 | if (end == (u64)-1) { |
| @@ -434,6 +249,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | |||
| 434 | free_extent_map(em); | 249 | free_extent_map(em); |
| 435 | continue; | 250 | continue; |
| 436 | } | 251 | } |
| 252 | compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags); | ||
| 437 | clear_bit(EXTENT_FLAG_PINNED, &em->flags); | 253 | clear_bit(EXTENT_FLAG_PINNED, &em->flags); |
| 438 | remove_extent_mapping(em_tree, em); | 254 | remove_extent_mapping(em_tree, em); |
| 439 | 255 | ||
| @@ -442,6 +258,12 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | |||
| 442 | split->start = em->start; | 258 | split->start = em->start; |
| 443 | split->len = start - em->start; | 259 | split->len = start - em->start; |
| 444 | split->block_start = em->block_start; | 260 | split->block_start = em->block_start; |
| 261 | |||
| 262 | if (compressed) | ||
| 263 | split->block_len = em->block_len; | ||
| 264 | else | ||
| 265 | split->block_len = split->len; | ||
| 266 | |||
| 445 | split->bdev = em->bdev; | 267 | split->bdev = em->bdev; |
| 446 | split->flags = flags; | 268 | split->flags = flags; |
| 447 | ret = add_extent_mapping(em_tree, split); | 269 | ret = add_extent_mapping(em_tree, split); |
| @@ -459,7 +281,13 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | |||
| 459 | split->bdev = em->bdev; | 281 | split->bdev = em->bdev; |
| 460 | split->flags = flags; | 282 | split->flags = flags; |
| 461 | 283 | ||
| 462 | split->block_start = em->block_start + diff; | 284 | if (compressed) { |
| 285 | split->block_len = em->block_len; | ||
| 286 | split->block_start = em->block_start; | ||
| 287 | } else { | ||
| 288 | split->block_len = split->len; | ||
| 289 | split->block_start = em->block_start + diff; | ||
| 290 | } | ||
| 463 | 291 | ||
| 464 | ret = add_extent_mapping(em_tree, split); | 292 | ret = add_extent_mapping(em_tree, split); |
| 465 | BUG_ON(ret); | 293 | BUG_ON(ret); |
| @@ -533,7 +361,7 @@ int btrfs_check_file(struct btrfs_root *root, struct inode *inode) | |||
| 533 | struct btrfs_item *item; | 361 | struct btrfs_item *item; |
| 534 | item = btrfs_item_nr(leaf, slot); | 362 | item = btrfs_item_nr(leaf, slot); |
| 535 | extent_end = found_key.offset + | 363 | extent_end = found_key.offset + |
| 536 | btrfs_file_extent_inline_len(leaf, item); | 364 | btrfs_file_extent_inline_len(leaf, extent); |
| 537 | extent_end = (extent_end + root->sectorsize - 1) & | 365 | extent_end = (extent_end + root->sectorsize - 1) & |
| 538 | ~((u64)root->sectorsize -1 ); | 366 | ~((u64)root->sectorsize -1 ); |
| 539 | } | 367 | } |
| @@ -573,6 +401,10 @@ int noinline btrfs_drop_extents(struct btrfs_trans_handle *trans, | |||
| 573 | u64 extent_end = 0; | 401 | u64 extent_end = 0; |
| 574 | u64 search_start = start; | 402 | u64 search_start = start; |
| 575 | u64 leaf_start; | 403 | u64 leaf_start; |
| 404 | u64 ram_bytes = 0; | ||
| 405 | u8 compression = 0; | ||
| 406 | u8 encryption = 0; | ||
| 407 | u16 other_encoding = 0; | ||
| 576 | u64 root_gen; | 408 | u64 root_gen; |
| 577 | u64 root_owner; | 409 | u64 root_owner; |
| 578 | struct extent_buffer *leaf; | 410 | struct extent_buffer *leaf; |
| @@ -589,6 +421,7 @@ int noinline btrfs_drop_extents(struct btrfs_trans_handle *trans, | |||
| 589 | int recow; | 421 | int recow; |
| 590 | int ret; | 422 | int ret; |
| 591 | 423 | ||
| 424 | inline_limit = 0; | ||
| 592 | btrfs_drop_extent_cache(inode, start, end - 1, 0); | 425 | btrfs_drop_extent_cache(inode, start, end - 1, 0); |
| 593 | 426 | ||
| 594 | path = btrfs_alloc_path(); | 427 | path = btrfs_alloc_path(); |
| @@ -637,6 +470,12 @@ next_slot: | |||
| 637 | extent = btrfs_item_ptr(leaf, slot, | 470 | extent = btrfs_item_ptr(leaf, slot, |
| 638 | struct btrfs_file_extent_item); | 471 | struct btrfs_file_extent_item); |
| 639 | found_type = btrfs_file_extent_type(leaf, extent); | 472 | found_type = btrfs_file_extent_type(leaf, extent); |
| 473 | compression = btrfs_file_extent_compression(leaf, | ||
| 474 | extent); | ||
| 475 | encryption = btrfs_file_extent_encryption(leaf, | ||
| 476 | extent); | ||
| 477 | other_encoding = btrfs_file_extent_other_encoding(leaf, | ||
| 478 | extent); | ||
| 640 | if (found_type == BTRFS_FILE_EXTENT_REG) { | 479 | if (found_type == BTRFS_FILE_EXTENT_REG) { |
| 641 | extent_end = | 480 | extent_end = |
| 642 | btrfs_file_extent_disk_bytenr(leaf, | 481 | btrfs_file_extent_disk_bytenr(leaf, |
| @@ -646,13 +485,13 @@ next_slot: | |||
| 646 | 485 | ||
| 647 | extent_end = key.offset + | 486 | extent_end = key.offset + |
| 648 | btrfs_file_extent_num_bytes(leaf, extent); | 487 | btrfs_file_extent_num_bytes(leaf, extent); |
| 488 | ram_bytes = btrfs_file_extent_ram_bytes(leaf, | ||
| 489 | extent); | ||
| 649 | found_extent = 1; | 490 | found_extent = 1; |
| 650 | } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { | 491 | } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { |
| 651 | struct btrfs_item *item; | ||
| 652 | item = btrfs_item_nr(leaf, slot); | ||
| 653 | found_inline = 1; | 492 | found_inline = 1; |
| 654 | extent_end = key.offset + | 493 | extent_end = key.offset + |
| 655 | btrfs_file_extent_inline_len(leaf, item); | 494 | btrfs_file_extent_inline_len(leaf, extent); |
| 656 | } | 495 | } |
| 657 | } else { | 496 | } else { |
| 658 | extent_end = search_start; | 497 | extent_end = search_start; |
| @@ -680,10 +519,9 @@ next_slot: | |||
| 680 | search_start = (extent_end + mask) & ~mask; | 519 | search_start = (extent_end + mask) & ~mask; |
| 681 | } else | 520 | } else |
| 682 | search_start = extent_end; | 521 | search_start = extent_end; |
| 683 | if (end <= extent_end && start >= key.offset && found_inline) { | 522 | |
| 523 | if (end <= extent_end && start >= key.offset && found_inline) | ||
| 684 | *hint_byte = EXTENT_MAP_INLINE; | 524 | *hint_byte = EXTENT_MAP_INLINE; |
| 685 | goto out; | ||
| 686 | } | ||
| 687 | 525 | ||
| 688 | if (found_extent) { | 526 | if (found_extent) { |
| 689 | read_extent_buffer(leaf, &old, (unsigned long)extent, | 527 | read_extent_buffer(leaf, &old, (unsigned long)extent, |
| @@ -770,12 +608,27 @@ next_slot: | |||
| 770 | write_extent_buffer(leaf, &old, | 608 | write_extent_buffer(leaf, &old, |
| 771 | (unsigned long)extent, sizeof(old)); | 609 | (unsigned long)extent, sizeof(old)); |
| 772 | 610 | ||
| 611 | btrfs_set_file_extent_compression(leaf, extent, | ||
| 612 | compression); | ||
| 613 | btrfs_set_file_extent_encryption(leaf, extent, | ||
| 614 | encryption); | ||
| 615 | btrfs_set_file_extent_other_encoding(leaf, extent, | ||
| 616 | other_encoding); | ||
| 773 | btrfs_set_file_extent_offset(leaf, extent, | 617 | btrfs_set_file_extent_offset(leaf, extent, |
| 774 | le64_to_cpu(old.offset) + end - key.offset); | 618 | le64_to_cpu(old.offset) + end - key.offset); |
| 775 | WARN_ON(le64_to_cpu(old.num_bytes) < | 619 | WARN_ON(le64_to_cpu(old.num_bytes) < |
| 776 | (extent_end - end)); | 620 | (extent_end - end)); |
| 777 | btrfs_set_file_extent_num_bytes(leaf, extent, | 621 | btrfs_set_file_extent_num_bytes(leaf, extent, |
| 778 | extent_end - end); | 622 | extent_end - end); |
| 623 | |||
| 624 | /* | ||
| 625 | * set the ram bytes to the size of the full extent | ||
| 626 | * before splitting. This is a worst case flag, | ||
| 627 | * but its the best we can do because we don't know | ||
| 628 | * how splitting affects compression | ||
| 629 | */ | ||
| 630 | btrfs_set_file_extent_ram_bytes(leaf, extent, | ||
| 631 | ram_bytes); | ||
| 779 | btrfs_set_file_extent_type(leaf, extent, | 632 | btrfs_set_file_extent_type(leaf, extent, |
| 780 | BTRFS_FILE_EXTENT_REG); | 633 | BTRFS_FILE_EXTENT_REG); |
| 781 | 634 | ||
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index bf4bed6ca4d..9797592dc86 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
| @@ -49,6 +49,7 @@ | |||
| 49 | #include "compat.h" | 49 | #include "compat.h" |
| 50 | #include "tree-log.h" | 50 | #include "tree-log.h" |
| 51 | #include "ref-cache.h" | 51 | #include "ref-cache.h" |
| 52 | #include "compression.h" | ||
| 52 | 53 | ||
| 53 | struct btrfs_iget_args { | 54 | struct btrfs_iget_args { |
| 54 | u64 ino; | 55 | u64 ino; |
| @@ -83,6 +84,7 @@ static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = { | |||
| 83 | }; | 84 | }; |
| 84 | 85 | ||
| 85 | static void btrfs_truncate(struct inode *inode); | 86 | static void btrfs_truncate(struct inode *inode); |
| 87 | static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end); | ||
| 86 | 88 | ||
| 87 | /* | 89 | /* |
| 88 | * a very lame attempt at stopping writes when the FS is 85% full. There | 90 | * a very lame attempt at stopping writes when the FS is 85% full. There |
| @@ -114,57 +116,374 @@ int btrfs_check_free_space(struct btrfs_root *root, u64 num_required, | |||
| 114 | } | 116 | } |
| 115 | 117 | ||
| 116 | /* | 118 | /* |
| 119 | * this does all the hard work for inserting an inline extent into | ||
| 120 | * the btree. The caller should have done a btrfs_drop_extents so that | ||
| 121 | * no overlapping inline items exist in the btree | ||
| 122 | */ | ||
| 123 | static int noinline insert_inline_extent(struct btrfs_trans_handle *trans, | ||
| 124 | struct btrfs_root *root, struct inode *inode, | ||
| 125 | u64 start, size_t size, size_t compressed_size, | ||
| 126 | struct page **compressed_pages) | ||
| 127 | { | ||
| 128 | struct btrfs_key key; | ||
| 129 | struct btrfs_path *path; | ||
| 130 | struct extent_buffer *leaf; | ||
| 131 | struct page *page = NULL; | ||
| 132 | char *kaddr; | ||
| 133 | unsigned long ptr; | ||
| 134 | struct btrfs_file_extent_item *ei; | ||
| 135 | int err = 0; | ||
| 136 | int ret; | ||
| 137 | size_t cur_size = size; | ||
| 138 | size_t datasize; | ||
| 139 | unsigned long offset; | ||
| 140 | int use_compress = 0; | ||
| 141 | |||
| 142 | if (compressed_size && compressed_pages) { | ||
| 143 | use_compress = 1; | ||
| 144 | cur_size = compressed_size; | ||
| 145 | } | ||
| 146 | |||
| 147 | path = btrfs_alloc_path(); if (!path) | ||
| 148 | return -ENOMEM; | ||
| 149 | |||
| 150 | btrfs_set_trans_block_group(trans, inode); | ||
| 151 | |||
| 152 | key.objectid = inode->i_ino; | ||
| 153 | key.offset = start; | ||
| 154 | btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY); | ||
| 155 | inode_add_bytes(inode, size); | ||
| 156 | datasize = btrfs_file_extent_calc_inline_size(cur_size); | ||
| 157 | |||
| 158 | inode_add_bytes(inode, size); | ||
| 159 | ret = btrfs_insert_empty_item(trans, root, path, &key, | ||
| 160 | datasize); | ||
| 161 | BUG_ON(ret); | ||
| 162 | if (ret) { | ||
| 163 | err = ret; | ||
| 164 | printk("got bad ret %d\n", ret); | ||
| 165 | goto fail; | ||
| 166 | } | ||
| 167 | leaf = path->nodes[0]; | ||
| 168 | ei = btrfs_item_ptr(leaf, path->slots[0], | ||
| 169 | struct btrfs_file_extent_item); | ||
| 170 | btrfs_set_file_extent_generation(leaf, ei, trans->transid); | ||
| 171 | btrfs_set_file_extent_type(leaf, ei, BTRFS_FILE_EXTENT_INLINE); | ||
| 172 | btrfs_set_file_extent_encryption(leaf, ei, 0); | ||
| 173 | btrfs_set_file_extent_other_encoding(leaf, ei, 0); | ||
| 174 | btrfs_set_file_extent_ram_bytes(leaf, ei, size); | ||
| 175 | ptr = btrfs_file_extent_inline_start(ei); | ||
| 176 | |||
| 177 | if (use_compress) { | ||
| 178 | struct page *cpage; | ||
| 179 | int i = 0; | ||
| 180 | while(compressed_size > 0) { | ||
| 181 | cpage = compressed_pages[i]; | ||
| 182 | cur_size = min(compressed_size, | ||
| 183 | PAGE_CACHE_SIZE); | ||
| 184 | |||
| 185 | kaddr = kmap(cpage); | ||
| 186 | write_extent_buffer(leaf, kaddr, ptr, cur_size); | ||
| 187 | kunmap(cpage); | ||
| 188 | |||
| 189 | i++; | ||
| 190 | ptr += cur_size; | ||
| 191 | compressed_size -= cur_size; | ||
| 192 | } | ||
| 193 | btrfs_set_file_extent_compression(leaf, ei, | ||
| 194 | BTRFS_COMPRESS_ZLIB); | ||
| 195 | } else { | ||
| 196 | page = find_get_page(inode->i_mapping, | ||
| 197 | start >> PAGE_CACHE_SHIFT); | ||
| 198 | btrfs_set_file_extent_compression(leaf, ei, 0); | ||
| 199 | kaddr = kmap_atomic(page, KM_USER0); | ||
| 200 | offset = start & (PAGE_CACHE_SIZE - 1); | ||
| 201 | write_extent_buffer(leaf, kaddr + offset, ptr, size); | ||
| 202 | kunmap_atomic(kaddr, KM_USER0); | ||
| 203 | page_cache_release(page); | ||
| 204 | } | ||
| 205 | btrfs_mark_buffer_dirty(leaf); | ||
| 206 | btrfs_free_path(path); | ||
| 207 | |||
| 208 | BTRFS_I(inode)->disk_i_size = inode->i_size; | ||
| 209 | btrfs_update_inode(trans, root, inode); | ||
| 210 | return 0; | ||
| 211 | fail: | ||
| 212 | btrfs_free_path(path); | ||
| 213 | return err; | ||
| 214 | } | ||
| 215 | |||
| 216 | |||
| 217 | /* | ||
| 218 | * conditionally insert an inline extent into the file. This | ||
| 219 | * does the checks required to make sure the data is small enough | ||
| 220 | * to fit as an inline extent. | ||
| 221 | */ | ||
| 222 | static int cow_file_range_inline(struct btrfs_trans_handle *trans, | ||
| 223 | struct btrfs_root *root, | ||
| 224 | struct inode *inode, u64 start, u64 end, | ||
| 225 | size_t compressed_size, | ||
| 226 | struct page **compressed_pages) | ||
| 227 | { | ||
| 228 | u64 isize = i_size_read(inode); | ||
| 229 | u64 actual_end = min(end + 1, isize); | ||
| 230 | u64 inline_len = actual_end - start; | ||
| 231 | u64 aligned_end = (end + root->sectorsize - 1) & | ||
| 232 | ~((u64)root->sectorsize - 1); | ||
| 233 | u64 hint_byte; | ||
| 234 | u64 data_len = inline_len; | ||
| 235 | int ret; | ||
| 236 | |||
| 237 | if (compressed_size) | ||
| 238 | data_len = compressed_size; | ||
| 239 | |||
| 240 | if (start > 0 || | ||
| 241 | data_len >= BTRFS_MAX_INLINE_DATA_SIZE(root) || | ||
| 242 | (!compressed_size && | ||
| 243 | (actual_end & (root->sectorsize - 1)) == 0) || | ||
| 244 | end + 1 < isize || | ||
| 245 | data_len > root->fs_info->max_inline) { | ||
| 246 | return 1; | ||
| 247 | } | ||
| 248 | |||
| 249 | mutex_lock(&BTRFS_I(inode)->extent_mutex); | ||
| 250 | ret = btrfs_drop_extents(trans, root, inode, start, | ||
| 251 | aligned_end, aligned_end, &hint_byte); | ||
| 252 | BUG_ON(ret); | ||
| 253 | |||
| 254 | if (isize > actual_end) | ||
| 255 | inline_len = min_t(u64, isize, actual_end); | ||
| 256 | ret = insert_inline_extent(trans, root, inode, start, | ||
| 257 | inline_len, compressed_size, | ||
| 258 | compressed_pages); | ||
| 259 | BUG_ON(ret); | ||
| 260 | btrfs_drop_extent_cache(inode, start, aligned_end, 0); | ||
| 261 | mutex_unlock(&BTRFS_I(inode)->extent_mutex); | ||
| 262 | return 0; | ||
| 263 | } | ||
| 264 | |||
| 265 | /* | ||
| 117 | * when extent_io.c finds a delayed allocation range in the file, | 266 | * when extent_io.c finds a delayed allocation range in the file, |
| 118 | * the call backs end up in this code. The basic idea is to | 267 | * the call backs end up in this code. The basic idea is to |
| 119 | * allocate extents on disk for the range, and create ordered data structs | 268 | * allocate extents on disk for the range, and create ordered data structs |
| 120 | * in ram to track those extents. | 269 | * in ram to track those extents. |
| 270 | * | ||
| 271 | * locked_page is the page that writepage had locked already. We use | ||
| 272 | * it to make sure we don't do extra locks or unlocks. | ||
| 273 | * | ||
| 274 | * *page_started is set to one if we unlock locked_page and do everything | ||
| 275 | * required to start IO on it. It may be clean and already done with | ||
| 276 | * IO when we return. | ||
| 121 | */ | 277 | */ |
| 122 | static int cow_file_range(struct inode *inode, u64 start, u64 end) | 278 | static int cow_file_range(struct inode *inode, struct page *locked_page, |
| 279 | u64 start, u64 end, int *page_started) | ||
| 123 | { | 280 | { |
| 124 | struct btrfs_root *root = BTRFS_I(inode)->root; | 281 | struct btrfs_root *root = BTRFS_I(inode)->root; |
| 125 | struct btrfs_trans_handle *trans; | 282 | struct btrfs_trans_handle *trans; |
| 126 | u64 alloc_hint = 0; | 283 | u64 alloc_hint = 0; |
| 127 | u64 num_bytes; | 284 | u64 num_bytes; |
| 285 | unsigned long ram_size; | ||
| 286 | u64 orig_start; | ||
| 287 | u64 disk_num_bytes; | ||
| 128 | u64 cur_alloc_size; | 288 | u64 cur_alloc_size; |
| 129 | u64 blocksize = root->sectorsize; | 289 | u64 blocksize = root->sectorsize; |
| 130 | u64 orig_num_bytes; | 290 | u64 actual_end; |
| 131 | struct btrfs_key ins; | 291 | struct btrfs_key ins; |
| 132 | struct extent_map *em; | 292 | struct extent_map *em; |
| 133 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; | 293 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; |
| 134 | int ret = 0; | 294 | int ret = 0; |
| 295 | struct page **pages = NULL; | ||
| 296 | unsigned long nr_pages; | ||
| 297 | unsigned long nr_pages_ret = 0; | ||
| 298 | unsigned long total_compressed = 0; | ||
| 299 | unsigned long total_in = 0; | ||
| 300 | unsigned long max_compressed = 128 * 1024; | ||
| 301 | unsigned long max_uncompressed = 256 * 1024; | ||
| 302 | int i; | ||
| 303 | int will_compress; | ||
| 135 | 304 | ||
| 136 | trans = btrfs_join_transaction(root, 1); | 305 | trans = btrfs_join_transaction(root, 1); |
| 137 | BUG_ON(!trans); | 306 | BUG_ON(!trans); |
| 138 | btrfs_set_trans_block_group(trans, inode); | 307 | btrfs_set_trans_block_group(trans, inode); |
| 308 | orig_start = start; | ||
| 309 | |||
| 310 | /* | ||
| 311 | * compression made this loop a bit ugly, but the basic idea is to | ||
| 312 | * compress some pages but keep the total size of the compressed | ||
| 313 | * extent relatively small. If compression is off, this goto target | ||
| 314 | * is never used. | ||
| 315 | */ | ||
| 316 | again: | ||
| 317 | will_compress = 0; | ||
| 318 | nr_pages = (end >> PAGE_CACHE_SHIFT) - (start >> PAGE_CACHE_SHIFT) + 1; | ||
| 319 | nr_pages = min(nr_pages, (128 * 1024UL) / PAGE_CACHE_SIZE); | ||
| 139 | 320 | ||
| 321 | actual_end = min_t(u64, i_size_read(inode), end + 1); | ||
| 322 | total_compressed = actual_end - start; | ||
| 323 | |||
| 324 | /* we want to make sure that amount of ram required to uncompress | ||
| 325 | * an extent is reasonable, so we limit the total size in ram | ||
| 326 | * of a compressed extent to 256k | ||
| 327 | */ | ||
| 328 | total_compressed = min(total_compressed, max_uncompressed); | ||
| 140 | num_bytes = (end - start + blocksize) & ~(blocksize - 1); | 329 | num_bytes = (end - start + blocksize) & ~(blocksize - 1); |
| 141 | num_bytes = max(blocksize, num_bytes); | 330 | num_bytes = max(blocksize, num_bytes); |
| 142 | orig_num_bytes = num_bytes; | 331 | disk_num_bytes = num_bytes; |
| 332 | total_in = 0; | ||
| 333 | ret = 0; | ||
| 143 | 334 | ||
| 144 | if (alloc_hint == EXTENT_MAP_INLINE) | 335 | /* we do compression for mount -o compress and when the |
| 145 | goto out; | 336 | * inode has not been flagged as nocompress |
| 337 | */ | ||
| 338 | if (!btrfs_test_flag(inode, NOCOMPRESS) && | ||
| 339 | btrfs_test_opt(root, COMPRESS)) { | ||
| 340 | WARN_ON(pages); | ||
| 341 | pages = kmalloc(sizeof(struct page *) * nr_pages, GFP_NOFS); | ||
| 342 | |||
| 343 | /* we want to make sure the amount of IO required to satisfy | ||
| 344 | * a random read is reasonably small, so we limit the size | ||
| 345 | * of a compressed extent to 128k | ||
| 346 | */ | ||
| 347 | ret = btrfs_zlib_compress_pages(inode->i_mapping, start, | ||
| 348 | total_compressed, pages, | ||
| 349 | nr_pages, &nr_pages_ret, | ||
| 350 | &total_in, | ||
| 351 | &total_compressed, | ||
| 352 | max_compressed); | ||
| 353 | |||
| 354 | if (!ret) { | ||
| 355 | unsigned long offset = total_compressed & | ||
| 356 | (PAGE_CACHE_SIZE - 1); | ||
| 357 | struct page *page = pages[nr_pages_ret - 1]; | ||
| 358 | char *kaddr; | ||
| 359 | |||
| 360 | /* zero the tail end of the last page, we might be | ||
| 361 | * sending it down to disk | ||
| 362 | */ | ||
| 363 | if (offset) { | ||
| 364 | kaddr = kmap_atomic(page, KM_USER0); | ||
| 365 | memset(kaddr + offset, 0, | ||
| 366 | PAGE_CACHE_SIZE - offset); | ||
| 367 | kunmap_atomic(kaddr, KM_USER0); | ||
| 368 | } | ||
| 369 | will_compress = 1; | ||
| 370 | } | ||
| 371 | } | ||
| 372 | if (start == 0) { | ||
| 373 | /* lets try to make an inline extent */ | ||
| 374 | if (ret || total_in < (end - start + 1)) { | ||
| 375 | /* we didn't compress the entire range, try | ||
| 376 | * to make an uncompressed inline extent. This | ||
| 377 | * is almost sure to fail, but maybe inline sizes | ||
| 378 | * will get bigger later | ||
| 379 | */ | ||
| 380 | ret = cow_file_range_inline(trans, root, inode, | ||
| 381 | start, end, 0, NULL); | ||
| 382 | } else { | ||
| 383 | ret = cow_file_range_inline(trans, root, inode, | ||
| 384 | start, end, | ||
| 385 | total_compressed, pages); | ||
| 386 | } | ||
| 387 | if (ret == 0) { | ||
| 388 | extent_clear_unlock_delalloc(inode, | ||
| 389 | &BTRFS_I(inode)->io_tree, | ||
| 390 | start, end, NULL, | ||
| 391 | 1, 1, 1); | ||
| 392 | *page_started = 1; | ||
| 393 | ret = 0; | ||
| 394 | goto free_pages_out; | ||
| 395 | } | ||
| 396 | } | ||
| 397 | |||
| 398 | if (will_compress) { | ||
| 399 | /* | ||
| 400 | * we aren't doing an inline extent round the compressed size | ||
| 401 | * up to a block size boundary so the allocator does sane | ||
| 402 | * things | ||
| 403 | */ | ||
| 404 | total_compressed = (total_compressed + blocksize - 1) & | ||
| 405 | ~(blocksize - 1); | ||
| 406 | |||
| 407 | /* | ||
| 408 | * one last check to make sure the compression is really a | ||
| 409 | * win, compare the page count read with the blocks on disk | ||
| 410 | */ | ||
| 411 | total_in = (total_in + PAGE_CACHE_SIZE - 1) & | ||
| 412 | ~(PAGE_CACHE_SIZE - 1); | ||
| 413 | if (total_compressed >= total_in) { | ||
| 414 | will_compress = 0; | ||
| 415 | } else { | ||
| 416 | disk_num_bytes = total_compressed; | ||
| 417 | num_bytes = total_in; | ||
| 418 | } | ||
| 419 | } | ||
| 420 | if (!will_compress && pages) { | ||
| 421 | /* | ||
| 422 | * the compression code ran but failed to make things smaller, | ||
| 423 | * free any pages it allocated and our page pointer array | ||
| 424 | */ | ||
| 425 | for (i = 0; i < nr_pages_ret; i++) { | ||
| 426 | page_cache_release(pages[i]); | ||
| 427 | } | ||
| 428 | kfree(pages); | ||
| 429 | pages = NULL; | ||
| 430 | total_compressed = 0; | ||
| 431 | nr_pages_ret = 0; | ||
| 432 | |||
| 433 | /* flag the file so we don't compress in the future */ | ||
| 434 | btrfs_set_flag(inode, NOCOMPRESS); | ||
| 435 | } | ||
| 436 | |||
| 437 | BUG_ON(disk_num_bytes > | ||
| 438 | btrfs_super_total_bytes(&root->fs_info->super_copy)); | ||
| 146 | 439 | ||
| 147 | BUG_ON(num_bytes > btrfs_super_total_bytes(&root->fs_info->super_copy)); | ||
| 148 | mutex_lock(&BTRFS_I(inode)->extent_mutex); | 440 | mutex_lock(&BTRFS_I(inode)->extent_mutex); |
| 149 | btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0); | 441 | btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0); |
| 150 | mutex_unlock(&BTRFS_I(inode)->extent_mutex); | 442 | mutex_unlock(&BTRFS_I(inode)->extent_mutex); |
| 151 | 443 | ||
| 152 | while(num_bytes > 0) { | 444 | while(disk_num_bytes > 0) { |
| 153 | cur_alloc_size = min(num_bytes, root->fs_info->max_extent); | 445 | unsigned long min_bytes; |
| 446 | |||
| 447 | /* | ||
| 448 | * the max size of a compressed extent is pretty small, | ||
| 449 | * make the code a little less complex by forcing | ||
| 450 | * the allocator to find a whole compressed extent at once | ||
| 451 | */ | ||
| 452 | if (will_compress) | ||
| 453 | min_bytes = disk_num_bytes; | ||
| 454 | else | ||
| 455 | min_bytes = root->sectorsize; | ||
| 456 | |||
| 457 | cur_alloc_size = min(disk_num_bytes, root->fs_info->max_extent); | ||
| 154 | ret = btrfs_reserve_extent(trans, root, cur_alloc_size, | 458 | ret = btrfs_reserve_extent(trans, root, cur_alloc_size, |
| 155 | root->sectorsize, 0, alloc_hint, | 459 | min_bytes, 0, alloc_hint, |
| 156 | (u64)-1, &ins, 1); | 460 | (u64)-1, &ins, 1); |
| 157 | if (ret) { | 461 | if (ret) { |
| 158 | WARN_ON(1); | 462 | WARN_ON(1); |
| 159 | goto out; | 463 | goto free_pages_out_fail; |
| 160 | } | 464 | } |
| 161 | em = alloc_extent_map(GFP_NOFS); | 465 | em = alloc_extent_map(GFP_NOFS); |
| 162 | em->start = start; | 466 | em->start = start; |
| 163 | em->len = ins.offset; | 467 | |
| 468 | if (will_compress) { | ||
| 469 | ram_size = num_bytes; | ||
| 470 | em->len = num_bytes; | ||
| 471 | } else { | ||
| 472 | /* ramsize == disk size */ | ||
| 473 | ram_size = ins.offset; | ||
| 474 | em->len = ins.offset; | ||
| 475 | } | ||
| 476 | |||
| 164 | em->block_start = ins.objectid; | 477 | em->block_start = ins.objectid; |
| 478 | em->block_len = ins.offset; | ||
| 165 | em->bdev = root->fs_info->fs_devices->latest_bdev; | 479 | em->bdev = root->fs_info->fs_devices->latest_bdev; |
| 480 | |||
| 166 | mutex_lock(&BTRFS_I(inode)->extent_mutex); | 481 | mutex_lock(&BTRFS_I(inode)->extent_mutex); |
| 167 | set_bit(EXTENT_FLAG_PINNED, &em->flags); | 482 | set_bit(EXTENT_FLAG_PINNED, &em->flags); |
| 483 | |||
| 484 | if (will_compress) | ||
| 485 | set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); | ||
| 486 | |||
| 168 | while(1) { | 487 | while(1) { |
| 169 | spin_lock(&em_tree->lock); | 488 | spin_lock(&em_tree->lock); |
| 170 | ret = add_extent_mapping(em_tree, em); | 489 | ret = add_extent_mapping(em_tree, em); |
| @@ -174,26 +493,95 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end) | |||
| 174 | break; | 493 | break; |
| 175 | } | 494 | } |
| 176 | btrfs_drop_extent_cache(inode, start, | 495 | btrfs_drop_extent_cache(inode, start, |
| 177 | start + ins.offset - 1, 0); | 496 | start + ram_size - 1, 0); |
| 178 | } | 497 | } |
| 179 | mutex_unlock(&BTRFS_I(inode)->extent_mutex); | 498 | mutex_unlock(&BTRFS_I(inode)->extent_mutex); |
| 180 | 499 | ||
| 181 | cur_alloc_size = ins.offset; | 500 | cur_alloc_size = ins.offset; |
| 182 | ret = btrfs_add_ordered_extent(inode, start, ins.objectid, | 501 | ret = btrfs_add_ordered_extent(inode, start, ins.objectid, |
| 183 | ins.offset, 0); | 502 | ram_size, cur_alloc_size, 0, |
| 503 | will_compress); | ||
| 184 | BUG_ON(ret); | 504 | BUG_ON(ret); |
| 185 | if (num_bytes < cur_alloc_size) { | 505 | |
| 186 | printk("num_bytes %Lu cur_alloc %Lu\n", num_bytes, | 506 | if (disk_num_bytes < cur_alloc_size) { |
| 507 | printk("num_bytes %Lu cur_alloc %Lu\n", disk_num_bytes, | ||
| 187 | cur_alloc_size); | 508 | cur_alloc_size); |
| 188 | break; | 509 | break; |
| 189 | } | 510 | } |
| 511 | |||
| 512 | if (will_compress) { | ||
| 513 | /* | ||
| 514 | * we're doing compression, we and we need to | ||
| 515 | * submit the compressed extents down to the device. | ||
| 516 | * | ||
| 517 | * We lock down all the file pages, clearing their | ||
| 518 | * dirty bits and setting them writeback. Everyone | ||
| 519 | * that wants to modify the page will wait on the | ||
| 520 | * ordered extent above. | ||
| 521 | * | ||
| 522 | * The writeback bits on the file pages are | ||
| 523 | * cleared when the compressed pages are on disk | ||
| 524 | */ | ||
| 525 | btrfs_end_transaction(trans, root); | ||
| 526 | |||
| 527 | if (start <= page_offset(locked_page) && | ||
| 528 | page_offset(locked_page) < start + ram_size) { | ||
| 529 | *page_started = 1; | ||
| 530 | } | ||
| 531 | |||
| 532 | extent_clear_unlock_delalloc(inode, | ||
| 533 | &BTRFS_I(inode)->io_tree, | ||
| 534 | start, | ||
| 535 | start + ram_size - 1, | ||
| 536 | NULL, 1, 1, 0); | ||
| 537 | |||
| 538 | ret = btrfs_submit_compressed_write(inode, start, | ||
| 539 | ram_size, ins.objectid, | ||
| 540 | cur_alloc_size, pages, | ||
| 541 | nr_pages_ret); | ||
| 542 | |||
| 543 | BUG_ON(ret); | ||
| 544 | trans = btrfs_join_transaction(root, 1); | ||
| 545 | if (start + ram_size < end) { | ||
| 546 | start += ram_size; | ||
| 547 | alloc_hint = ins.objectid + ins.offset; | ||
| 548 | /* pages will be freed at end_bio time */ | ||
| 549 | pages = NULL; | ||
| 550 | goto again; | ||
| 551 | } else { | ||
| 552 | /* we've written everything, time to go */ | ||
| 553 | break; | ||
| 554 | } | ||
| 555 | } | ||
| 556 | /* we're not doing compressed IO, don't unlock the first | ||
| 557 | * page (which the caller expects to stay locked), don't | ||
| 558 | * clear any dirty bits and don't set any writeback bits | ||
| 559 | */ | ||
| 560 | extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree, | ||
| 561 | start, start + ram_size - 1, | ||
| 562 | locked_page, 0, 0, 0); | ||
| 563 | disk_num_bytes -= cur_alloc_size; | ||
| 190 | num_bytes -= cur_alloc_size; | 564 | num_bytes -= cur_alloc_size; |
| 191 | alloc_hint = ins.objectid + ins.offset; | 565 | alloc_hint = ins.objectid + ins.offset; |
| 192 | start += cur_alloc_size; | 566 | start += cur_alloc_size; |
| 193 | } | 567 | } |
| 568 | |||
| 569 | ret = 0; | ||
| 194 | out: | 570 | out: |
| 195 | btrfs_end_transaction(trans, root); | 571 | btrfs_end_transaction(trans, root); |
| 572 | |||
| 196 | return ret; | 573 | return ret; |
| 574 | |||
| 575 | free_pages_out_fail: | ||
| 576 | extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree, | ||
| 577 | start, end, locked_page, 0, 0, 0); | ||
| 578 | free_pages_out: | ||
| 579 | for (i = 0; i < nr_pages_ret; i++) | ||
| 580 | page_cache_release(pages[i]); | ||
| 581 | if (pages) | ||
| 582 | kfree(pages); | ||
| 583 | |||
| 584 | goto out; | ||
| 197 | } | 585 | } |
| 198 | 586 | ||
| 199 | /* | 587 | /* |
| @@ -203,7 +591,8 @@ out: | |||
| 203 | * If no cow copies or snapshots exist, we write directly to the existing | 591 | * If no cow copies or snapshots exist, we write directly to the existing |
| 204 | * blocks on disk | 592 | * blocks on disk |
| 205 | */ | 593 | */ |
| 206 | static int run_delalloc_nocow(struct inode *inode, u64 start, u64 end) | 594 | static int run_delalloc_nocow(struct inode *inode, struct page *locked_page, |
| 595 | u64 start, u64 end, int *page_started) | ||
| 207 | { | 596 | { |
| 208 | u64 extent_start; | 597 | u64 extent_start; |
| 209 | u64 extent_end; | 598 | u64 extent_end; |
| @@ -260,6 +649,11 @@ again: | |||
| 260 | extent_end = extent_start + extent_num_bytes; | 649 | extent_end = extent_start + extent_num_bytes; |
| 261 | err = 0; | 650 | err = 0; |
| 262 | 651 | ||
| 652 | if (btrfs_file_extent_compression(leaf, item) || | ||
| 653 | btrfs_file_extent_encryption(leaf,item) || | ||
| 654 | btrfs_file_extent_other_encoding(leaf, item)) | ||
| 655 | goto not_found; | ||
| 656 | |||
| 263 | if (loops && start != extent_start) | 657 | if (loops && start != extent_start) |
| 264 | goto not_found; | 658 | goto not_found; |
| 265 | 659 | ||
| @@ -284,7 +678,8 @@ again: | |||
| 284 | bytenr += btrfs_file_extent_offset(leaf, item); | 678 | bytenr += btrfs_file_extent_offset(leaf, item); |
| 285 | extent_num_bytes = min(end + 1, extent_end) - start; | 679 | extent_num_bytes = min(end + 1, extent_end) - start; |
| 286 | ret = btrfs_add_ordered_extent(inode, start, bytenr, | 680 | ret = btrfs_add_ordered_extent(inode, start, bytenr, |
| 287 | extent_num_bytes, 1); | 681 | extent_num_bytes, |
| 682 | extent_num_bytes, 1, 0); | ||
| 288 | if (ret) { | 683 | if (ret) { |
| 289 | err = ret; | 684 | err = ret; |
| 290 | goto out; | 685 | goto out; |
| @@ -300,7 +695,8 @@ again: | |||
| 300 | not_found: | 695 | not_found: |
| 301 | btrfs_end_transaction(trans, root); | 696 | btrfs_end_transaction(trans, root); |
| 302 | btrfs_free_path(path); | 697 | btrfs_free_path(path); |
| 303 | return cow_file_range(inode, start, end); | 698 | return cow_file_range(inode, locked_page, start, end, |
| 699 | page_started); | ||
| 304 | } | 700 | } |
| 305 | out: | 701 | out: |
| 306 | WARN_ON(err); | 702 | WARN_ON(err); |
| @@ -312,16 +708,19 @@ out: | |||
| 312 | /* | 708 | /* |
| 313 | * extent_io.c call back to do delayed allocation processing | 709 | * extent_io.c call back to do delayed allocation processing |
| 314 | */ | 710 | */ |
| 315 | static int run_delalloc_range(struct inode *inode, u64 start, u64 end) | 711 | static int run_delalloc_range(struct inode *inode, struct page *locked_page, |
| 712 | u64 start, u64 end, int *page_started) | ||
| 316 | { | 713 | { |
| 317 | struct btrfs_root *root = BTRFS_I(inode)->root; | 714 | struct btrfs_root *root = BTRFS_I(inode)->root; |
| 318 | int ret; | 715 | int ret; |
| 319 | 716 | ||
| 320 | if (btrfs_test_opt(root, NODATACOW) || | 717 | if (btrfs_test_opt(root, NODATACOW) || |
| 321 | btrfs_test_flag(inode, NODATACOW)) | 718 | btrfs_test_flag(inode, NODATACOW)) |
| 322 | ret = run_delalloc_nocow(inode, start, end); | 719 | ret = run_delalloc_nocow(inode, locked_page, start, end, |
| 720 | page_started); | ||
| 323 | else | 721 | else |
| 324 | ret = cow_file_range(inode, start, end); | 722 | ret = cow_file_range(inode, locked_page, start, end, |
| 723 | page_started); | ||
| 325 | 724 | ||
| 326 | return ret; | 725 | return ret; |
| 327 | } | 726 | } |
| @@ -383,7 +782,8 @@ int btrfs_clear_bit_hook(struct inode *inode, u64 start, u64 end, | |||
| 383 | * we don't create bios that span stripes or chunks | 782 | * we don't create bios that span stripes or chunks |
| 384 | */ | 783 | */ |
| 385 | int btrfs_merge_bio_hook(struct page *page, unsigned long offset, | 784 | int btrfs_merge_bio_hook(struct page *page, unsigned long offset, |
| 386 | size_t size, struct bio *bio) | 785 | size_t size, struct bio *bio, |
| 786 | unsigned long bio_flags) | ||
| 387 | { | 787 | { |
| 388 | struct btrfs_root *root = BTRFS_I(page->mapping->host)->root; | 788 | struct btrfs_root *root = BTRFS_I(page->mapping->host)->root; |
| 389 | struct btrfs_mapping_tree *map_tree; | 789 | struct btrfs_mapping_tree *map_tree; |
| @@ -413,7 +813,7 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset, | |||
| 413 | * are inserted into the btree | 813 | * are inserted into the btree |
| 414 | */ | 814 | */ |
| 415 | int __btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, | 815 | int __btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, |
| 416 | int mirror_num) | 816 | int mirror_num, unsigned long bio_flags) |
| 417 | { | 817 | { |
| 418 | struct btrfs_root *root = BTRFS_I(inode)->root; | 818 | struct btrfs_root *root = BTRFS_I(inode)->root; |
| 419 | int ret = 0; | 819 | int ret = 0; |
| @@ -429,7 +829,7 @@ int __btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, | |||
| 429 | * or reading the csums from the tree before a read | 829 | * or reading the csums from the tree before a read |
| 430 | */ | 830 | */ |
| 431 | int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, | 831 | int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, |
| 432 | int mirror_num) | 832 | int mirror_num, unsigned long bio_flags) |
| 433 | { | 833 | { |
| 434 | struct btrfs_root *root = BTRFS_I(inode)->root; | 834 | struct btrfs_root *root = BTRFS_I(inode)->root; |
| 435 | int ret = 0; | 835 | int ret = 0; |
| @@ -444,11 +844,17 @@ int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, | |||
| 444 | 844 | ||
| 445 | if (!(rw & (1 << BIO_RW))) { | 845 | if (!(rw & (1 << BIO_RW))) { |
| 446 | btrfs_lookup_bio_sums(root, inode, bio); | 846 | btrfs_lookup_bio_sums(root, inode, bio); |
| 847 | |||
| 848 | if (bio_flags & EXTENT_BIO_COMPRESSED) { | ||
| 849 | return btrfs_submit_compressed_read(inode, bio, | ||
| 850 | mirror_num, bio_flags); | ||
| 851 | } | ||
| 852 | |||
| 447 | goto mapit; | 853 | goto mapit; |
| 448 | } | 854 | } |
| 449 | return btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info, | 855 | return btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info, |
| 450 | inode, rw, bio, mirror_num, | 856 | inode, rw, bio, mirror_num, |
| 451 | __btrfs_submit_bio_hook); | 857 | bio_flags, __btrfs_submit_bio_hook); |
| 452 | mapit: | 858 | mapit: |
| 453 | return btrfs_map_bio(root, rw, bio, mirror_num, 0); | 859 | return btrfs_map_bio(root, rw, bio, mirror_num, 0); |
| 454 | } | 860 | } |
| @@ -539,7 +945,7 @@ out_page: | |||
| 539 | * good idea. This causes problems because we want to make sure COW | 945 | * good idea. This causes problems because we want to make sure COW |
| 540 | * properly happens and the data=ordered rules are followed. | 946 | * properly happens and the data=ordered rules are followed. |
| 541 | * | 947 | * |
| 542 | * In our case any range that doesn't have the EXTENT_ORDERED bit set | 948 | * In our case any range that doesn't have the ORDERED bit set |
| 543 | * hasn't been properly setup for IO. We kick off an async process | 949 | * hasn't been properly setup for IO. We kick off an async process |
| 544 | * to fix it up. The async helper will wait for ordered extents, set | 950 | * to fix it up. The async helper will wait for ordered extents, set |
| 545 | * the delalloc bit and make it safe to write the page. | 951 | * the delalloc bit and make it safe to write the page. |
| @@ -632,10 +1038,21 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) | |||
| 632 | btrfs_set_file_extent_disk_bytenr(leaf, extent_item, | 1038 | btrfs_set_file_extent_disk_bytenr(leaf, extent_item, |
| 633 | ordered_extent->start); | 1039 | ordered_extent->start); |
| 634 | btrfs_set_file_extent_disk_num_bytes(leaf, extent_item, | 1040 | btrfs_set_file_extent_disk_num_bytes(leaf, extent_item, |
| 635 | ordered_extent->len); | 1041 | ordered_extent->disk_len); |
| 636 | btrfs_set_file_extent_offset(leaf, extent_item, 0); | 1042 | btrfs_set_file_extent_offset(leaf, extent_item, 0); |
| 1043 | |||
| 1044 | if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags)) | ||
| 1045 | btrfs_set_file_extent_compression(leaf, extent_item, 1); | ||
| 1046 | else | ||
| 1047 | btrfs_set_file_extent_compression(leaf, extent_item, 0); | ||
| 1048 | btrfs_set_file_extent_encryption(leaf, extent_item, 0); | ||
| 1049 | btrfs_set_file_extent_other_encoding(leaf, extent_item, 0); | ||
| 1050 | |||
| 1051 | /* ram bytes = extent_num_bytes for now */ | ||
| 637 | btrfs_set_file_extent_num_bytes(leaf, extent_item, | 1052 | btrfs_set_file_extent_num_bytes(leaf, extent_item, |
| 638 | ordered_extent->len); | 1053 | ordered_extent->len); |
| 1054 | btrfs_set_file_extent_ram_bytes(leaf, extent_item, | ||
| 1055 | ordered_extent->len); | ||
| 639 | btrfs_mark_buffer_dirty(leaf); | 1056 | btrfs_mark_buffer_dirty(leaf); |
| 640 | 1057 | ||
| 641 | btrfs_drop_extent_cache(inode, ordered_extent->file_offset, | 1058 | btrfs_drop_extent_cache(inode, ordered_extent->file_offset, |
| @@ -644,7 +1061,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) | |||
| 644 | mutex_unlock(&BTRFS_I(inode)->extent_mutex); | 1061 | mutex_unlock(&BTRFS_I(inode)->extent_mutex); |
| 645 | 1062 | ||
| 646 | ins.objectid = ordered_extent->start; | 1063 | ins.objectid = ordered_extent->start; |
| 647 | ins.offset = ordered_extent->len; | 1064 | ins.offset = ordered_extent->disk_len; |
| 648 | ins.type = BTRFS_EXTENT_ITEM_KEY; | 1065 | ins.type = BTRFS_EXTENT_ITEM_KEY; |
| 649 | ret = btrfs_alloc_reserved_extent(trans, root, leaf->start, | 1066 | ret = btrfs_alloc_reserved_extent(trans, root, leaf->start, |
| 650 | root->root_key.objectid, | 1067 | root->root_key.objectid, |
| @@ -714,6 +1131,7 @@ int btrfs_io_failed_hook(struct bio *failed_bio, | |||
| 714 | int ret; | 1131 | int ret; |
| 715 | int rw; | 1132 | int rw; |
| 716 | u64 logical; | 1133 | u64 logical; |
| 1134 | unsigned long bio_flags = 0; | ||
| 717 | 1135 | ||
| 718 | ret = get_state_private(failure_tree, start, &private); | 1136 | ret = get_state_private(failure_tree, start, &private); |
| 719 | if (ret) { | 1137 | if (ret) { |
| @@ -738,6 +1156,8 @@ int btrfs_io_failed_hook(struct bio *failed_bio, | |||
| 738 | } | 1156 | } |
| 739 | logical = start - em->start; | 1157 | logical = start - em->start; |
| 740 | logical = em->block_start + logical; | 1158 | logical = em->block_start + logical; |
| 1159 | if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) | ||
| 1160 | bio_flags = EXTENT_BIO_COMPRESSED; | ||
| 741 | failrec->logical = logical; | 1161 | failrec->logical = logical; |
| 742 | free_extent_map(em); | 1162 | free_extent_map(em); |
| 743 | set_extent_bits(failure_tree, start, end, EXTENT_LOCKED | | 1163 | set_extent_bits(failure_tree, start, end, EXTENT_LOCKED | |
| @@ -781,7 +1201,8 @@ int btrfs_io_failed_hook(struct bio *failed_bio, | |||
| 781 | rw = READ; | 1201 | rw = READ; |
| 782 | 1202 | ||
| 783 | BTRFS_I(inode)->io_tree.ops->submit_bio_hook(inode, rw, bio, | 1203 | BTRFS_I(inode)->io_tree.ops->submit_bio_hook(inode, rw, bio, |
| 784 | failrec->last_mirror); | 1204 | failrec->last_mirror, |
| 1205 | bio_flags); | ||
| 785 | return 0; | 1206 | return 0; |
| 786 | } | 1207 | } |
| 787 | 1208 | ||
| @@ -1644,10 +2065,8 @@ search_again: | |||
| 1644 | item_end += | 2065 | item_end += |
| 1645 | btrfs_file_extent_num_bytes(leaf, fi); | 2066 | btrfs_file_extent_num_bytes(leaf, fi); |
| 1646 | } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) { | 2067 | } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) { |
| 1647 | struct btrfs_item *item = btrfs_item_nr(leaf, | ||
| 1648 | path->slots[0]); | ||
| 1649 | item_end += btrfs_file_extent_inline_len(leaf, | 2068 | item_end += btrfs_file_extent_inline_len(leaf, |
| 1650 | item); | 2069 | fi); |
| 1651 | } | 2070 | } |
| 1652 | item_end--; | 2071 | item_end--; |
| 1653 | } | 2072 | } |
| @@ -1715,7 +2134,14 @@ search_again: | |||
| 1715 | root_owner = btrfs_header_owner(leaf); | 2134 | root_owner = btrfs_header_owner(leaf); |
| 1716 | } | 2135 | } |
| 1717 | } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) { | 2136 | } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) { |
| 1718 | if (!del_item) { | 2137 | /* |
| 2138 | * we can't truncate inline items that have had | ||
| 2139 | * special encodings | ||
| 2140 | */ | ||
| 2141 | if (!del_item && | ||
| 2142 | btrfs_file_extent_compression(leaf, fi) == 0 && | ||
| 2143 | btrfs_file_extent_encryption(leaf, fi) == 0 && | ||
| 2144 | btrfs_file_extent_other_encoding(leaf, fi) == 0) { | ||
| 1719 | u32 size = new_size - found_key.offset; | 2145 | u32 size = new_size - found_key.offset; |
| 1720 | 2146 | ||
| 1721 | if (root->ref_cows) { | 2147 | if (root->ref_cows) { |
| @@ -1926,7 +2352,8 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) | |||
| 1926 | err = btrfs_insert_file_extent(trans, root, | 2352 | err = btrfs_insert_file_extent(trans, root, |
| 1927 | inode->i_ino, | 2353 | inode->i_ino, |
| 1928 | hole_start, 0, 0, | 2354 | hole_start, 0, 0, |
| 1929 | hole_size, 0); | 2355 | hole_size, 0, hole_size, |
| 2356 | 0, 0, 0); | ||
| 1930 | btrfs_drop_extent_cache(inode, hole_start, | 2357 | btrfs_drop_extent_cache(inode, hole_start, |
| 1931 | (u64)-1, 0); | 2358 | (u64)-1, 0); |
| 1932 | btrfs_check_file(root, inode); | 2359 | btrfs_check_file(root, inode); |
| @@ -2894,11 +3321,50 @@ static int merge_extent_mapping(struct extent_map_tree *em_tree, | |||
| 2894 | start_diff = map_start - em->start; | 3321 | start_diff = map_start - em->start; |
| 2895 | em->start = map_start; | 3322 | em->start = map_start; |
| 2896 | em->len = map_len; | 3323 | em->len = map_len; |
| 2897 | if (em->block_start < EXTENT_MAP_LAST_BYTE) | 3324 | if (em->block_start < EXTENT_MAP_LAST_BYTE && |
| 3325 | !test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) { | ||
| 2898 | em->block_start += start_diff; | 3326 | em->block_start += start_diff; |
| 3327 | em->block_len -= start_diff; | ||
| 3328 | } | ||
| 2899 | return add_extent_mapping(em_tree, em); | 3329 | return add_extent_mapping(em_tree, em); |
| 2900 | } | 3330 | } |
| 2901 | 3331 | ||
| 3332 | static noinline int uncompress_inline(struct btrfs_path *path, | ||
| 3333 | struct inode *inode, struct page *page, | ||
| 3334 | size_t pg_offset, u64 extent_offset, | ||
| 3335 | struct btrfs_file_extent_item *item) | ||
| 3336 | { | ||
| 3337 | int ret; | ||
| 3338 | struct extent_buffer *leaf = path->nodes[0]; | ||
| 3339 | char *tmp; | ||
| 3340 | size_t max_size; | ||
| 3341 | unsigned long inline_size; | ||
| 3342 | unsigned long ptr; | ||
| 3343 | |||
| 3344 | WARN_ON(pg_offset != 0); | ||
| 3345 | max_size = btrfs_file_extent_ram_bytes(leaf, item); | ||
| 3346 | inline_size = btrfs_file_extent_inline_item_len(leaf, | ||
| 3347 | btrfs_item_nr(leaf, path->slots[0])); | ||
| 3348 | tmp = kmalloc(inline_size, GFP_NOFS); | ||
| 3349 | ptr = btrfs_file_extent_inline_start(item); | ||
| 3350 | |||
| 3351 | read_extent_buffer(leaf, tmp, ptr, inline_size); | ||
| 3352 | |||
| 3353 | max_size = min(PAGE_CACHE_SIZE, max_size); | ||
| 3354 | ret = btrfs_zlib_decompress(tmp, page, extent_offset, | ||
| 3355 | inline_size, max_size); | ||
| 3356 | if (ret) { | ||
| 3357 | char *kaddr = kmap_atomic(page, KM_USER0); | ||
| 3358 | unsigned long copy_size = min_t(u64, | ||
| 3359 | PAGE_CACHE_SIZE - pg_offset, | ||
| 3360 | max_size - extent_offset); | ||
| 3361 | memset(kaddr + pg_offset, 0, copy_size); | ||
| 3362 | kunmap_atomic(kaddr, KM_USER0); | ||
| 3363 | } | ||
| 3364 | kfree(tmp); | ||
| 3365 | return 0; | ||
| 3366 | } | ||
| 3367 | |||
| 2902 | /* | 3368 | /* |
| 2903 | * a bit scary, this does extent mapping from logical file offset to the disk. | 3369 | * a bit scary, this does extent mapping from logical file offset to the disk. |
| 2904 | * the ugly parts come from merging extents from the disk with the | 3370 | * the ugly parts come from merging extents from the disk with the |
| @@ -2927,6 +3393,7 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, | |||
| 2927 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; | 3393 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; |
| 2928 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | 3394 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; |
| 2929 | struct btrfs_trans_handle *trans = NULL; | 3395 | struct btrfs_trans_handle *trans = NULL; |
| 3396 | int compressed; | ||
| 2930 | 3397 | ||
| 2931 | again: | 3398 | again: |
| 2932 | spin_lock(&em_tree->lock); | 3399 | spin_lock(&em_tree->lock); |
| @@ -2951,6 +3418,7 @@ again: | |||
| 2951 | em->bdev = root->fs_info->fs_devices->latest_bdev; | 3418 | em->bdev = root->fs_info->fs_devices->latest_bdev; |
| 2952 | em->start = EXTENT_MAP_HOLE; | 3419 | em->start = EXTENT_MAP_HOLE; |
| 2953 | em->len = (u64)-1; | 3420 | em->len = (u64)-1; |
| 3421 | em->block_len = (u64)-1; | ||
| 2954 | 3422 | ||
| 2955 | if (!path) { | 3423 | if (!path) { |
| 2956 | path = btrfs_alloc_path(); | 3424 | path = btrfs_alloc_path(); |
| @@ -2983,6 +3451,7 @@ again: | |||
| 2983 | 3451 | ||
| 2984 | found_type = btrfs_file_extent_type(leaf, item); | 3452 | found_type = btrfs_file_extent_type(leaf, item); |
| 2985 | extent_start = found_key.offset; | 3453 | extent_start = found_key.offset; |
| 3454 | compressed = btrfs_file_extent_compression(leaf, item); | ||
| 2986 | if (found_type == BTRFS_FILE_EXTENT_REG) { | 3455 | if (found_type == BTRFS_FILE_EXTENT_REG) { |
| 2987 | extent_end = extent_start + | 3456 | extent_end = extent_start + |
| 2988 | btrfs_file_extent_num_bytes(leaf, item); | 3457 | btrfs_file_extent_num_bytes(leaf, item); |
| @@ -3005,10 +3474,18 @@ again: | |||
| 3005 | em->block_start = EXTENT_MAP_HOLE; | 3474 | em->block_start = EXTENT_MAP_HOLE; |
| 3006 | goto insert; | 3475 | goto insert; |
| 3007 | } | 3476 | } |
| 3008 | bytenr += btrfs_file_extent_offset(leaf, item); | ||
| 3009 | em->block_start = bytenr; | ||
| 3010 | em->start = extent_start; | 3477 | em->start = extent_start; |
| 3011 | em->len = extent_end - extent_start; | 3478 | em->len = extent_end - extent_start; |
| 3479 | if (compressed) { | ||
| 3480 | set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); | ||
| 3481 | em->block_start = bytenr; | ||
| 3482 | em->block_len = btrfs_file_extent_disk_num_bytes(leaf, | ||
| 3483 | item); | ||
| 3484 | } else { | ||
| 3485 | bytenr += btrfs_file_extent_offset(leaf, item); | ||
| 3486 | em->block_start = bytenr; | ||
| 3487 | em->block_len = em->len; | ||
| 3488 | } | ||
| 3012 | goto insert; | 3489 | goto insert; |
| 3013 | } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { | 3490 | } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { |
| 3014 | u64 page_start; | 3491 | u64 page_start; |
| @@ -3018,8 +3495,7 @@ again: | |||
| 3018 | size_t extent_offset; | 3495 | size_t extent_offset; |
| 3019 | size_t copy_size; | 3496 | size_t copy_size; |
| 3020 | 3497 | ||
| 3021 | size = btrfs_file_extent_inline_len(leaf, btrfs_item_nr(leaf, | 3498 | size = btrfs_file_extent_inline_len(leaf, item); |
| 3022 | path->slots[0])); | ||
| 3023 | extent_end = (extent_start + size + root->sectorsize - 1) & | 3499 | extent_end = (extent_start + size + root->sectorsize - 1) & |
| 3024 | ~((u64)root->sectorsize - 1); | 3500 | ~((u64)root->sectorsize - 1); |
| 3025 | if (start < extent_start || start >= extent_end) { | 3501 | if (start < extent_start || start >= extent_end) { |
| @@ -3035,9 +3511,10 @@ again: | |||
| 3035 | } | 3511 | } |
| 3036 | em->block_start = EXTENT_MAP_INLINE; | 3512 | em->block_start = EXTENT_MAP_INLINE; |
| 3037 | 3513 | ||
| 3038 | if (!page) { | 3514 | if (!page || create) { |
| 3039 | em->start = extent_start; | 3515 | em->start = extent_start; |
| 3040 | em->len = size; | 3516 | em->len = (size + root->sectorsize - 1) & |
| 3517 | ~((u64)root->sectorsize - 1); | ||
| 3041 | goto out; | 3518 | goto out; |
| 3042 | } | 3519 | } |
| 3043 | 3520 | ||
| @@ -3048,11 +3525,22 @@ again: | |||
| 3048 | em->start = extent_start + extent_offset; | 3525 | em->start = extent_start + extent_offset; |
| 3049 | em->len = (copy_size + root->sectorsize - 1) & | 3526 | em->len = (copy_size + root->sectorsize - 1) & |
| 3050 | ~((u64)root->sectorsize - 1); | 3527 | ~((u64)root->sectorsize - 1); |
| 3051 | map = kmap(page); | 3528 | if (compressed) |
| 3529 | set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); | ||
| 3052 | ptr = btrfs_file_extent_inline_start(item) + extent_offset; | 3530 | ptr = btrfs_file_extent_inline_start(item) + extent_offset; |
| 3053 | if (create == 0 && !PageUptodate(page)) { | 3531 | if (create == 0 && !PageUptodate(page)) { |
| 3054 | read_extent_buffer(leaf, map + pg_offset, ptr, | 3532 | if (btrfs_file_extent_compression(leaf, item) == |
| 3055 | copy_size); | 3533 | BTRFS_COMPRESS_ZLIB) { |
| 3534 | ret = uncompress_inline(path, inode, page, | ||
| 3535 | pg_offset, | ||
| 3536 | extent_offset, item); | ||
| 3537 | BUG_ON(ret); | ||
| 3538 | } else { | ||
| 3539 | map = kmap(page); | ||
| 3540 | read_extent_buffer(leaf, map + pg_offset, ptr, | ||
| 3541 | copy_size); | ||
| 3542 | kunmap(page); | ||
| 3543 | } | ||
| 3056 | flush_dcache_page(page); | 3544 | flush_dcache_page(page); |
| 3057 | } else if (create && PageUptodate(page)) { | 3545 | } else if (create && PageUptodate(page)) { |
| 3058 | if (!trans) { | 3546 | if (!trans) { |
| @@ -3063,11 +3551,12 @@ again: | |||
| 3063 | trans = btrfs_join_transaction(root, 1); | 3551 | trans = btrfs_join_transaction(root, 1); |
| 3064 | goto again; | 3552 | goto again; |
| 3065 | } | 3553 | } |
| 3554 | map = kmap(page); | ||
| 3066 | write_extent_buffer(leaf, map + pg_offset, ptr, | 3555 | write_extent_buffer(leaf, map + pg_offset, ptr, |
| 3067 | copy_size); | 3556 | copy_size); |
| 3557 | kunmap(page); | ||
| 3068 | btrfs_mark_buffer_dirty(leaf); | 3558 | btrfs_mark_buffer_dirty(leaf); |
| 3069 | } | 3559 | } |
| 3070 | kunmap(page); | ||
| 3071 | set_extent_uptodate(io_tree, em->start, | 3560 | set_extent_uptodate(io_tree, em->start, |
| 3072 | extent_map_end(em) - 1, GFP_NOFS); | 3561 | extent_map_end(em) - 1, GFP_NOFS); |
| 3073 | goto insert; | 3562 | goto insert; |
| @@ -3779,6 +4268,11 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, | |||
| 3779 | btrfs_set_file_extent_generation(leaf, ei, trans->transid); | 4268 | btrfs_set_file_extent_generation(leaf, ei, trans->transid); |
| 3780 | btrfs_set_file_extent_type(leaf, ei, | 4269 | btrfs_set_file_extent_type(leaf, ei, |
| 3781 | BTRFS_FILE_EXTENT_INLINE); | 4270 | BTRFS_FILE_EXTENT_INLINE); |
| 4271 | btrfs_set_file_extent_encryption(leaf, ei, 0); | ||
| 4272 | btrfs_set_file_extent_compression(leaf, ei, 0); | ||
| 4273 | btrfs_set_file_extent_other_encoding(leaf, ei, 0); | ||
| 4274 | btrfs_set_file_extent_ram_bytes(leaf, ei, name_len); | ||
| 4275 | |||
| 3782 | ptr = btrfs_file_extent_inline_start(ei); | 4276 | ptr = btrfs_file_extent_inline_start(ei); |
| 3783 | write_extent_buffer(leaf, symname, ptr, name_len); | 4277 | write_extent_buffer(leaf, symname, ptr, name_len); |
| 3784 | btrfs_mark_buffer_dirty(leaf); | 4278 | btrfs_mark_buffer_dirty(leaf); |
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 2eb6caba57c..b5745bb96d4 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c | |||
| @@ -165,7 +165,8 @@ static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree, | |||
| 165 | * inserted. | 165 | * inserted. |
| 166 | */ | 166 | */ |
| 167 | int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, | 167 | int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, |
| 168 | u64 start, u64 len, int nocow) | 168 | u64 start, u64 len, u64 disk_len, int nocow, |
| 169 | int compressed) | ||
| 169 | { | 170 | { |
| 170 | struct btrfs_ordered_inode_tree *tree; | 171 | struct btrfs_ordered_inode_tree *tree; |
| 171 | struct rb_node *node; | 172 | struct rb_node *node; |
| @@ -180,9 +181,12 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, | |||
| 180 | entry->file_offset = file_offset; | 181 | entry->file_offset = file_offset; |
| 181 | entry->start = start; | 182 | entry->start = start; |
| 182 | entry->len = len; | 183 | entry->len = len; |
| 184 | entry->disk_len = disk_len; | ||
| 183 | entry->inode = inode; | 185 | entry->inode = inode; |
| 184 | if (nocow) | 186 | if (nocow) |
| 185 | set_bit(BTRFS_ORDERED_NOCOW, &entry->flags); | 187 | set_bit(BTRFS_ORDERED_NOCOW, &entry->flags); |
| 188 | if (compressed) | ||
| 189 | set_bit(BTRFS_ORDERED_COMPRESSED, &entry->flags); | ||
| 186 | 190 | ||
| 187 | /* one ref for the tree */ | 191 | /* one ref for the tree */ |
| 188 | atomic_set(&entry->refs, 1); | 192 | atomic_set(&entry->refs, 1); |
| @@ -389,9 +393,10 @@ void btrfs_start_ordered_extent(struct inode *inode, | |||
| 389 | * for pdflush to find them | 393 | * for pdflush to find them |
| 390 | */ | 394 | */ |
| 391 | btrfs_fdatawrite_range(inode->i_mapping, start, end, WB_SYNC_NONE); | 395 | btrfs_fdatawrite_range(inode->i_mapping, start, end, WB_SYNC_NONE); |
| 392 | if (wait) | 396 | if (wait) { |
| 393 | wait_event(entry->wait, test_bit(BTRFS_ORDERED_COMPLETE, | 397 | wait_event(entry->wait, test_bit(BTRFS_ORDERED_COMPLETE, |
| 394 | &entry->flags)); | 398 | &entry->flags)); |
| 399 | } | ||
| 395 | } | 400 | } |
| 396 | 401 | ||
| 397 | /* | 402 | /* |
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index f50f8870a14..1ef464145d2 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h | |||
| @@ -66,6 +66,8 @@ struct btrfs_ordered_sum { | |||
| 66 | 66 | ||
| 67 | #define BTRFS_ORDERED_NOCOW 2 /* set when we want to write in place */ | 67 | #define BTRFS_ORDERED_NOCOW 2 /* set when we want to write in place */ |
| 68 | 68 | ||
| 69 | #define BTRFS_ORDERED_COMPRESSED 3 /* writing a compressed extent */ | ||
| 70 | |||
| 69 | struct btrfs_ordered_extent { | 71 | struct btrfs_ordered_extent { |
| 70 | /* logical offset in the file */ | 72 | /* logical offset in the file */ |
| 71 | u64 file_offset; | 73 | u64 file_offset; |
| @@ -73,9 +75,12 @@ struct btrfs_ordered_extent { | |||
| 73 | /* disk byte number */ | 75 | /* disk byte number */ |
| 74 | u64 start; | 76 | u64 start; |
| 75 | 77 | ||
| 76 | /* length of the extent in bytes */ | 78 | /* ram length of the extent in bytes */ |
| 77 | u64 len; | 79 | u64 len; |
| 78 | 80 | ||
| 81 | /* extent length on disk */ | ||
| 82 | u64 disk_len; | ||
| 83 | |||
| 79 | /* flags (described above) */ | 84 | /* flags (described above) */ |
| 80 | unsigned long flags; | 85 | unsigned long flags; |
| 81 | 86 | ||
| @@ -127,7 +132,8 @@ int btrfs_remove_ordered_extent(struct inode *inode, | |||
| 127 | int btrfs_dec_test_ordered_pending(struct inode *inode, | 132 | int btrfs_dec_test_ordered_pending(struct inode *inode, |
| 128 | u64 file_offset, u64 io_size); | 133 | u64 file_offset, u64 io_size); |
| 129 | int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, | 134 | int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, |
| 130 | u64 start, u64 len, int nocow); | 135 | u64 start, u64 len, u64 disk_len, int nocow, |
| 136 | int compressed); | ||
| 131 | int btrfs_add_ordered_sum(struct inode *inode, | 137 | int btrfs_add_ordered_sum(struct inode *inode, |
| 132 | struct btrfs_ordered_extent *entry, | 138 | struct btrfs_ordered_extent *entry, |
| 133 | struct btrfs_ordered_sum *sum); | 139 | struct btrfs_ordered_sum *sum); |
diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index bd9ab3e9a7f..64725c13aa1 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c | |||
| @@ -115,15 +115,16 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) | |||
| 115 | if (btrfs_file_extent_type(l, fi) == | 115 | if (btrfs_file_extent_type(l, fi) == |
| 116 | BTRFS_FILE_EXTENT_INLINE) { | 116 | BTRFS_FILE_EXTENT_INLINE) { |
| 117 | printk("\t\tinline extent data size %u\n", | 117 | printk("\t\tinline extent data size %u\n", |
| 118 | btrfs_file_extent_inline_len(l, item)); | 118 | btrfs_file_extent_inline_len(l, fi)); |
| 119 | break; | 119 | break; |
| 120 | } | 120 | } |
| 121 | printk("\t\textent data disk bytenr %llu nr %llu\n", | 121 | printk("\t\textent data disk bytenr %llu nr %llu\n", |
| 122 | (unsigned long long)btrfs_file_extent_disk_bytenr(l, fi), | 122 | (unsigned long long)btrfs_file_extent_disk_bytenr(l, fi), |
| 123 | (unsigned long long)btrfs_file_extent_disk_num_bytes(l, fi)); | 123 | (unsigned long long)btrfs_file_extent_disk_num_bytes(l, fi)); |
| 124 | printk("\t\textent data offset %llu nr %llu\n", | 124 | printk("\t\textent data offset %llu nr %llu ram %llu\n", |
| 125 | (unsigned long long)btrfs_file_extent_offset(l, fi), | 125 | (unsigned long long)btrfs_file_extent_offset(l, fi), |
| 126 | (unsigned long long)btrfs_file_extent_num_bytes(l, fi)); | 126 | (unsigned long long)btrfs_file_extent_num_bytes(l, fi), |
| 127 | (unsigned long long)btrfs_file_extent_ram_bytes(l, fi)); | ||
| 127 | break; | 128 | break; |
| 128 | case BTRFS_BLOCK_GROUP_ITEM_KEY: | 129 | case BTRFS_BLOCK_GROUP_ITEM_KEY: |
| 129 | bi = btrfs_item_ptr(l, i, | 130 | bi = btrfs_item_ptr(l, i, |
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 2e6039825b7..431fdf144b5 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c | |||
| @@ -47,6 +47,7 @@ | |||
| 47 | #include "volumes.h" | 47 | #include "volumes.h" |
| 48 | #include "version.h" | 48 | #include "version.h" |
| 49 | #include "export.h" | 49 | #include "export.h" |
| 50 | #include "compression.h" | ||
| 50 | 51 | ||
| 51 | #define BTRFS_SUPER_MAGIC 0x9123683E | 52 | #define BTRFS_SUPER_MAGIC 0x9123683E |
| 52 | 53 | ||
| @@ -69,7 +70,7 @@ static void btrfs_put_super (struct super_block * sb) | |||
| 69 | enum { | 70 | enum { |
| 70 | Opt_degraded, Opt_subvol, Opt_device, Opt_nodatasum, Opt_nodatacow, | 71 | Opt_degraded, Opt_subvol, Opt_device, Opt_nodatasum, Opt_nodatacow, |
| 71 | Opt_max_extent, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, | 72 | Opt_max_extent, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, |
| 72 | Opt_ssd, Opt_thread_pool, Opt_noacl, Opt_err, | 73 | Opt_ssd, Opt_thread_pool, Opt_noacl, Opt_compress, Opt_err, |
| 73 | }; | 74 | }; |
| 74 | 75 | ||
| 75 | static match_table_t tokens = { | 76 | static match_table_t tokens = { |
| @@ -83,6 +84,7 @@ static match_table_t tokens = { | |||
| 83 | {Opt_max_inline, "max_inline=%s"}, | 84 | {Opt_max_inline, "max_inline=%s"}, |
| 84 | {Opt_alloc_start, "alloc_start=%s"}, | 85 | {Opt_alloc_start, "alloc_start=%s"}, |
| 85 | {Opt_thread_pool, "thread_pool=%d"}, | 86 | {Opt_thread_pool, "thread_pool=%d"}, |
| 87 | {Opt_compress, "compress"}, | ||
| 86 | {Opt_ssd, "ssd"}, | 88 | {Opt_ssd, "ssd"}, |
| 87 | {Opt_noacl, "noacl"}, | 89 | {Opt_noacl, "noacl"}, |
| 88 | {Opt_err, NULL}, | 90 | {Opt_err, NULL}, |
| @@ -163,6 +165,10 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) | |||
| 163 | btrfs_set_opt(info->mount_opt, NODATACOW); | 165 | btrfs_set_opt(info->mount_opt, NODATACOW); |
| 164 | btrfs_set_opt(info->mount_opt, NODATASUM); | 166 | btrfs_set_opt(info->mount_opt, NODATASUM); |
| 165 | break; | 167 | break; |
| 168 | case Opt_compress: | ||
| 169 | printk(KERN_INFO "btrfs: use compression\n"); | ||
| 170 | btrfs_set_opt(info->mount_opt, COMPRESS); | ||
| 171 | break; | ||
| 166 | case Opt_ssd: | 172 | case Opt_ssd: |
| 167 | printk(KERN_INFO "btrfs: use ssd allocation scheme\n"); | 173 | printk(KERN_INFO "btrfs: use ssd allocation scheme\n"); |
| 168 | btrfs_set_opt(info->mount_opt, SSD); | 174 | btrfs_set_opt(info->mount_opt, SSD); |
| @@ -622,6 +628,7 @@ static int __init init_btrfs_fs(void) | |||
| 622 | err = btrfs_interface_init(); | 628 | err = btrfs_interface_init(); |
| 623 | if (err) | 629 | if (err) |
| 624 | goto free_extent_map; | 630 | goto free_extent_map; |
| 631 | |||
| 625 | err = register_filesystem(&btrfs_fs_type); | 632 | err = register_filesystem(&btrfs_fs_type); |
| 626 | if (err) | 633 | if (err) |
| 627 | goto unregister_ioctl; | 634 | goto unregister_ioctl; |
| @@ -651,6 +658,7 @@ static void __exit exit_btrfs_fs(void) | |||
| 651 | unregister_filesystem(&btrfs_fs_type); | 658 | unregister_filesystem(&btrfs_fs_type); |
| 652 | btrfs_exit_sysfs(); | 659 | btrfs_exit_sysfs(); |
| 653 | btrfs_cleanup_fs_uuids(); | 660 | btrfs_cleanup_fs_uuids(); |
| 661 | btrfs_zlib_exit(); | ||
| 654 | } | 662 | } |
| 655 | 663 | ||
| 656 | module_init(init_btrfs_fs) | 664 | module_init(init_btrfs_fs) |
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index cf618cc8b34..e6d579053a4 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c | |||
| @@ -540,8 +540,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, | |||
| 540 | if (found_type == BTRFS_FILE_EXTENT_REG) | 540 | if (found_type == BTRFS_FILE_EXTENT_REG) |
| 541 | extent_end = start + btrfs_file_extent_num_bytes(eb, item); | 541 | extent_end = start + btrfs_file_extent_num_bytes(eb, item); |
| 542 | else if (found_type == BTRFS_FILE_EXTENT_INLINE) { | 542 | else if (found_type == BTRFS_FILE_EXTENT_INLINE) { |
| 543 | size = btrfs_file_extent_inline_len(eb, | 543 | size = btrfs_file_extent_inline_len(eb, item); |
| 544 | btrfs_item_nr(eb, slot)); | ||
| 545 | extent_end = (start + size + mask) & ~mask; | 544 | extent_end = (start + size + mask) & ~mask; |
| 546 | } else { | 545 | } else { |
| 547 | ret = 0; | 546 | ret = 0; |
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 2eed7f91f51..7db4cfd03a9 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c | |||
| @@ -1816,6 +1816,7 @@ again: | |||
| 1816 | em->start = key.offset; | 1816 | em->start = key.offset; |
| 1817 | em->len = *num_bytes; | 1817 | em->len = *num_bytes; |
| 1818 | em->block_start = 0; | 1818 | em->block_start = 0; |
| 1819 | em->block_len = em->len; | ||
| 1819 | 1820 | ||
| 1820 | if (type & BTRFS_BLOCK_GROUP_SYSTEM) { | 1821 | if (type & BTRFS_BLOCK_GROUP_SYSTEM) { |
| 1821 | ret = btrfs_add_system_chunk(trans, chunk_root, &key, | 1822 | ret = btrfs_add_system_chunk(trans, chunk_root, &key, |
| @@ -2323,6 +2324,7 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, | |||
| 2323 | em->start = logical; | 2324 | em->start = logical; |
| 2324 | em->len = length; | 2325 | em->len = length; |
| 2325 | em->block_start = 0; | 2326 | em->block_start = 0; |
| 2327 | em->block_len = em->len; | ||
| 2326 | 2328 | ||
| 2327 | map->num_stripes = num_stripes; | 2329 | map->num_stripes = num_stripes; |
| 2328 | map->io_width = btrfs_chunk_io_width(leaf, chunk); | 2330 | map->io_width = btrfs_chunk_io_width(leaf, chunk); |
diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c new file mode 100644 index 00000000000..e99309180a1 --- /dev/null +++ b/fs/btrfs/zlib.c | |||
| @@ -0,0 +1,637 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 2008 Oracle. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or | ||
| 5 | * modify it under the terms of the GNU General Public | ||
| 6 | * License v2 as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope that it will be useful, | ||
| 9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 11 | * General Public License for more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public | ||
| 14 | * License along with this program; if not, write to the | ||
| 15 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
| 16 | * Boston, MA 021110-1307, USA. | ||
| 17 | * | ||
| 18 | * Based on jffs2 zlib code: | ||
| 19 | * Copyright © 2001-2007 Red Hat, Inc. | ||
| 20 | * Created by David Woodhouse <dwmw2@infradead.org> | ||
| 21 | */ | ||
| 22 | |||
| 23 | #include <linux/kernel.h> | ||
| 24 | #include <linux/slab.h> | ||
| 25 | #include <linux/zlib.h> | ||
| 26 | #include <linux/zutil.h> | ||
| 27 | #include <linux/vmalloc.h> | ||
| 28 | #include <linux/init.h> | ||
| 29 | #include <linux/err.h> | ||
| 30 | #include <linux/sched.h> | ||
| 31 | #include <linux/pagemap.h> | ||
| 32 | #include <linux/bio.h> | ||
| 33 | |||
| 34 | /* Plan: call deflate() with avail_in == *sourcelen, | ||
| 35 | avail_out = *dstlen - 12 and flush == Z_FINISH. | ||
| 36 | If it doesn't manage to finish, call it again with | ||
| 37 | avail_in == 0 and avail_out set to the remaining 12 | ||
| 38 | bytes for it to clean up. | ||
| 39 | Q: Is 12 bytes sufficient? | ||
| 40 | */ | ||
| 41 | #define STREAM_END_SPACE 12 | ||
| 42 | |||
/*
 * A workspace bundles the (large, vmalloc'd) zlib inflate and deflate
 * stream state plus one page-sized bounce buffer for inflate output.
 * Workspaces are cached on an idle list so repeated (de)compression
 * does not pay the vmalloc cost every time.
 */
struct workspace {
	z_stream inf_strm;	/* state for zlib_inflate() */
	z_stream def_strm;	/* state for zlib_deflate() */
	char *buf;		/* PAGE_CACHE_SIZE inflate output bounce buffer */
	struct list_head list;	/* entry on idle_workspace */
};

/* idle (allocated but unused) workspaces; protected by workspace_lock */
static LIST_HEAD(idle_workspace);
static DEFINE_SPINLOCK(workspace_lock);
/* number of workspaces currently on the idle list */
static unsigned long num_workspace;
/* total workspaces allocated (idle + in use) */
static atomic_t alloc_workspace = ATOMIC_INIT(0);
/* waiters for a workspace once alloc_workspace exceeds the cpu count */
static DECLARE_WAIT_QUEUE_HEAD(workspace_wait);
| 55 | |||
| 56 | /* | ||
| 57 | * this finds an available zlib workspace or allocates a new one | ||
| 58 | * NULL or an ERR_PTR is returned if things go bad. | ||
| 59 | */ | ||
| 60 | static struct workspace *find_zlib_workspace(void) | ||
| 61 | { | ||
| 62 | struct workspace *workspace; | ||
| 63 | int ret; | ||
| 64 | int cpus = num_online_cpus(); | ||
| 65 | |||
| 66 | again: | ||
| 67 | spin_lock(&workspace_lock); | ||
| 68 | if (!list_empty(&idle_workspace)) { | ||
| 69 | workspace = list_entry(idle_workspace.next, struct workspace, | ||
| 70 | list); | ||
| 71 | list_del(&workspace->list); | ||
| 72 | num_workspace--; | ||
| 73 | spin_unlock(&workspace_lock); | ||
| 74 | return workspace; | ||
| 75 | |||
| 76 | } | ||
| 77 | spin_unlock(&workspace_lock); | ||
| 78 | if (atomic_read(&alloc_workspace) > cpus) { | ||
| 79 | DEFINE_WAIT(wait); | ||
| 80 | prepare_to_wait(&workspace_wait, &wait, TASK_UNINTERRUPTIBLE); | ||
| 81 | if (atomic_read(&alloc_workspace) > cpus) | ||
| 82 | schedule(); | ||
| 83 | finish_wait(&workspace_wait, &wait); | ||
| 84 | goto again; | ||
| 85 | } | ||
| 86 | atomic_inc(&alloc_workspace); | ||
| 87 | workspace = kzalloc(sizeof(*workspace), GFP_NOFS); | ||
| 88 | if (!workspace) { | ||
| 89 | ret = -ENOMEM; | ||
| 90 | goto fail; | ||
| 91 | } | ||
| 92 | |||
| 93 | workspace->def_strm.workspace = vmalloc(zlib_deflate_workspacesize()); | ||
| 94 | if (!workspace->def_strm.workspace) { | ||
| 95 | ret = -ENOMEM; | ||
| 96 | goto fail; | ||
| 97 | } | ||
| 98 | workspace->inf_strm.workspace = vmalloc(zlib_inflate_workspacesize()); | ||
| 99 | if (!workspace->inf_strm.workspace) { | ||
| 100 | ret = -ENOMEM; | ||
| 101 | goto fail_inflate; | ||
| 102 | } | ||
| 103 | workspace->buf = kmalloc(PAGE_CACHE_SIZE, GFP_NOFS); | ||
| 104 | if (!workspace->buf) { | ||
| 105 | ret = -ENOMEM; | ||
| 106 | goto fail_kmalloc; | ||
| 107 | } | ||
| 108 | return workspace; | ||
| 109 | |||
| 110 | fail_kmalloc: | ||
| 111 | vfree(workspace->inf_strm.workspace); | ||
| 112 | fail_inflate: | ||
| 113 | vfree(workspace->def_strm.workspace); | ||
| 114 | fail: | ||
| 115 | kfree(workspace); | ||
| 116 | atomic_dec(&alloc_workspace); | ||
| 117 | wake_up(&workspace_wait); | ||
| 118 | return ERR_PTR(ret); | ||
| 119 | } | ||
| 120 | |||
| 121 | /* | ||
| 122 | * put a workspace struct back on the list or free it if we have enough | ||
| 123 | * idle ones sitting around | ||
| 124 | */ | ||
| 125 | static int free_workspace(struct workspace *workspace) | ||
| 126 | { | ||
| 127 | spin_lock(&workspace_lock); | ||
| 128 | if (num_workspace < num_online_cpus()) { | ||
| 129 | list_add_tail(&workspace->list, &idle_workspace); | ||
| 130 | num_workspace++; | ||
| 131 | spin_unlock(&workspace_lock); | ||
| 132 | if (waitqueue_active(&workspace_wait)) | ||
| 133 | wake_up(&workspace_wait); | ||
| 134 | return 0; | ||
| 135 | } | ||
| 136 | spin_unlock(&workspace_lock); | ||
| 137 | vfree(workspace->def_strm.workspace); | ||
| 138 | vfree(workspace->inf_strm.workspace); | ||
| 139 | kfree(workspace->buf); | ||
| 140 | kfree(workspace); | ||
| 141 | |||
| 142 | atomic_dec(&alloc_workspace); | ||
| 143 | if (waitqueue_active(&workspace_wait)) | ||
| 144 | wake_up(&workspace_wait); | ||
| 145 | return 0; | ||
| 146 | } | ||
| 147 | |||
| 148 | /* | ||
| 149 | * cleanup function for module exit | ||
| 150 | */ | ||
| 151 | static void free_workspaces(void) | ||
| 152 | { | ||
| 153 | struct workspace *workspace; | ||
| 154 | while(!list_empty(&idle_workspace)) { | ||
| 155 | workspace = list_entry(idle_workspace.next, struct workspace, | ||
| 156 | list); | ||
| 157 | list_del(&workspace->list); | ||
| 158 | vfree(workspace->def_strm.workspace); | ||
| 159 | vfree(workspace->inf_strm.workspace); | ||
| 160 | kfree(workspace->buf); | ||
| 161 | kfree(workspace); | ||
| 162 | atomic_dec(&alloc_workspace); | ||
| 163 | } | ||
| 164 | } | ||
| 165 | |||
| 166 | /* | ||
| 167 | * given an address space and start/len, compress the bytes. | ||
| 168 | * | ||
| 169 | * pages are allocated to hold the compressed result and stored | ||
| 170 | * in 'pages' | ||
| 171 | * | ||
| 172 | * out_pages is used to return the number of pages allocated. There | ||
| 173 | * may be pages allocated even if we return an error | ||
| 174 | * | ||
| 175 | * total_in is used to return the number of bytes actually read. It | ||
| 176 | * may be smaller then len if we had to exit early because we | ||
| 177 | * ran out of room in the pages array or because we cross the | ||
| 178 | * max_out threshold. | ||
| 179 | * | ||
| 180 | * total_out is used to return the total number of compressed bytes | ||
| 181 | * | ||
| 182 | * max_out tells us the max number of bytes that we're allowed to | ||
| 183 | * stuff into pages | ||
| 184 | */ | ||
| 185 | int btrfs_zlib_compress_pages(struct address_space *mapping, | ||
| 186 | u64 start, unsigned long len, | ||
| 187 | struct page **pages, | ||
| 188 | unsigned long nr_dest_pages, | ||
| 189 | unsigned long *out_pages, | ||
| 190 | unsigned long *total_in, | ||
| 191 | unsigned long *total_out, | ||
| 192 | unsigned long max_out) | ||
| 193 | { | ||
| 194 | int ret; | ||
| 195 | struct workspace *workspace; | ||
| 196 | char *data_in; | ||
| 197 | char *cpage_out; | ||
| 198 | int nr_pages = 0; | ||
| 199 | struct page *in_page = NULL; | ||
| 200 | struct page *out_page = NULL; | ||
| 201 | int out_written = 0; | ||
| 202 | int in_read = 0; | ||
| 203 | unsigned long bytes_left; | ||
| 204 | |||
| 205 | *out_pages = 0; | ||
| 206 | *total_out = 0; | ||
| 207 | *total_in = 0; | ||
| 208 | |||
| 209 | workspace = find_zlib_workspace(); | ||
| 210 | if (!workspace) | ||
| 211 | return -1; | ||
| 212 | |||
| 213 | if (Z_OK != zlib_deflateInit(&workspace->def_strm, 3)) { | ||
| 214 | printk(KERN_WARNING "deflateInit failed\n"); | ||
| 215 | ret = -1; | ||
| 216 | goto out; | ||
| 217 | } | ||
| 218 | |||
| 219 | workspace->def_strm.total_in = 0; | ||
| 220 | workspace->def_strm.total_out = 0; | ||
| 221 | |||
| 222 | in_page = find_get_page(mapping, start >> PAGE_CACHE_SHIFT); | ||
| 223 | data_in = kmap(in_page); | ||
| 224 | |||
| 225 | out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); | ||
| 226 | cpage_out = kmap(out_page); | ||
| 227 | pages[0] = out_page; | ||
| 228 | nr_pages = 1; | ||
| 229 | |||
| 230 | workspace->def_strm.next_in = data_in; | ||
| 231 | workspace->def_strm.next_out = cpage_out; | ||
| 232 | workspace->def_strm.avail_out = PAGE_CACHE_SIZE; | ||
| 233 | workspace->def_strm.avail_in = min(len, PAGE_CACHE_SIZE); | ||
| 234 | |||
| 235 | out_written = 0; | ||
| 236 | in_read = 0; | ||
| 237 | |||
| 238 | while (workspace->def_strm.total_in < len) { | ||
| 239 | ret = zlib_deflate(&workspace->def_strm, Z_SYNC_FLUSH); | ||
| 240 | if (ret != Z_OK) { | ||
| 241 | printk(KERN_DEBUG "btrfs deflate in loop returned %d\n", | ||
| 242 | ret); | ||
| 243 | zlib_deflateEnd(&workspace->def_strm); | ||
| 244 | ret = -1; | ||
| 245 | goto out; | ||
| 246 | } | ||
| 247 | |||
| 248 | /* we're making it bigger, give up */ | ||
| 249 | if (workspace->def_strm.total_in > 8192 && | ||
| 250 | workspace->def_strm.total_in < | ||
| 251 | workspace->def_strm.total_out) { | ||
| 252 | ret = -1; | ||
| 253 | goto out; | ||
| 254 | } | ||
| 255 | /* we need another page for writing out. Test this | ||
| 256 | * before the total_in so we will pull in a new page for | ||
| 257 | * the stream end if required | ||
| 258 | */ | ||
| 259 | if (workspace->def_strm.avail_out == 0) { | ||
| 260 | kunmap(out_page); | ||
| 261 | if (nr_pages == nr_dest_pages) { | ||
| 262 | out_page = NULL; | ||
| 263 | ret = -1; | ||
| 264 | goto out; | ||
| 265 | } | ||
| 266 | out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); | ||
| 267 | cpage_out = kmap(out_page); | ||
| 268 | pages[nr_pages] = out_page; | ||
| 269 | nr_pages++; | ||
| 270 | workspace->def_strm.avail_out = PAGE_CACHE_SIZE; | ||
| 271 | workspace->def_strm.next_out = cpage_out; | ||
| 272 | } | ||
| 273 | /* we're all done */ | ||
| 274 | if (workspace->def_strm.total_in >= len) | ||
| 275 | break; | ||
| 276 | |||
| 277 | /* we've read in a full page, get a new one */ | ||
| 278 | if (workspace->def_strm.avail_in == 0) { | ||
| 279 | if (workspace->def_strm.total_out > max_out) | ||
| 280 | break; | ||
| 281 | |||
| 282 | bytes_left = len - workspace->def_strm.total_in; | ||
| 283 | kunmap(in_page); | ||
| 284 | page_cache_release(in_page); | ||
| 285 | |||
| 286 | start += PAGE_CACHE_SIZE; | ||
| 287 | in_page = find_get_page(mapping, | ||
| 288 | start >> PAGE_CACHE_SHIFT); | ||
| 289 | data_in = kmap(in_page); | ||
| 290 | workspace->def_strm.avail_in = min(bytes_left, | ||
| 291 | PAGE_CACHE_SIZE); | ||
| 292 | workspace->def_strm.next_in = data_in; | ||
| 293 | } | ||
| 294 | } | ||
| 295 | workspace->def_strm.avail_in = 0; | ||
| 296 | ret = zlib_deflate(&workspace->def_strm, Z_FINISH); | ||
| 297 | zlib_deflateEnd(&workspace->def_strm); | ||
| 298 | |||
| 299 | if (ret != Z_STREAM_END) { | ||
| 300 | ret = -1; | ||
| 301 | goto out; | ||
| 302 | } | ||
| 303 | |||
| 304 | if (workspace->def_strm.total_out >= workspace->def_strm.total_in) { | ||
| 305 | ret = -1; | ||
| 306 | goto out; | ||
| 307 | } | ||
| 308 | |||
| 309 | ret = 0; | ||
| 310 | *total_out = workspace->def_strm.total_out; | ||
| 311 | *total_in = workspace->def_strm.total_in; | ||
| 312 | out: | ||
| 313 | *out_pages = nr_pages; | ||
| 314 | if (out_page) | ||
| 315 | kunmap(out_page); | ||
| 316 | |||
| 317 | if (in_page) { | ||
| 318 | kunmap(in_page); | ||
| 319 | page_cache_release(in_page); | ||
| 320 | } | ||
| 321 | free_workspace(workspace); | ||
| 322 | return ret; | ||
| 323 | } | ||
| 324 | |||
| 325 | /* | ||
| 326 | * pages_in is an array of pages with compressed data. | ||
| 327 | * | ||
| 328 | * disk_start is the starting logical offset of this array in the file | ||
| 329 | * | ||
| 330 | * bvec is a bio_vec of pages from the file that we want to decompress into | ||
| 331 | * | ||
| 332 | * vcnt is the count of pages in the biovec | ||
| 333 | * | ||
| 334 | * srclen is the number of bytes in pages_in | ||
| 335 | * | ||
| 336 | * The basic idea is that we have a bio that was created by readpages. | ||
| 337 | * The pages in the bio are for the uncompressed data, and they may not | ||
| 338 | * be contiguous. They all correspond to the range of bytes covered by | ||
| 339 | * the compressed extent. | ||
| 340 | */ | ||
| 341 | int btrfs_zlib_decompress_biovec(struct page **pages_in, | ||
| 342 | u64 disk_start, | ||
| 343 | struct bio_vec *bvec, | ||
| 344 | int vcnt, | ||
| 345 | size_t srclen) | ||
| 346 | { | ||
| 347 | int ret = 0; | ||
| 348 | int wbits = MAX_WBITS; | ||
| 349 | struct workspace *workspace; | ||
| 350 | char *data_in; | ||
| 351 | size_t total_out = 0; | ||
| 352 | unsigned long page_bytes_left; | ||
| 353 | unsigned long page_in_index = 0; | ||
| 354 | unsigned long page_out_index = 0; | ||
| 355 | struct page *page_out; | ||
| 356 | unsigned long total_pages_in = (srclen + PAGE_CACHE_SIZE - 1) / | ||
| 357 | PAGE_CACHE_SIZE; | ||
| 358 | unsigned long buf_start; | ||
| 359 | unsigned long buf_offset; | ||
| 360 | unsigned long bytes; | ||
| 361 | unsigned long working_bytes; | ||
| 362 | unsigned long pg_offset; | ||
| 363 | unsigned long start_byte; | ||
| 364 | unsigned long current_buf_start; | ||
| 365 | char *kaddr; | ||
| 366 | |||
| 367 | workspace = find_zlib_workspace(); | ||
| 368 | if (!workspace) | ||
| 369 | return -ENOMEM; | ||
| 370 | |||
| 371 | data_in = kmap(pages_in[page_in_index]); | ||
| 372 | workspace->inf_strm.next_in = data_in; | ||
| 373 | workspace->inf_strm.avail_in = min(srclen, PAGE_CACHE_SIZE); | ||
| 374 | workspace->inf_strm.total_in = 0; | ||
| 375 | |||
| 376 | workspace->inf_strm.total_out = 0; | ||
| 377 | workspace->inf_strm.next_out = workspace->buf; | ||
| 378 | workspace->inf_strm.avail_out = PAGE_CACHE_SIZE; | ||
| 379 | page_out = bvec[page_out_index].bv_page; | ||
| 380 | page_bytes_left = PAGE_CACHE_SIZE; | ||
| 381 | pg_offset = 0; | ||
| 382 | |||
| 383 | /* If it's deflate, and it's got no preset dictionary, then | ||
| 384 | we can tell zlib to skip the adler32 check. */ | ||
| 385 | if (srclen > 2 && !(data_in[1] & PRESET_DICT) && | ||
| 386 | ((data_in[0] & 0x0f) == Z_DEFLATED) && | ||
| 387 | !(((data_in[0]<<8) + data_in[1]) % 31)) { | ||
| 388 | |||
| 389 | wbits = -((data_in[0] >> 4) + 8); | ||
| 390 | workspace->inf_strm.next_in += 2; | ||
| 391 | workspace->inf_strm.avail_in -= 2; | ||
| 392 | } | ||
| 393 | |||
| 394 | if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) { | ||
| 395 | printk(KERN_WARNING "inflateInit failed\n"); | ||
| 396 | ret = -1; | ||
| 397 | goto out; | ||
| 398 | } | ||
| 399 | while(workspace->inf_strm.total_in < srclen) { | ||
| 400 | ret = zlib_inflate(&workspace->inf_strm, Z_NO_FLUSH); | ||
| 401 | if (ret != Z_OK && ret != Z_STREAM_END) { | ||
| 402 | break; | ||
| 403 | } | ||
| 404 | |||
| 405 | /* | ||
| 406 | * buf start is the byte offset we're of the start of | ||
| 407 | * our workspace buffer | ||
| 408 | */ | ||
| 409 | buf_start = total_out; | ||
| 410 | |||
| 411 | /* total_out is the last byte of the workspace buffer */ | ||
| 412 | total_out = workspace->inf_strm.total_out; | ||
| 413 | |||
| 414 | working_bytes = total_out - buf_start; | ||
| 415 | |||
| 416 | /* | ||
| 417 | * start byte is the first byte of the page we're currently | ||
| 418 | * copying into relative to the start of the compressed data. | ||
| 419 | */ | ||
| 420 | start_byte = page_offset(page_out) - disk_start; | ||
| 421 | |||
| 422 | if (working_bytes == 0) { | ||
| 423 | /* we didn't make progress in this inflate | ||
| 424 | * call, we're done | ||
| 425 | */ | ||
| 426 | if (ret != Z_STREAM_END) | ||
| 427 | ret = -1; | ||
| 428 | break; | ||
| 429 | } | ||
| 430 | |||
| 431 | /* we haven't yet hit data corresponding to this page */ | ||
| 432 | if (total_out <= start_byte) { | ||
| 433 | goto next; | ||
| 434 | } | ||
| 435 | |||
| 436 | /* | ||
| 437 | * the start of the data we care about is offset into | ||
| 438 | * the middle of our working buffer | ||
| 439 | */ | ||
| 440 | if (total_out > start_byte && buf_start < start_byte) { | ||
| 441 | buf_offset = start_byte - buf_start; | ||
| 442 | working_bytes -= buf_offset; | ||
| 443 | } else { | ||
| 444 | buf_offset = 0; | ||
| 445 | } | ||
| 446 | current_buf_start = buf_start; | ||
| 447 | |||
| 448 | /* copy bytes from the working buffer into the pages */ | ||
| 449 | while(working_bytes > 0) { | ||
| 450 | bytes = min(PAGE_CACHE_SIZE - pg_offset, | ||
| 451 | PAGE_CACHE_SIZE - buf_offset); | ||
| 452 | bytes = min(bytes, working_bytes); | ||
| 453 | kaddr = kmap_atomic(page_out, KM_USER0); | ||
| 454 | memcpy(kaddr + pg_offset, workspace->buf + buf_offset, | ||
| 455 | bytes); | ||
| 456 | kunmap_atomic(kaddr, KM_USER0); | ||
| 457 | flush_dcache_page(page_out); | ||
| 458 | |||
| 459 | pg_offset += bytes; | ||
| 460 | page_bytes_left -= bytes; | ||
| 461 | buf_offset += bytes; | ||
| 462 | working_bytes -= bytes; | ||
| 463 | current_buf_start += bytes; | ||
| 464 | |||
| 465 | /* check if we need to pick another page */ | ||
| 466 | if (page_bytes_left == 0) { | ||
| 467 | page_out_index++; | ||
| 468 | if (page_out_index >= vcnt) { | ||
| 469 | ret = 0; | ||
| 470 | goto done; | ||
| 471 | } | ||
| 472 | page_out = bvec[page_out_index].bv_page; | ||
| 473 | pg_offset = 0; | ||
| 474 | page_bytes_left = PAGE_CACHE_SIZE; | ||
| 475 | start_byte = page_offset(page_out) - disk_start; | ||
| 476 | |||
| 477 | /* | ||
| 478 | * make sure our new page is covered by this | ||
| 479 | * working buffer | ||
| 480 | */ | ||
| 481 | if (total_out <= start_byte) { | ||
| 482 | goto next; | ||
| 483 | } | ||
| 484 | |||
| 485 | /* the next page in the biovec might not | ||
| 486 | * be adjacent to the last page, but it | ||
| 487 | * might still be found inside this working | ||
| 488 | * buffer. bump our offset pointer | ||
| 489 | */ | ||
| 490 | if (total_out > start_byte && | ||
| 491 | current_buf_start < start_byte) { | ||
| 492 | buf_offset = start_byte - buf_start; | ||
| 493 | working_bytes = total_out - start_byte; | ||
| 494 | current_buf_start = buf_start + | ||
| 495 | buf_offset; | ||
| 496 | } | ||
| 497 | } | ||
| 498 | } | ||
| 499 | next: | ||
| 500 | workspace->inf_strm.next_out = workspace->buf; | ||
| 501 | workspace->inf_strm.avail_out = PAGE_CACHE_SIZE; | ||
| 502 | |||
| 503 | if (workspace->inf_strm.avail_in == 0) { | ||
| 504 | unsigned long tmp; | ||
| 505 | kunmap(pages_in[page_in_index]); | ||
| 506 | page_in_index++; | ||
| 507 | if (page_in_index >= total_pages_in) { | ||
| 508 | data_in = NULL; | ||
| 509 | break; | ||
| 510 | } | ||
| 511 | data_in = kmap(pages_in[page_in_index]); | ||
| 512 | workspace->inf_strm.next_in = data_in; | ||
| 513 | tmp = srclen - workspace->inf_strm.total_in; | ||
| 514 | workspace->inf_strm.avail_in = min(tmp, | ||
| 515 | PAGE_CACHE_SIZE); | ||
| 516 | } | ||
| 517 | } | ||
| 518 | if (ret != Z_STREAM_END) { | ||
| 519 | ret = -1; | ||
| 520 | } else { | ||
| 521 | ret = 0; | ||
| 522 | } | ||
| 523 | done: | ||
| 524 | zlib_inflateEnd(&workspace->inf_strm); | ||
| 525 | if (data_in) | ||
| 526 | kunmap(pages_in[page_in_index]); | ||
| 527 | out: | ||
| 528 | free_workspace(workspace); | ||
| 529 | return ret; | ||
| 530 | } | ||
| 531 | |||
| 532 | /* | ||
| 533 | * a less complex decompression routine. Our compressed data fits in a | ||
| 534 | * single page, and we want to read a single page out of it. | ||
| 535 | * start_byte tells us the offset into the compressed data we're interested in | ||
| 536 | */ | ||
| 537 | int btrfs_zlib_decompress(unsigned char *data_in, | ||
| 538 | struct page *dest_page, | ||
| 539 | unsigned long start_byte, | ||
| 540 | size_t srclen, size_t destlen) | ||
| 541 | { | ||
| 542 | int ret = 0; | ||
| 543 | int wbits = MAX_WBITS; | ||
| 544 | struct workspace *workspace; | ||
| 545 | unsigned long bytes_left = destlen; | ||
| 546 | unsigned long total_out = 0; | ||
| 547 | char *kaddr; | ||
| 548 | |||
| 549 | if (destlen > PAGE_CACHE_SIZE) | ||
| 550 | return -ENOMEM; | ||
| 551 | |||
| 552 | workspace = find_zlib_workspace(); | ||
| 553 | if (!workspace) | ||
| 554 | return -ENOMEM; | ||
| 555 | |||
| 556 | workspace->inf_strm.next_in = data_in; | ||
| 557 | workspace->inf_strm.avail_in = srclen; | ||
| 558 | workspace->inf_strm.total_in = 0; | ||
| 559 | |||
| 560 | workspace->inf_strm.next_out = workspace->buf; | ||
| 561 | workspace->inf_strm.avail_out = PAGE_CACHE_SIZE; | ||
| 562 | workspace->inf_strm.total_out = 0; | ||
| 563 | /* If it's deflate, and it's got no preset dictionary, then | ||
| 564 | we can tell zlib to skip the adler32 check. */ | ||
| 565 | if (srclen > 2 && !(data_in[1] & PRESET_DICT) && | ||
| 566 | ((data_in[0] & 0x0f) == Z_DEFLATED) && | ||
| 567 | !(((data_in[0]<<8) + data_in[1]) % 31)) { | ||
| 568 | |||
| 569 | wbits = -((data_in[0] >> 4) + 8); | ||
| 570 | workspace->inf_strm.next_in += 2; | ||
| 571 | workspace->inf_strm.avail_in -= 2; | ||
| 572 | } | ||
| 573 | |||
| 574 | if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) { | ||
| 575 | printk(KERN_WARNING "inflateInit failed\n"); | ||
| 576 | ret = -1; | ||
| 577 | goto out; | ||
| 578 | } | ||
| 579 | |||
| 580 | while(bytes_left > 0) { | ||
| 581 | unsigned long buf_start; | ||
| 582 | unsigned long buf_offset; | ||
| 583 | unsigned long bytes; | ||
| 584 | unsigned long pg_offset = 0; | ||
| 585 | |||
| 586 | ret = zlib_inflate(&workspace->inf_strm, Z_NO_FLUSH); | ||
| 587 | if (ret != Z_OK && ret != Z_STREAM_END) { | ||
| 588 | break; | ||
| 589 | } | ||
| 590 | |||
| 591 | buf_start = total_out; | ||
| 592 | total_out = workspace->inf_strm.total_out; | ||
| 593 | |||
| 594 | if (total_out == buf_start) { | ||
| 595 | ret = -1; | ||
| 596 | break; | ||
| 597 | } | ||
| 598 | |||
| 599 | if (total_out <= start_byte) { | ||
| 600 | goto next; | ||
| 601 | } | ||
| 602 | |||
| 603 | if (total_out > start_byte && buf_start < start_byte) { | ||
| 604 | buf_offset = start_byte - buf_start; | ||
| 605 | } else { | ||
| 606 | buf_offset = 0; | ||
| 607 | } | ||
| 608 | |||
| 609 | bytes = min(PAGE_CACHE_SIZE - pg_offset, | ||
| 610 | PAGE_CACHE_SIZE - buf_offset); | ||
| 611 | bytes = min(bytes, bytes_left); | ||
| 612 | |||
| 613 | kaddr = kmap_atomic(dest_page, KM_USER0); | ||
| 614 | memcpy(kaddr + pg_offset, workspace->buf + buf_offset, bytes); | ||
| 615 | kunmap_atomic(kaddr, KM_USER0); | ||
| 616 | |||
| 617 | pg_offset += bytes; | ||
| 618 | bytes_left -= bytes; | ||
| 619 | next: | ||
| 620 | workspace->inf_strm.next_out = workspace->buf; | ||
| 621 | workspace->inf_strm.avail_out = PAGE_CACHE_SIZE; | ||
| 622 | } | ||
| 623 | if (ret != Z_STREAM_END && bytes_left != 0) { | ||
| 624 | ret = -1; | ||
| 625 | } else { | ||
| 626 | ret = 0; | ||
| 627 | } | ||
| 628 | zlib_inflateEnd(&workspace->inf_strm); | ||
| 629 | out: | ||
| 630 | free_workspace(workspace); | ||
| 631 | return ret; | ||
| 632 | } | ||
| 633 | |||
/*
 * module-unload hook (called from exit_btrfs_fs): release all cached
 * zlib workspaces.
 */
void btrfs_zlib_exit(void)
{
	free_workspaces();
}
