Diffstat (limited to 'fs/btrfs/inode.c')
 fs/btrfs/inode.c | 584 +++++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 539 insertions(+), 45 deletions(-)
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index bf4bed6ca4d6..9797592dc86b 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -49,6 +49,7 @@
 #include "compat.h"
 #include "tree-log.h"
 #include "ref-cache.h"
+#include "compression.h"
 
 struct btrfs_iget_args {
 	u64 ino;
@@ -83,6 +84,7 @@ static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
 };
 
 static void btrfs_truncate(struct inode *inode);
+static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end);
 
 /*
  * a very lame attempt at stopping writes when the FS is 85% full.  There
@@ -114,57 +116,374 @@ int btrfs_check_free_space(struct btrfs_root *root, u64 num_required,
 }
 
 /*
+ * this does all the hard work for inserting an inline extent into
+ * the btree.  The caller should have done a btrfs_drop_extents so that
+ * no overlapping inline items exist in the btree
+ */
+static int noinline insert_inline_extent(struct btrfs_trans_handle *trans,
+				struct btrfs_root *root, struct inode *inode,
+				u64 start, size_t size, size_t compressed_size,
+				struct page **compressed_pages)
+{
+	struct btrfs_key key;
+	struct btrfs_path *path;
+	struct extent_buffer *leaf;
+	struct page *page = NULL;
+	char *kaddr;
+	unsigned long ptr;
+	struct btrfs_file_extent_item *ei;
+	int err = 0;
+	int ret;
+	size_t cur_size = size;
+	size_t datasize;
+	unsigned long offset;
+	int use_compress = 0;
+
+	if (compressed_size && compressed_pages) {
+		use_compress = 1;
+		cur_size = compressed_size;
+	}
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	btrfs_set_trans_block_group(trans, inode);
+
+	key.objectid = inode->i_ino;
+	key.offset = start;
+	btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY);
+	inode_add_bytes(inode, size);
+	datasize = btrfs_file_extent_calc_inline_size(cur_size);
+
+	ret = btrfs_insert_empty_item(trans, root, path, &key,
+				      datasize);
+	if (ret) {
+		err = ret;
+		printk("got bad ret %d\n", ret);
+		goto fail;
+	}
+	leaf = path->nodes[0];
+	ei = btrfs_item_ptr(leaf, path->slots[0],
+			    struct btrfs_file_extent_item);
+	btrfs_set_file_extent_generation(leaf, ei, trans->transid);
+	btrfs_set_file_extent_type(leaf, ei, BTRFS_FILE_EXTENT_INLINE);
+	btrfs_set_file_extent_encryption(leaf, ei, 0);
+	btrfs_set_file_extent_other_encoding(leaf, ei, 0);
+	btrfs_set_file_extent_ram_bytes(leaf, ei, size);
+	ptr = btrfs_file_extent_inline_start(ei);
+
+	if (use_compress) {
+		struct page *cpage;
+		int i = 0;
+		while(compressed_size > 0) {
+			cpage = compressed_pages[i];
+			cur_size = min(compressed_size,
+				       PAGE_CACHE_SIZE);
+
+			kaddr = kmap(cpage);
+			write_extent_buffer(leaf, kaddr, ptr, cur_size);
+			kunmap(cpage);
+
+			i++;
+			ptr += cur_size;
+			compressed_size -= cur_size;
+		}
+		btrfs_set_file_extent_compression(leaf, ei,
+						  BTRFS_COMPRESS_ZLIB);
+	} else {
+		page = find_get_page(inode->i_mapping,
+				     start >> PAGE_CACHE_SHIFT);
+		btrfs_set_file_extent_compression(leaf, ei, 0);
+		kaddr = kmap_atomic(page, KM_USER0);
+		offset = start & (PAGE_CACHE_SIZE - 1);
+		write_extent_buffer(leaf, kaddr + offset, ptr, size);
+		kunmap_atomic(kaddr, KM_USER0);
+		page_cache_release(page);
+	}
+	btrfs_mark_buffer_dirty(leaf);
+	btrfs_free_path(path);
+
+	BTRFS_I(inode)->disk_i_size = inode->i_size;
+	btrfs_update_inode(trans, root, inode);
+	return 0;
+fail:
+	btrfs_free_path(path);
+	return err;
+}
+
+
+/*
+ * conditionally insert an inline extent into the file.  This
+ * does the checks required to make sure the data is small enough
+ * to fit as an inline extent.
+ */
+static int cow_file_range_inline(struct btrfs_trans_handle *trans,
+				 struct btrfs_root *root,
+				 struct inode *inode, u64 start, u64 end,
+				 size_t compressed_size,
+				 struct page **compressed_pages)
+{
+	u64 isize = i_size_read(inode);
+	u64 actual_end = min(end + 1, isize);
+	u64 inline_len = actual_end - start;
+	u64 aligned_end = (end + root->sectorsize - 1) &
+			  ~((u64)root->sectorsize - 1);
+	u64 hint_byte;
+	u64 data_len = inline_len;
+	int ret;
+
+	if (compressed_size)
+		data_len = compressed_size;
+
+	if (start > 0 ||
+	    data_len >= BTRFS_MAX_INLINE_DATA_SIZE(root) ||
+	    (!compressed_size &&
+	     (actual_end & (root->sectorsize - 1)) == 0) ||
+	    end + 1 < isize ||
+	    data_len > root->fs_info->max_inline) {
+		return 1;
+	}
+
+	mutex_lock(&BTRFS_I(inode)->extent_mutex);
+	ret = btrfs_drop_extents(trans, root, inode, start,
+				 aligned_end, aligned_end, &hint_byte);
+	BUG_ON(ret);
+
+	if (isize > actual_end)
+		inline_len = min_t(u64, isize, actual_end);
+	ret = insert_inline_extent(trans, root, inode, start,
+				   inline_len, compressed_size,
+				   compressed_pages);
+	BUG_ON(ret);
+	btrfs_drop_extent_cache(inode, start, aligned_end, 0);
+	mutex_unlock(&BTRFS_I(inode)->extent_mutex);
+	return 0;
+}
+
+/*
  * when extent_io.c finds a delayed allocation range in the file,
  * the call backs end up in this code.  The basic idea is to
  * allocate extents on disk for the range, and create ordered data structs
  * in ram to track those extents.
+ *
+ * locked_page is the page that writepage had locked already.  We use
+ * it to make sure we don't do extra locks or unlocks.
+ *
+ * *page_started is set to one if we unlock locked_page and do everything
+ * required to start IO on it.  It may be clean and already done with
+ * IO when we return.
  */
-static int cow_file_range(struct inode *inode, u64 start, u64 end)
+static int cow_file_range(struct inode *inode, struct page *locked_page,
+			  u64 start, u64 end, int *page_started)
 {
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	struct btrfs_trans_handle *trans;
 	u64 alloc_hint = 0;
 	u64 num_bytes;
+	unsigned long ram_size;
+	u64 orig_start;
+	u64 disk_num_bytes;
 	u64 cur_alloc_size;
 	u64 blocksize = root->sectorsize;
-	u64 orig_num_bytes;
+	u64 actual_end;
 	struct btrfs_key ins;
 	struct extent_map *em;
 	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
 	int ret = 0;
+	struct page **pages = NULL;
+	unsigned long nr_pages;
+	unsigned long nr_pages_ret = 0;
+	unsigned long total_compressed = 0;
+	unsigned long total_in = 0;
+	unsigned long max_compressed = 128 * 1024;
+	unsigned long max_uncompressed = 256 * 1024;
+	int i;
+	int will_compress;
 
 	trans = btrfs_join_transaction(root, 1);
 	BUG_ON(!trans);
 	btrfs_set_trans_block_group(trans, inode);
+	orig_start = start;
+
+	/*
+	 * compression made this loop a bit ugly, but the basic idea is to
+	 * compress some pages but keep the total size of the compressed
+	 * extent relatively small.  If compression is off, this goto target
+	 * is never used.
+	 */
+again:
+	will_compress = 0;
+	nr_pages = (end >> PAGE_CACHE_SHIFT) - (start >> PAGE_CACHE_SHIFT) + 1;
+	nr_pages = min(nr_pages, (128 * 1024UL) / PAGE_CACHE_SIZE);
 
+	actual_end = min_t(u64, i_size_read(inode), end + 1);
+	total_compressed = actual_end - start;
+
+	/* we want to make sure the amount of ram required to uncompress
+	 * an extent is reasonable, so we limit the total size in ram
+	 * of a compressed extent to 256k
+	 */
+	total_compressed = min(total_compressed, max_uncompressed);
 	num_bytes = (end - start + blocksize) & ~(blocksize - 1);
 	num_bytes = max(blocksize, num_bytes);
-	orig_num_bytes = num_bytes;
+	disk_num_bytes = num_bytes;
+	total_in = 0;
+	ret = 0;
 
-	if (alloc_hint == EXTENT_MAP_INLINE)
-		goto out;
+	/* we do compression for mount -o compress and when the
+	 * inode has not been flagged as nocompress
+	 */
+	if (!btrfs_test_flag(inode, NOCOMPRESS) &&
+	    btrfs_test_opt(root, COMPRESS)) {
+		WARN_ON(pages);
+		pages = kmalloc(sizeof(struct page *) * nr_pages, GFP_NOFS);
+
+		/* we want to make sure the amount of IO required to satisfy
+		 * a random read is reasonably small, so we limit the size
+		 * of a compressed extent to 128k
+		 */
+		ret = btrfs_zlib_compress_pages(inode->i_mapping, start,
+						total_compressed, pages,
+						nr_pages, &nr_pages_ret,
+						&total_in,
+						&total_compressed,
+						max_compressed);
+
+		if (!ret) {
+			unsigned long offset = total_compressed &
+				(PAGE_CACHE_SIZE - 1);
+			struct page *page = pages[nr_pages_ret - 1];
+			char *kaddr;
+
+			/* zero the tail end of the last page, we might be
+			 * sending it down to disk
+			 */
+			if (offset) {
+				kaddr = kmap_atomic(page, KM_USER0);
+				memset(kaddr + offset, 0,
+				       PAGE_CACHE_SIZE - offset);
+				kunmap_atomic(kaddr, KM_USER0);
+			}
+			will_compress = 1;
+		}
+	}
+	if (start == 0) {
+		/* let's try to make an inline extent */
+		if (ret || total_in < (end - start + 1)) {
+			/* we didn't compress the entire range, try
+			 * to make an uncompressed inline extent.  This
+			 * is almost sure to fail, but maybe inline sizes
+			 * will get bigger later
+			 */
+			ret = cow_file_range_inline(trans, root, inode,
+						    start, end, 0, NULL);
+		} else {
+			ret = cow_file_range_inline(trans, root, inode,
+						    start, end,
+						    total_compressed, pages);
+		}
+		if (ret == 0) {
+			extent_clear_unlock_delalloc(inode,
+						     &BTRFS_I(inode)->io_tree,
+						     start, end, NULL,
+						     1, 1, 1);
+			*page_started = 1;
+			ret = 0;
+			goto free_pages_out;
+		}
+	}
+
+	if (will_compress) {
+		/*
+		 * we aren't doing an inline extent, so round the compressed
+		 * size up to a block size boundary so the allocator does
+		 * sane things
+		 */
+		total_compressed = (total_compressed + blocksize - 1) &
+				   ~(blocksize - 1);
+
+		/*
+		 * one last check to make sure the compression is really a
+		 * win, compare the page count read with the blocks on disk
+		 */
+		total_in = (total_in + PAGE_CACHE_SIZE - 1) &
+			   ~(PAGE_CACHE_SIZE - 1);
+		if (total_compressed >= total_in) {
+			will_compress = 0;
+		} else {
+			disk_num_bytes = total_compressed;
+			num_bytes = total_in;
+		}
+	}
+	if (!will_compress && pages) {
+		/*
+		 * the compression code ran but failed to make things smaller,
+		 * so free any pages it allocated and our page pointer array
+		 */
+		for (i = 0; i < nr_pages_ret; i++) {
+			page_cache_release(pages[i]);
+		}
+		kfree(pages);
+		pages = NULL;
+		total_compressed = 0;
+		nr_pages_ret = 0;
+
+		/* flag the file so we don't compress in the future */
+		btrfs_set_flag(inode, NOCOMPRESS);
+	}
+
+	BUG_ON(disk_num_bytes >
+	       btrfs_super_total_bytes(&root->fs_info->super_copy));
 
-	BUG_ON(num_bytes > btrfs_super_total_bytes(&root->fs_info->super_copy));
 	mutex_lock(&BTRFS_I(inode)->extent_mutex);
 	btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0);
 	mutex_unlock(&BTRFS_I(inode)->extent_mutex);
 
-	while(num_bytes > 0) {
-		cur_alloc_size = min(num_bytes, root->fs_info->max_extent);
+	while(disk_num_bytes > 0) {
+		unsigned long min_bytes;
+
+		/*
+		 * the max size of a compressed extent is pretty small,
+		 * make the code a little less complex by forcing
+		 * the allocator to find a whole compressed extent at once
+		 */
+		if (will_compress)
+			min_bytes = disk_num_bytes;
+		else
+			min_bytes = root->sectorsize;
+
+		cur_alloc_size = min(disk_num_bytes, root->fs_info->max_extent);
 		ret = btrfs_reserve_extent(trans, root, cur_alloc_size,
-					   root->sectorsize, 0, alloc_hint,
+					   min_bytes, 0, alloc_hint,
 					   (u64)-1, &ins, 1);
 		if (ret) {
 			WARN_ON(1);
-			goto out;
+			goto free_pages_out_fail;
 		}
 		em = alloc_extent_map(GFP_NOFS);
 		em->start = start;
-		em->len = ins.offset;
+
+		if (will_compress) {
+			ram_size = num_bytes;
+			em->len = num_bytes;
+		} else {
+			/* ramsize == disk size */
+			ram_size = ins.offset;
+			em->len = ins.offset;
+		}
+
 		em->block_start = ins.objectid;
+		em->block_len = ins.offset;
 		em->bdev = root->fs_info->fs_devices->latest_bdev;
+
 		mutex_lock(&BTRFS_I(inode)->extent_mutex);
 		set_bit(EXTENT_FLAG_PINNED, &em->flags);
+
+		if (will_compress)
+			set_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
+
 		while(1) {
 			spin_lock(&em_tree->lock);
 			ret = add_extent_mapping(em_tree, em);
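
Aside: two pieces of arithmetic recur in the hunk above: rounding a byte
count up to a power-of-two boundary (used with both blocksize and
PAGE_CACHE_SIZE), and the final "is compression a win" comparison of the
two rounded totals. A standalone sketch of both follows; the helper names
round_up_pow2() and compression_wins() are mine, not the patch's.

#include <stdint.h>

/* Round x up to the next multiple of align; align must be a power of two. */
static inline uint64_t round_up_pow2(uint64_t x, uint64_t align)
{
	return (x + align - 1) & ~(align - 1);
}

/*
 * The win test from the hunk above: the compressed bytes, rounded up to
 * whole disk blocks, must come out smaller than the uncompressed bytes
 * rounded up to whole pages, or the compressed copy is thrown away.
 */
static inline int compression_wins(uint64_t total_in, uint64_t total_compressed,
				   uint64_t page_size, uint64_t blocksize)
{
	return round_up_pow2(total_compressed, blocksize) <
	       round_up_pow2(total_in, page_size);
}
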
@@ -174,26 +493,95 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end)
 				break;
 			}
 			btrfs_drop_extent_cache(inode, start,
-						start + ins.offset - 1, 0);
+						start + ram_size - 1, 0);
 		}
 		mutex_unlock(&BTRFS_I(inode)->extent_mutex);
 
 		cur_alloc_size = ins.offset;
 		ret = btrfs_add_ordered_extent(inode, start, ins.objectid,
-					       ins.offset, 0);
+					       ram_size, cur_alloc_size, 0,
+					       will_compress);
 		BUG_ON(ret);
-		if (num_bytes < cur_alloc_size) {
-			printk("num_bytes %Lu cur_alloc %Lu\n", num_bytes,
+
+		if (disk_num_bytes < cur_alloc_size) {
+			printk("num_bytes %Lu cur_alloc %Lu\n", disk_num_bytes,
 			       cur_alloc_size);
 			break;
 		}
+
+		if (will_compress) {
+			/*
+			 * we're doing compression, and we need to
+			 * submit the compressed extents down to the device.
+			 *
+			 * We lock down all the file pages, clearing their
+			 * dirty bits and setting them writeback.  Everyone
+			 * that wants to modify the page will wait on the
+			 * ordered extent above.
+			 *
+			 * The writeback bits on the file pages are
+			 * cleared when the compressed pages are on disk
+			 */
+			btrfs_end_transaction(trans, root);
+
+			if (start <= page_offset(locked_page) &&
+			    page_offset(locked_page) < start + ram_size) {
+				*page_started = 1;
+			}
+
+			extent_clear_unlock_delalloc(inode,
+						     &BTRFS_I(inode)->io_tree,
+						     start,
+						     start + ram_size - 1,
+						     NULL, 1, 1, 0);
+
+			ret = btrfs_submit_compressed_write(inode, start,
+						 ram_size, ins.objectid,
+						 cur_alloc_size, pages,
+						 nr_pages_ret);
+
+			BUG_ON(ret);
+			trans = btrfs_join_transaction(root, 1);
+			if (start + ram_size < end) {
+				start += ram_size;
+				alloc_hint = ins.objectid + ins.offset;
+				/* pages will be freed at end_bio time */
+				pages = NULL;
+				goto again;
+			} else {
+				/* we've written everything, time to go */
+				break;
+			}
+		}
+		/* we're not doing compressed IO, don't unlock the first
+		 * page (which the caller expects to stay locked), don't
+		 * clear any dirty bits and don't set any writeback bits
+		 */
+		extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree,
+					     start, start + ram_size - 1,
+					     locked_page, 0, 0, 0);
+		disk_num_bytes -= cur_alloc_size;
 		num_bytes -= cur_alloc_size;
 		alloc_hint = ins.objectid + ins.offset;
 		start += cur_alloc_size;
 	}
+
+	ret = 0;
 out:
 	btrfs_end_transaction(trans, root);
+
 	return ret;
+
+free_pages_out_fail:
+	extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree,
+				     start, end, locked_page, 0, 0, 0);
+free_pages_out:
+	for (i = 0; i < nr_pages_ret; i++)
+		page_cache_release(pages[i]);
+	if (pages)
+		kfree(pages);
+
+	goto out;
 }
 
 /*
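
Aside: the *page_started flag is the new contract between this code and
writepage in extent_io.c: when the compressed (or inline) path has already
unlocked locked_page and started its IO, the caller must not touch the page
again. A sketch of how a caller might consume the flag; the function name
and return convention here are illustrative only, not taken from the patch.

/* Sketch only; the real caller lives in extent_io.c. */
static int writepage_delalloc_sketch(struct inode *inode, struct page *page,
				     u64 start, u64 end)
{
	int page_started = 0;
	int ret;

	ret = run_delalloc_range(inode, page, start, end, &page_started);
	if (ret)
		return ret;
	/*
	 * If delalloc already unlocked the page and started IO on it,
	 * the caller is done with this page.
	 */
	if (page_started)
		return 0;
	/* otherwise continue the normal writepage path, page still locked */
	return 1;
}
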
@@ -203,7 +591,8 @@ out:
  * If no cow copies or snapshots exist, we write directly to the existing
  * blocks on disk
  */
-static int run_delalloc_nocow(struct inode *inode, u64 start, u64 end)
+static int run_delalloc_nocow(struct inode *inode, struct page *locked_page,
+			      u64 start, u64 end, int *page_started)
 {
 	u64 extent_start;
 	u64 extent_end;
@@ -260,6 +649,11 @@ again:
 	extent_end = extent_start + extent_num_bytes;
 	err = 0;
 
+	if (btrfs_file_extent_compression(leaf, item) ||
+	    btrfs_file_extent_encryption(leaf, item) ||
+	    btrfs_file_extent_other_encoding(leaf, item))
+		goto not_found;
+
 	if (loops && start != extent_start)
 		goto not_found;
 
@@ -284,7 +678,8 @@ again:
 	bytenr += btrfs_file_extent_offset(leaf, item);
 	extent_num_bytes = min(end + 1, extent_end) - start;
 	ret = btrfs_add_ordered_extent(inode, start, bytenr,
-				       extent_num_bytes, 1);
+				       extent_num_bytes,
+				       extent_num_bytes, 1, 0);
 	if (ret) {
 		err = ret;
 		goto out;
@@ -300,7 +695,8 @@ again:
 not_found:
 		btrfs_end_transaction(trans, root);
 		btrfs_free_path(path);
-		return cow_file_range(inode, start, end);
+		return cow_file_range(inode, locked_page, start, end,
+				      page_started);
 	}
 out:
 	WARN_ON(err);
@@ -312,16 +708,19 @@ out:
 /*
  * extent_io.c call back to do delayed allocation processing
  */
-static int run_delalloc_range(struct inode *inode, u64 start, u64 end)
+static int run_delalloc_range(struct inode *inode, struct page *locked_page,
+			      u64 start, u64 end, int *page_started)
 {
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	int ret;
 
 	if (btrfs_test_opt(root, NODATACOW) ||
 	    btrfs_test_flag(inode, NODATACOW))
-		ret = run_delalloc_nocow(inode, start, end);
+		ret = run_delalloc_nocow(inode, locked_page, start, end,
+					 page_started);
 	else
-		ret = cow_file_range(inode, start, end);
+		ret = cow_file_range(inode, locked_page, start, end,
+				     page_started);
 
 	return ret;
 }
@@ -383,7 +782,8 @@ int btrfs_clear_bit_hook(struct inode *inode, u64 start, u64 end,
  * we don't create bios that span stripes or chunks
  */
 int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
-			 size_t size, struct bio *bio)
+			 size_t size, struct bio *bio,
+			 unsigned long bio_flags)
 {
 	struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
 	struct btrfs_mapping_tree *map_tree;
@@ -413,7 +813,7 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
  * are inserted into the btree
  */
 int __btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
-			    int mirror_num)
+			    int mirror_num, unsigned long bio_flags)
 {
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	int ret = 0;
@@ -429,7 +829,7 @@ int __btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
  * or reading the csums from the tree before a read
  */
 int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
-			  int mirror_num)
+			  int mirror_num, unsigned long bio_flags)
 {
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	int ret = 0;
@@ -444,11 +844,17 @@ int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
 
 	if (!(rw & (1 << BIO_RW))) {
 		btrfs_lookup_bio_sums(root, inode, bio);
+
+		if (bio_flags & EXTENT_BIO_COMPRESSED) {
+			return btrfs_submit_compressed_read(inode, bio,
+						    mirror_num, bio_flags);
+		}
+
 		goto mapit;
 	}
 	return btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info,
 				   inode, rw, bio, mirror_num,
-				   __btrfs_submit_bio_hook);
+				   bio_flags, __btrfs_submit_bio_hook);
 mapit:
 	return btrfs_map_bio(root, rw, bio, mirror_num, 0);
 }
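
Aside: the read-side routing this hunk sets up can be condensed to a few
lines: look up csums first, then divert any bio flagged
EXTENT_BIO_COMPRESSED to the compressed-read path. The sketch below is a
restatement for clarity, not a drop-in replacement.

static int route_read_bio(struct inode *inode, struct bio *bio,
			  int mirror_num, unsigned long bio_flags)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;

	btrfs_lookup_bio_sums(root, inode, bio);
	/* compressed extents need the whole blob read and inflated first */
	if (bio_flags & EXTENT_BIO_COMPRESSED)
		return btrfs_submit_compressed_read(inode, bio,
						    mirror_num, bio_flags);
	/* plain data maps straight to the device */
	return btrfs_map_bio(root, READ, bio, mirror_num, 0);
}
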
@@ -539,7 +945,7 @@ out_page:
  * good idea.  This causes problems because we want to make sure COW
  * properly happens and the data=ordered rules are followed.
  *
- * In our case any range that doesn't have the EXTENT_ORDERED bit set
+ * In our case any range that doesn't have the ORDERED bit set
  * hasn't been properly setup for IO.  We kick off an async process
  * to fix it up.  The async helper will wait for ordered extents, set
  * the delalloc bit and make it safe to write the page.
@@ -632,10 +1038,21 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
 	btrfs_set_file_extent_disk_bytenr(leaf, extent_item,
 					  ordered_extent->start);
 	btrfs_set_file_extent_disk_num_bytes(leaf, extent_item,
-					     ordered_extent->len);
+					     ordered_extent->disk_len);
 	btrfs_set_file_extent_offset(leaf, extent_item, 0);
+
+	if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags))
+		btrfs_set_file_extent_compression(leaf, extent_item, 1);
+	else
+		btrfs_set_file_extent_compression(leaf, extent_item, 0);
+	btrfs_set_file_extent_encryption(leaf, extent_item, 0);
+	btrfs_set_file_extent_other_encoding(leaf, extent_item, 0);
+
+	/* ram bytes = extent_num_bytes for now */
 	btrfs_set_file_extent_num_bytes(leaf, extent_item,
 					ordered_extent->len);
+	btrfs_set_file_extent_ram_bytes(leaf, extent_item,
+					ordered_extent->len);
 	btrfs_mark_buffer_dirty(leaf);
 
 	btrfs_drop_extent_cache(inode, ordered_extent->file_offset,
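
Aside: a file extent item now carries three distinct sizes: disk_num_bytes
(compressed bytes on disk, from ordered_extent->disk_len), num_bytes (length
of the file range covered), and ram_bytes (length after decompression, equal
to num_bytes here per the comment above). A sketch grouping the field setup
from this hunk into one helper; the helper name is made up.

static void fill_file_extent_sizes(struct extent_buffer *leaf,
				   struct btrfs_file_extent_item *fi,
				   struct btrfs_ordered_extent *oe,
				   int compressed)
{
	btrfs_set_file_extent_disk_bytenr(leaf, fi, oe->start);
	/* bytes actually occupied on disk (the compressed size) */
	btrfs_set_file_extent_disk_num_bytes(leaf, fi, oe->disk_len);
	btrfs_set_file_extent_offset(leaf, fi, 0);
	btrfs_set_file_extent_compression(leaf, fi, compressed ? 1 : 0);
	btrfs_set_file_extent_encryption(leaf, fi, 0);
	btrfs_set_file_extent_other_encoding(leaf, fi, 0);
	/* logical length of the file range, and its uncompressed size */
	btrfs_set_file_extent_num_bytes(leaf, fi, oe->len);
	btrfs_set_file_extent_ram_bytes(leaf, fi, oe->len);
}
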
@@ -644,7 +1061,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
 	mutex_unlock(&BTRFS_I(inode)->extent_mutex);
 
 	ins.objectid = ordered_extent->start;
-	ins.offset = ordered_extent->len;
+	ins.offset = ordered_extent->disk_len;
 	ins.type = BTRFS_EXTENT_ITEM_KEY;
 	ret = btrfs_alloc_reserved_extent(trans, root, leaf->start,
 					  root->root_key.objectid,
@@ -714,6 +1131,7 @@ int btrfs_io_failed_hook(struct bio *failed_bio,
 	int ret;
 	int rw;
 	u64 logical;
+	unsigned long bio_flags = 0;
 
 	ret = get_state_private(failure_tree, start, &private);
 	if (ret) {
@@ -738,6 +1156,8 @@ int btrfs_io_failed_hook(struct bio *failed_bio,
 	}
 	logical = start - em->start;
 	logical = em->block_start + logical;
+	if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
+		bio_flags = EXTENT_BIO_COMPRESSED;
 	failrec->logical = logical;
 	free_extent_map(em);
 	set_extent_bits(failure_tree, start, end, EXTENT_LOCKED |
@@ -781,7 +1201,8 @@ int btrfs_io_failed_hook(struct bio *failed_bio,
 	rw = READ;
 
 	BTRFS_I(inode)->io_tree.ops->submit_bio_hook(inode, rw, bio,
-						     failrec->last_mirror);
+						     failrec->last_mirror,
+						     bio_flags);
 	return 0;
 }
 
@@ -1644,10 +2065,8 @@ search_again:
 			item_end +=
 				btrfs_file_extent_num_bytes(leaf, fi);
 		} else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
-			struct btrfs_item *item = btrfs_item_nr(leaf,
-							path->slots[0]);
 			item_end += btrfs_file_extent_inline_len(leaf,
-							 item);
+							 fi);
 		}
 		item_end--;
 	}
@@ -1715,7 +2134,14 @@ search_again:
 				root_owner = btrfs_header_owner(leaf);
 			}
 		} else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
-			if (!del_item) {
+			/*
+			 * we can't truncate inline items that have had
+			 * special encodings
+			 */
+			if (!del_item &&
+			    btrfs_file_extent_compression(leaf, fi) == 0 &&
+			    btrfs_file_extent_encryption(leaf, fi) == 0 &&
+			    btrfs_file_extent_other_encoding(leaf, fi) == 0) {
 				u32 size = new_size - found_key.offset;
 
 				if (root->ref_cows) {
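
Aside: truncating an inline item in place only works when the stored bytes
match the logical bytes one to one, which stops being true once an item is
compressed or otherwise encoded. The new condition reads naturally as a
predicate; the helper name below is made up.

static int inline_extent_is_plain(struct extent_buffer *leaf,
				  struct btrfs_file_extent_item *fi)
{
	return btrfs_file_extent_compression(leaf, fi) == 0 &&
	       btrfs_file_extent_encryption(leaf, fi) == 0 &&
	       btrfs_file_extent_other_encoding(leaf, fi) == 0;
}
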
@@ -1926,7 +2352,8 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
 			err = btrfs_insert_file_extent(trans, root,
 						       inode->i_ino,
 						       hole_start, 0, 0,
-						       hole_size, 0);
+						       hole_size, 0, hole_size,
+						       0, 0, 0);
 			btrfs_drop_extent_cache(inode, hole_start,
 						(u64)-1, 0);
 			btrfs_check_file(root, inode);
@@ -2894,11 +3321,50 @@ static int merge_extent_mapping(struct extent_map_tree *em_tree,
 	start_diff = map_start - em->start;
 	em->start = map_start;
 	em->len = map_len;
-	if (em->block_start < EXTENT_MAP_LAST_BYTE)
+	if (em->block_start < EXTENT_MAP_LAST_BYTE &&
+	    !test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
 		em->block_start += start_diff;
+		em->block_len -= start_diff;
+	}
 	return add_extent_mapping(em_tree, em);
 }
 
+static noinline int uncompress_inline(struct btrfs_path *path,
+				      struct inode *inode, struct page *page,
+				      size_t pg_offset, u64 extent_offset,
+				      struct btrfs_file_extent_item *item)
+{
+	int ret;
+	struct extent_buffer *leaf = path->nodes[0];
+	char *tmp;
+	size_t max_size;
+	unsigned long inline_size;
+	unsigned long ptr;
+
+	WARN_ON(pg_offset != 0);
+	max_size = btrfs_file_extent_ram_bytes(leaf, item);
+	inline_size = btrfs_file_extent_inline_item_len(leaf,
+					btrfs_item_nr(leaf, path->slots[0]));
+	tmp = kmalloc(inline_size, GFP_NOFS);
+	ptr = btrfs_file_extent_inline_start(item);
+
+	read_extent_buffer(leaf, tmp, ptr, inline_size);
+
+	max_size = min(PAGE_CACHE_SIZE, max_size);
+	ret = btrfs_zlib_decompress(tmp, page, extent_offset,
+				    inline_size, max_size);
+	if (ret) {
+		char *kaddr = kmap_atomic(page, KM_USER0);
+		unsigned long copy_size = min_t(u64,
+					PAGE_CACHE_SIZE - pg_offset,
+					max_size - extent_offset);
+		memset(kaddr + pg_offset, 0, copy_size);
+		kunmap_atomic(kaddr, KM_USER0);
+	}
+	kfree(tmp);
+	return 0;
+}
+
 /*
  * a bit scary, this does extent mapping from logical file offset to the disk.
  * the ugly parts come from merging extents from the disk with the
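
Aside: the merge_extent_mapping() change encodes one rule worth spelling
out: a compressed extent map must keep pointing at the entire on-disk blob,
since decompression needs all of it, so trimming the logical range must not
advance block_start or shrink block_len. The sketch below isolates that trim
rule; it consolidates the lines above rather than adding behavior. Note also
that uncompress_inline() deliberately zero-fills the page instead of
propagating a decompression failure.

static void trim_extent_map(struct extent_map *em, u64 map_start, u64 map_len)
{
	u64 start_diff = map_start - em->start;

	em->start = map_start;
	em->len = map_len;
	/* only uncompressed maps move their disk pointer in lockstep */
	if (em->block_start < EXTENT_MAP_LAST_BYTE &&
	    !test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
		em->block_start += start_diff;
		em->block_len -= start_diff;
	}
}
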
@@ -2927,6 +3393,7 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,
 	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
 	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
 	struct btrfs_trans_handle *trans = NULL;
+	int compressed;
 
 again:
 	spin_lock(&em_tree->lock);
@@ -2951,6 +3418,7 @@ again:
 	em->bdev = root->fs_info->fs_devices->latest_bdev;
 	em->start = EXTENT_MAP_HOLE;
 	em->len = (u64)-1;
+	em->block_len = (u64)-1;
 
 	if (!path) {
 		path = btrfs_alloc_path();
@@ -2983,6 +3451,7 @@ again:
 
 	found_type = btrfs_file_extent_type(leaf, item);
 	extent_start = found_key.offset;
+	compressed = btrfs_file_extent_compression(leaf, item);
 	if (found_type == BTRFS_FILE_EXTENT_REG) {
 		extent_end = extent_start +
 			btrfs_file_extent_num_bytes(leaf, item);
@@ -3005,10 +3474,18 @@ again:
 			em->block_start = EXTENT_MAP_HOLE;
 			goto insert;
 		}
-		bytenr += btrfs_file_extent_offset(leaf, item);
-		em->block_start = bytenr;
 		em->start = extent_start;
 		em->len = extent_end - extent_start;
+		if (compressed) {
+			set_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
+			em->block_start = bytenr;
+			em->block_len = btrfs_file_extent_disk_num_bytes(leaf,
+									 item);
+		} else {
+			bytenr += btrfs_file_extent_offset(leaf, item);
+			em->block_start = bytenr;
+			em->block_len = em->len;
+		}
 		goto insert;
 	} else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
 		u64 page_start;
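
Aside: a worked example of the two branches above, using hypothetical
numbers: a 12KiB file extent at file offset 64KiB whose data compressed
down to one 4KiB block at disk bytenr 1MiB.

static void example_compressed_em(struct extent_map *em)
{
	em->start = 64 * 1024;		/* file offset */
	em->len = 12 * 1024;		/* logical (uncompressed) length */
	em->block_start = 1024 * 1024;	/* start of the whole blob on disk */
	em->block_len = 4 * 1024;	/* disk_num_bytes, not em->len */
	set_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
}

In the uncompressed branch, block_start is instead advanced by
btrfs_file_extent_offset() and block_len simply equals em->len.
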
@@ -3018,8 +3495,7 @@ again:
 		size_t extent_offset;
 		size_t copy_size;
 
-		size = btrfs_file_extent_inline_len(leaf, btrfs_item_nr(leaf,
-						    path->slots[0]));
+		size = btrfs_file_extent_inline_len(leaf, item);
 		extent_end = (extent_start + size + root->sectorsize - 1) &
 			~((u64)root->sectorsize - 1);
 		if (start < extent_start || start >= extent_end) {
@@ -3035,9 +3511,10 @@ again:
 		}
 		em->block_start = EXTENT_MAP_INLINE;
 
-		if (!page) {
+		if (!page || create) {
 			em->start = extent_start;
-			em->len = size;
+			em->len = (size + root->sectorsize - 1) &
+				~((u64)root->sectorsize - 1);
 			goto out;
 		}
 
@@ -3048,11 +3525,22 @@ again:
 		em->start = extent_start + extent_offset;
 		em->len = (copy_size + root->sectorsize - 1) &
 			~((u64)root->sectorsize - 1);
-		map = kmap(page);
+		if (compressed)
+			set_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
 		ptr = btrfs_file_extent_inline_start(item) + extent_offset;
 		if (create == 0 && !PageUptodate(page)) {
-			read_extent_buffer(leaf, map + pg_offset, ptr,
-					   copy_size);
+			if (btrfs_file_extent_compression(leaf, item) ==
+			    BTRFS_COMPRESS_ZLIB) {
+				ret = uncompress_inline(path, inode, page,
+							pg_offset,
+							extent_offset, item);
+				BUG_ON(ret);
+			} else {
+				map = kmap(page);
+				read_extent_buffer(leaf, map + pg_offset, ptr,
+						   copy_size);
+				kunmap(page);
+			}
 			flush_dcache_page(page);
 		} else if (create && PageUptodate(page)) {
 			if (!trans) {
@@ -3063,11 +3551,12 @@ again:
 				trans = btrfs_join_transaction(root, 1);
 				goto again;
 			}
+			map = kmap(page);
 			write_extent_buffer(leaf, map + pg_offset, ptr,
 					    copy_size);
+			kunmap(page);
 			btrfs_mark_buffer_dirty(leaf);
 		}
-		kunmap(page);
 		set_extent_uptodate(io_tree, em->start,
 				    extent_map_end(em) - 1, GFP_NOFS);
 		goto insert;
@@ -3779,6 +4268,11 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
 	btrfs_set_file_extent_generation(leaf, ei, trans->transid);
 	btrfs_set_file_extent_type(leaf, ei,
 				   BTRFS_FILE_EXTENT_INLINE);
+	btrfs_set_file_extent_encryption(leaf, ei, 0);
+	btrfs_set_file_extent_compression(leaf, ei, 0);
+	btrfs_set_file_extent_other_encoding(leaf, ei, 0);
+	btrfs_set_file_extent_ram_bytes(leaf, ei, name_len);
+
 	ptr = btrfs_file_extent_inline_start(ei);
 	write_extent_buffer(leaf, symname, ptr, name_len);
 	btrfs_mark_buffer_dirty(leaf);
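
Aside: symlinks are stored as inline extents, so this writer also has to
initialize the new encoding fields; an inline item written with them left
uninitialized would read back garbage flags. A sketch of the minimal field
set every inline-extent writer now owes, grouping the calls above; the
helper name is made up.

static void init_inline_extent_fields(struct extent_buffer *leaf,
				      struct btrfs_file_extent_item *ei,
				      u64 transid, u64 data_len)
{
	btrfs_set_file_extent_generation(leaf, ei, transid);
	btrfs_set_file_extent_type(leaf, ei, BTRFS_FILE_EXTENT_INLINE);
	btrfs_set_file_extent_encryption(leaf, ei, 0);
	btrfs_set_file_extent_compression(leaf, ei, 0);
	btrfs_set_file_extent_other_encoding(leaf, ei, 0);
	/* uncompressed payload length */
	btrfs_set_file_extent_ram_bytes(leaf, ei, data_len);
}
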