diff options
author | Chris Mason <chris.mason@oracle.com> | 2007-06-18 09:57:58 -0400 |
---|---|---|
committer | David Woodhouse <dwmw2@hera.kernel.org> | 2007-06-18 09:57:58 -0400 |
commit | 8c2383c3dd2cb5bb39598ce4fa97154bc591020a (patch) | |
tree | 68a9c491bfa19d572dca6b41e5dde7f05165c270 /fs/btrfs/file.c | |
parent | 9ebefb180bad4914a31c4e1748ba187a30e1e990 (diff) |
Subject: Rework btrfs_file_write to only allocate while page locks are held
Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs/btrfs/file.c')
-rw-r--r-- | fs/btrfs/file.c | 244 |
1 file changed, 133 insertions, 111 deletions
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index de8d47b44e12..6b455c2b3f03 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c | |||
@@ -207,6 +207,7 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, | |||
207 | } | 207 | } |
208 | path->slots[0]--; | 208 | path->slots[0]--; |
209 | } | 209 | } |
210 | next_slot: | ||
210 | keep = 0; | 211 | keep = 0; |
211 | bookend = 0; | 212 | bookend = 0; |
212 | found_extent = 0; | 213 | found_extent = 0; |
@@ -214,39 +215,48 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, | |||
214 | extent = NULL; | 215 | extent = NULL; |
215 | leaf = btrfs_buffer_leaf(path->nodes[0]); | 216 | leaf = btrfs_buffer_leaf(path->nodes[0]); |
216 | slot = path->slots[0]; | 217 | slot = path->slots[0]; |
218 | ret = 0; | ||
217 | btrfs_disk_key_to_cpu(&key, &leaf->items[slot].key); | 219 | btrfs_disk_key_to_cpu(&key, &leaf->items[slot].key); |
218 | if (key.offset >= end || key.objectid != inode->i_ino) { | 220 | if (key.offset >= end || key.objectid != inode->i_ino) { |
219 | ret = 0; | ||
220 | goto out; | 221 | goto out; |
221 | } | 222 | } |
222 | if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) { | 223 | if (btrfs_key_type(&key) > BTRFS_EXTENT_DATA_KEY) { |
223 | ret = 0; | ||
224 | goto out; | 224 | goto out; |
225 | } | 225 | } |
226 | extent = btrfs_item_ptr(leaf, slot, | 226 | if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY) { |
227 | struct btrfs_file_extent_item); | 227 | extent = btrfs_item_ptr(leaf, slot, |
228 | found_type = btrfs_file_extent_type(extent); | 228 | struct btrfs_file_extent_item); |
229 | if (found_type == BTRFS_FILE_EXTENT_REG) { | 229 | found_type = btrfs_file_extent_type(extent); |
230 | extent_end = key.offset + | 230 | if (found_type == BTRFS_FILE_EXTENT_REG) { |
231 | (btrfs_file_extent_num_blocks(extent) << | 231 | extent_end = key.offset + |
232 | inode->i_blkbits); | 232 | (btrfs_file_extent_num_blocks(extent) << |
233 | found_extent = 1; | 233 | inode->i_blkbits); |
234 | } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { | 234 | found_extent = 1; |
235 | found_inline = 1; | 235 | } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { |
236 | extent_end = key.offset + | 236 | found_inline = 1; |
237 | btrfs_file_extent_inline_len(leaf->items + slot); | 237 | extent_end = key.offset + |
238 | btrfs_file_extent_inline_len(leaf->items + | ||
239 | slot); | ||
240 | } | ||
241 | } else { | ||
242 | extent_end = search_start; | ||
238 | } | 243 | } |
239 | 244 | ||
240 | /* we found nothing we can drop */ | 245 | /* we found nothing we can drop */ |
241 | if (!found_extent && !found_inline) { | 246 | if ((!found_extent && !found_inline) || |
242 | ret = 0; | 247 | search_start >= extent_end) { |
243 | goto out; | 248 | int nextret; |
244 | } | 249 | u32 nritems; |
245 | 250 | nritems = btrfs_header_nritems( | |
246 | /* we found nothing inside the range */ | 251 | btrfs_buffer_header(path->nodes[0])); |
247 | if (search_start >= extent_end) { | 252 | if (slot >= nritems - 1) { |
248 | ret = 0; | 253 | nextret = btrfs_next_leaf(root, path); |
249 | goto out; | 254 | if (nextret) |
255 | goto out; | ||
256 | } else { | ||
257 | path->slots[0]++; | ||
258 | } | ||
259 | goto next_slot; | ||
250 | } | 260 | } |
251 | 261 | ||
252 | /* FIXME, there's only one inline extent allowed right now */ | 262 | /* FIXME, there's only one inline extent allowed right now */ |
@@ -272,7 +282,6 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, | |||
272 | WARN_ON(found_inline); | 282 | WARN_ON(found_inline); |
273 | bookend = 1; | 283 | bookend = 1; |
274 | } | 284 | } |
275 | |||
276 | /* truncate existing extent */ | 285 | /* truncate existing extent */ |
277 | if (start > key.offset) { | 286 | if (start > key.offset) { |
278 | u64 new_num; | 287 | u64 new_num; |
@@ -337,10 +346,14 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, | |||
337 | ins.offset = end; | 346 | ins.offset = end; |
338 | ins.flags = 0; | 347 | ins.flags = 0; |
339 | btrfs_set_key_type(&ins, BTRFS_EXTENT_DATA_KEY); | 348 | btrfs_set_key_type(&ins, BTRFS_EXTENT_DATA_KEY); |
340 | |||
341 | btrfs_release_path(root, path); | 349 | btrfs_release_path(root, path); |
342 | ret = btrfs_insert_empty_item(trans, root, path, &ins, | 350 | ret = btrfs_insert_empty_item(trans, root, path, &ins, |
343 | sizeof(*extent)); | 351 | sizeof(*extent)); |
352 | |||
353 | if (ret) { | ||
354 | btrfs_print_leaf(root, btrfs_buffer_leaf(path->nodes[0])); | ||
355 | printk("got %d on inserting %Lu %u %Lu start %Lu end %Lu found %Lu %Lu\n", ret , ins.objectid, ins.flags, ins.offset, start, end, key.offset, extent_end); | ||
356 | } | ||
344 | BUG_ON(ret); | 357 | BUG_ON(ret); |
345 | extent = btrfs_item_ptr( | 358 | extent = btrfs_item_ptr( |
346 | btrfs_buffer_leaf(path->nodes[0]), | 359 | btrfs_buffer_leaf(path->nodes[0]), |
@@ -387,8 +400,7 @@ static int prepare_pages(struct btrfs_root *root, | |||
387 | loff_t pos, | 400 | loff_t pos, |
388 | unsigned long first_index, | 401 | unsigned long first_index, |
389 | unsigned long last_index, | 402 | unsigned long last_index, |
390 | size_t write_bytes, | 403 | size_t write_bytes) |
391 | u64 alloc_extent_start) | ||
392 | { | 404 | { |
393 | int i; | 405 | int i; |
394 | unsigned long index = pos >> PAGE_CACHE_SHIFT; | 406 | unsigned long index = pos >> PAGE_CACHE_SHIFT; |
@@ -399,6 +411,16 @@ static int prepare_pages(struct btrfs_root *root, | |||
399 | struct buffer_head *bh; | 411 | struct buffer_head *bh; |
400 | struct buffer_head *head; | 412 | struct buffer_head *head; |
401 | loff_t isize = i_size_read(inode); | 413 | loff_t isize = i_size_read(inode); |
414 | struct btrfs_trans_handle *trans; | ||
415 | u64 hint_block; | ||
416 | u64 num_blocks; | ||
417 | u64 alloc_extent_start; | ||
418 | u64 start_pos; | ||
419 | struct btrfs_key ins; | ||
420 | |||
421 | start_pos = pos & ~((u64)PAGE_CACHE_SIZE - 1); | ||
422 | num_blocks = (write_bytes + pos - start_pos + root->blocksize - 1) >> | ||
423 | inode->i_blkbits; | ||
402 | 424 | ||
403 | memset(pages, 0, num_pages * sizeof(struct page *)); | 425 | memset(pages, 0, num_pages * sizeof(struct page *)); |
404 | 426 | ||
@@ -408,6 +430,72 @@ static int prepare_pages(struct btrfs_root *root, | |||
408 | err = -ENOMEM; | 430 | err = -ENOMEM; |
409 | goto failed_release; | 431 | goto failed_release; |
410 | } | 432 | } |
433 | } | ||
434 | |||
435 | mutex_lock(&root->fs_info->fs_mutex); | ||
436 | trans = btrfs_start_transaction(root, 1); | ||
437 | if (!trans) { | ||
438 | err = -ENOMEM; | ||
439 | mutex_unlock(&root->fs_info->fs_mutex); | ||
440 | goto out_unlock; | ||
441 | } | ||
442 | btrfs_set_trans_block_group(trans, inode); | ||
443 | /* FIXME blocksize != 4096 */ | ||
444 | inode->i_blocks += num_blocks << 3; | ||
445 | hint_block = 0; | ||
446 | |||
447 | /* FIXME...EIEIO, ENOSPC and more */ | ||
448 | |||
449 | /* step one, delete the existing extents in this range */ | ||
450 | /* FIXME blocksize != pagesize */ | ||
451 | if (start_pos < inode->i_size) { | ||
452 | err = btrfs_drop_extents(trans, root, inode, | ||
453 | start_pos, (pos + write_bytes + root->blocksize -1) & | ||
454 | ~((u64)root->blocksize - 1), &hint_block); | ||
455 | BUG_ON(err); | ||
456 | } | ||
457 | |||
458 | /* insert any holes we need to create */ | ||
459 | if (inode->i_size < start_pos) { | ||
460 | u64 last_pos_in_file; | ||
461 | u64 hole_size; | ||
462 | u64 mask = root->blocksize - 1; | ||
463 | last_pos_in_file = (isize + mask) & ~mask; | ||
464 | hole_size = (start_pos - last_pos_in_file + mask) & ~mask; | ||
465 | hole_size >>= inode->i_blkbits; | ||
466 | if (last_pos_in_file < start_pos) { | ||
467 | err = btrfs_insert_file_extent(trans, root, | ||
468 | inode->i_ino, | ||
469 | last_pos_in_file, | ||
470 | 0, 0, hole_size); | ||
471 | } | ||
472 | BUG_ON(err); | ||
473 | } | ||
474 | |||
475 | /* | ||
476 | * either allocate an extent for the new bytes or setup the key | ||
477 | * to show we are doing inline data in the extent | ||
478 | */ | ||
479 | if (isize >= PAGE_CACHE_SIZE || pos + write_bytes < inode->i_size || | ||
480 | pos + write_bytes - start_pos > BTRFS_MAX_INLINE_DATA_SIZE(root)) { | ||
481 | err = btrfs_alloc_extent(trans, root, inode->i_ino, | ||
482 | num_blocks, hint_block, (u64)-1, | ||
483 | &ins, 1); | ||
484 | BUG_ON(err); | ||
485 | err = btrfs_insert_file_extent(trans, root, inode->i_ino, | ||
486 | start_pos, ins.objectid, ins.offset, | ||
487 | ins.offset); | ||
488 | BUG_ON(err); | ||
489 | } else { | ||
490 | ins.offset = 0; | ||
491 | ins.objectid = 0; | ||
492 | } | ||
493 | BUG_ON(err); | ||
494 | alloc_extent_start = ins.objectid; | ||
495 | err = btrfs_end_transaction(trans, root); | ||
496 | mutex_unlock(&root->fs_info->fs_mutex); | ||
497 | |||
498 | for (i = 0; i < num_pages; i++) { | ||
411 | cancel_dirty_page(pages[i], PAGE_CACHE_SIZE); | 499 | cancel_dirty_page(pages[i], PAGE_CACHE_SIZE); |
412 | wait_on_page_writeback(pages[i]); | 500 | wait_on_page_writeback(pages[i]); |
413 | offset = pos & (PAGE_CACHE_SIZE -1); | 501 | offset = pos & (PAGE_CACHE_SIZE -1); |
@@ -444,6 +532,11 @@ failed_truncate: | |||
444 | if (pos > isize) | 532 | if (pos > isize) |
445 | vmtruncate(inode, isize); | 533 | vmtruncate(inode, isize); |
446 | return err; | 534 | return err; |
535 | |||
536 | out_unlock: | ||
537 | mutex_unlock(&root->fs_info->fs_mutex); | ||
538 | goto failed_release; | ||
539 | |||
447 | } | 540 | } |
448 | 541 | ||
449 | static ssize_t btrfs_file_write(struct file *file, const char __user *buf, | 542 | static ssize_t btrfs_file_write(struct file *file, const char __user *buf, |
@@ -455,16 +548,14 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, | |||
455 | int ret = 0; | 548 | int ret = 0; |
456 | struct inode *inode = file->f_path.dentry->d_inode; | 549 | struct inode *inode = file->f_path.dentry->d_inode; |
457 | struct btrfs_root *root = BTRFS_I(inode)->root; | 550 | struct btrfs_root *root = BTRFS_I(inode)->root; |
458 | struct page *pages[8]; | 551 | struct page **pages = NULL; |
552 | int nrptrs; | ||
459 | struct page *pinned[2]; | 553 | struct page *pinned[2]; |
460 | unsigned long first_index; | 554 | unsigned long first_index; |
461 | unsigned long last_index; | 555 | unsigned long last_index; |
462 | u64 start_pos; | 556 | |
463 | u64 num_blocks; | 557 | nrptrs = min((count + PAGE_CACHE_SIZE - 1) / PAGE_CACHE_SIZE, |
464 | u64 alloc_extent_start; | 558 | PAGE_CACHE_SIZE / (sizeof(struct page *))); |
465 | u64 hint_block; | ||
466 | struct btrfs_trans_handle *trans; | ||
467 | struct btrfs_key ins; | ||
468 | pinned[0] = NULL; | 559 | pinned[0] = NULL; |
469 | pinned[1] = NULL; | 560 | pinned[1] = NULL; |
470 | if (file->f_flags & O_DIRECT) | 561 | if (file->f_flags & O_DIRECT) |
@@ -482,9 +573,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, | |||
482 | goto out; | 573 | goto out; |
483 | file_update_time(file); | 574 | file_update_time(file); |
484 | 575 | ||
485 | start_pos = pos & ~((u64)PAGE_CACHE_SIZE - 1); | 576 | pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL); |
486 | num_blocks = (count + pos - start_pos + root->blocksize - 1) >> | ||
487 | inode->i_blkbits; | ||
488 | 577 | ||
489 | mutex_lock(&inode->i_mutex); | 578 | mutex_lock(&inode->i_mutex); |
490 | first_index = pos >> PAGE_CACHE_SHIFT; | 579 | first_index = pos >> PAGE_CACHE_SHIFT; |
@@ -516,87 +605,20 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, | |||
516 | } | 605 | } |
517 | } | 606 | } |
518 | 607 | ||
519 | mutex_lock(&root->fs_info->fs_mutex); | ||
520 | trans = btrfs_start_transaction(root, 1); | ||
521 | if (!trans) { | ||
522 | err = -ENOMEM; | ||
523 | mutex_unlock(&root->fs_info->fs_mutex); | ||
524 | goto out_unlock; | ||
525 | } | ||
526 | btrfs_set_trans_block_group(trans, inode); | ||
527 | /* FIXME blocksize != 4096 */ | ||
528 | inode->i_blocks += num_blocks << 3; | ||
529 | hint_block = 0; | ||
530 | |||
531 | /* FIXME...EIEIO, ENOSPC and more */ | ||
532 | |||
533 | /* step one, delete the existing extents in this range */ | ||
534 | if (start_pos < inode->i_size) { | ||
535 | /* FIXME blocksize != pagesize */ | ||
536 | ret = btrfs_drop_extents(trans, root, inode, | ||
537 | start_pos, | ||
538 | (pos + count + root->blocksize -1) & | ||
539 | ~((u64)root->blocksize - 1), | ||
540 | &hint_block); | ||
541 | BUG_ON(ret); | ||
542 | } | ||
543 | |||
544 | /* insert any holes we need to create */ | ||
545 | if (inode->i_size < start_pos) { | ||
546 | u64 last_pos_in_file; | ||
547 | u64 hole_size; | ||
548 | u64 mask = root->blocksize - 1; | ||
549 | last_pos_in_file = (inode->i_size + mask) & ~mask; | ||
550 | hole_size = (start_pos - last_pos_in_file + mask) & ~mask; | ||
551 | hole_size >>= inode->i_blkbits; | ||
552 | if (last_pos_in_file < start_pos) { | ||
553 | ret = btrfs_insert_file_extent(trans, root, | ||
554 | inode->i_ino, | ||
555 | last_pos_in_file, | ||
556 | 0, 0, hole_size); | ||
557 | } | ||
558 | BUG_ON(ret); | ||
559 | } | ||
560 | |||
561 | /* | ||
562 | * either allocate an extent for the new bytes or setup the key | ||
563 | * to show we are doing inline data in the extent | ||
564 | */ | ||
565 | if (inode->i_size >= PAGE_CACHE_SIZE || pos + count < inode->i_size || | ||
566 | pos + count - start_pos > BTRFS_MAX_INLINE_DATA_SIZE(root)) { | ||
567 | ret = btrfs_alloc_extent(trans, root, inode->i_ino, | ||
568 | num_blocks, hint_block, (u64)-1, | ||
569 | &ins, 1); | ||
570 | BUG_ON(ret); | ||
571 | ret = btrfs_insert_file_extent(trans, root, inode->i_ino, | ||
572 | start_pos, ins.objectid, ins.offset, | ||
573 | ins.offset); | ||
574 | BUG_ON(ret); | ||
575 | } else { | ||
576 | ins.offset = 0; | ||
577 | ins.objectid = 0; | ||
578 | } | ||
579 | BUG_ON(ret); | ||
580 | alloc_extent_start = ins.objectid; | ||
581 | ret = btrfs_end_transaction(trans, root); | ||
582 | mutex_unlock(&root->fs_info->fs_mutex); | ||
583 | |||
584 | while(count > 0) { | 608 | while(count > 0) { |
585 | size_t offset = pos & (PAGE_CACHE_SIZE - 1); | 609 | size_t offset = pos & (PAGE_CACHE_SIZE - 1); |
586 | size_t write_bytes = min(count, | 610 | size_t write_bytes = min(count, nrptrs * PAGE_CACHE_SIZE - |
587 | (size_t)PAGE_CACHE_SIZE - offset); | 611 | offset); |
588 | size_t num_pages = (write_bytes + PAGE_CACHE_SIZE - 1) >> | 612 | size_t num_pages = (write_bytes + PAGE_CACHE_SIZE - 1) >> |
589 | PAGE_CACHE_SHIFT; | 613 | PAGE_CACHE_SHIFT; |
590 | 614 | ||
615 | WARN_ON(num_pages > nrptrs); | ||
591 | memset(pages, 0, sizeof(pages)); | 616 | memset(pages, 0, sizeof(pages)); |
592 | ret = prepare_pages(root, file, pages, num_pages, | 617 | ret = prepare_pages(root, file, pages, num_pages, |
593 | pos, first_index, last_index, | 618 | pos, first_index, last_index, |
594 | write_bytes, alloc_extent_start); | 619 | write_bytes); |
595 | BUG_ON(ret); | 620 | BUG_ON(ret); |
596 | 621 | ||
597 | /* FIXME blocks != pagesize */ | ||
598 | if (alloc_extent_start) | ||
599 | alloc_extent_start += num_pages; | ||
600 | ret = btrfs_copy_from_user(pos, num_pages, | 622 | ret = btrfs_copy_from_user(pos, num_pages, |
601 | write_bytes, pages, buf); | 623 | write_bytes, pages, buf); |
602 | BUG_ON(ret); | 624 | BUG_ON(ret); |
@@ -611,13 +633,13 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, | |||
611 | pos += write_bytes; | 633 | pos += write_bytes; |
612 | num_written += write_bytes; | 634 | num_written += write_bytes; |
613 | 635 | ||
614 | balance_dirty_pages_ratelimited(inode->i_mapping); | 636 | balance_dirty_pages_ratelimited_nr(inode->i_mapping, num_pages); |
615 | btrfs_btree_balance_dirty(root); | 637 | btrfs_btree_balance_dirty(root); |
616 | cond_resched(); | 638 | cond_resched(); |
617 | } | 639 | } |
618 | out_unlock: | ||
619 | mutex_unlock(&inode->i_mutex); | 640 | mutex_unlock(&inode->i_mutex); |
620 | out: | 641 | out: |
642 | kfree(pages); | ||
621 | if (pinned[0]) | 643 | if (pinned[0]) |
622 | page_cache_release(pinned[0]); | 644 | page_cache_release(pinned[0]); |
623 | if (pinned[1]) | 645 | if (pinned[1]) |