author     Linus Torvalds <torvalds@linux-foundation.org>   2012-05-24 17:14:46 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>   2012-05-24 17:14:46 -0400
commit     9978306e31a8f89bd81fbc4c49fd9aefb1d30d10
tree       85bbd03336a82d20a00761ed35eb05536936b881
parent     abe81e25f08abbac493754a043f7a91a1b3e0f93
parent     14c26c6a05de138a4fd9a0c05ff8e7435a618324
Merge branch 'for-linus' of git://oss.sgi.com/xfs/xfs
Pull XFS update from Ben Myers:

 - Removal of xfsbufd
 - Background CIL flushes have been moved to a workqueue
 - Fix to xfs_check_page_type applicable to filesystems where
   blocksize < page size
 - Fix for stale data exposure when extsize hints are used
 - A series of xfs_buf cache cleanups
 - Fix for XFS_IOC_ALLOCSP
 - Cleanups for includes and removal of xfs_lrw.[ch]
 - Moved all busy extent handling to its own file so that it is easier
   to merge with userspace
 - Fix for log mount failure
 - Fix to enable inode reclaim during quotacheck at mount time
 - Fix for delalloc quota accounting
 - Fix for memory reclaim deadlock on AGI buffer
 - Fixes for failed writes and to clean up stale delalloc blocks
 - Fix to use GFP_NOFS in blkdev_issue_flush
 - SEEK_DATA/SEEK_HOLE support (a minimal usage sketch follows the
   diffstat below)
* 'for-linus' of git://oss.sgi.com/xfs/xfs: (57 commits)
xfs: add trace points for log forces
xfs: fix memory reclaim deadlock on agi buffer
xfs: fix delalloc quota accounting on failure
xfs: protect xfs_sync_worker with s_umount semaphore
xfs: introduce SEEK_DATA/SEEK_HOLE support
xfs: make xfs_extent_busy_trim not static
xfs: make XBF_MAPPED the default behaviour
xfs: flush outstanding buffers on log mount failure
xfs: Properly exclude IO type flags from buffer flags
xfs: clean up xfs_bit.h includes
xfs: move xfs_do_force_shutdown() and kill xfs_rw.c
xfs: move xfs_get_extsz_hint() and kill xfs_rw.h
xfs: move xfs_fsb_to_db to xfs_bmap.h
xfs: clean up busy extent naming
xfs: move busy extent handling to it's own file
xfs: move xfsagino_t to xfs_types.h
xfs: use iolock on XFS_IOC_ALLOCSP calls
xfs: kill XBF_DONTBLOCK
xfs: kill xfs_read_buf()
xfs: kill XBF_LOCK
...
80 files changed, 2459 insertions(+), 2852 deletions(-)
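Since SEEK_DATA/SEEK_HOLE is the one new user-visible interface in this
pull, here is a minimal userspace sketch of how it is consumed. This is
illustrative only, not part of the patch, and error handling is trimmed:

	/*
	 * Walk the data segments of a (possibly sparse) file using the
	 * lseek() whence values SEEK_DATA and SEEK_HOLE: SEEK_DATA seeks
	 * to the next offset at or after the given one that contains
	 * data, SEEK_HOLE to the start of the next hole.  lseek() fails
	 * with ENXIO once past the last data segment, ending the loop.
	 */
	#define _GNU_SOURCE
	#include <fcntl.h>
	#include <stdio.h>
	#include <unistd.h>

	int main(int argc, char **argv)
	{
		off_t data = 0, hole;
		int fd = open(argv[1], O_RDONLY);

		if (fd < 0)
			return 1;
		while ((data = lseek(fd, data, SEEK_DATA)) >= 0) {
			hole = lseek(fd, data, SEEK_HOLE);
			if (hole < 0)
				break;
			printf("data: [%lld, %lld)\n",
			       (long long)data, (long long)hole);
			data = hole;
		}
		close(fd);
		return 0;
	}
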
diff --git a/MAINTAINERS b/MAINTAINERS
index eaff0392eb32..150a29f3cd33 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -7623,7 +7623,7 @@ XFS FILESYSTEM
 P:	Silicon Graphics Inc
 M:	Ben Myers <bpm@sgi.com>
 M:	Alex Elder <elder@kernel.org>
-M:	xfs-masters@oss.sgi.com
+M:	xfs@oss.sgi.com
 L:	xfs@oss.sgi.com
 W:	http://oss.sgi.com/projects/xfs
 T:	git git://oss.sgi.com/xfs/xfs.git
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index 0a9977983f92..d2bf974b1a2f 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -33,6 +33,7 @@ xfs-y += xfs_aops.o \
 	xfs_discard.o \
 	xfs_error.o \
 	xfs_export.o \
+	xfs_extent_busy.o \
 	xfs_file.o \
 	xfs_filestream.o \
 	xfs_fsops.o \
@@ -49,7 +50,6 @@ xfs-y += xfs_aops.o \
 	xfs_sync.o \
 	xfs_xattr.o \
 	xfs_rename.o \
-	xfs_rw.o \
 	xfs_utils.o \
 	xfs_vnodeops.o \
 	kmem.o \
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h
index 4805f009f923..44d65c1533c0 100644
--- a/fs/xfs/xfs_ag.h
+++ b/fs/xfs/xfs_ag.h
@@ -175,24 +175,6 @@ typedef struct xfs_agfl {
 } xfs_agfl_t;
 
 /*
- * Busy block/extent entry.  Indexed by a rbtree in perag to mark blocks that
- * have been freed but whose transactions aren't committed to disk yet.
- *
- * Note that we use the transaction ID to record the transaction, not the
- * transaction structure itself. See xfs_alloc_busy_insert() for details.
- */
-struct xfs_busy_extent {
-	struct rb_node	rb_node;	/* ag by-bno indexed search tree */
-	struct list_head list;		/* transaction busy extent list */
-	xfs_agnumber_t	agno;
-	xfs_agblock_t	bno;
-	xfs_extlen_t	length;
-	unsigned int	flags;
-#define XFS_ALLOC_BUSY_DISCARDED	0x01	/* undergoing a discard op. */
-#define XFS_ALLOC_BUSY_SKIP_DISCARD	0x02	/* do not discard */
-};
-
-/*
  * Per-ag incore structure, copies of information in agf and agi,
  * to improve the performance of allocation group selection.
  */
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index 0f0df2759b09..229641fb8e67 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -20,7 +20,6 @@
 #include "xfs_types.h"
 #include "xfs_bit.h"
 #include "xfs_log.h"
-#include "xfs_inum.h"
 #include "xfs_trans.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
@@ -32,6 +31,7 @@
 #include "xfs_inode.h"
 #include "xfs_btree.h"
 #include "xfs_alloc.h"
+#include "xfs_extent_busy.h"
 #include "xfs_error.h"
 #include "xfs_trace.h"
 
@@ -47,8 +47,6 @@ STATIC int xfs_alloc_ag_vextent_near(xfs_alloc_arg_t *);
 STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *);
 STATIC int xfs_alloc_ag_vextent_small(xfs_alloc_arg_t *,
 		xfs_btree_cur_t *, xfs_agblock_t *, xfs_extlen_t *, int *);
-STATIC void xfs_alloc_busy_trim(struct xfs_alloc_arg *,
-		xfs_agblock_t, xfs_extlen_t, xfs_agblock_t *, xfs_extlen_t *);
 
 /*
  * Lookup the record equal to [bno, len] in the btree given by cur.
@@ -152,7 +150,7 @@ xfs_alloc_compute_aligned(
 	xfs_extlen_t	len;
 
 	/* Trim busy sections out of found extent */
-	xfs_alloc_busy_trim(args, foundbno, foundlen, &bno, &len);
+	xfs_extent_busy_trim(args, foundbno, foundlen, &bno, &len);
 
 	if (args->alignment > 1 && len >= args->minlen) {
 		xfs_agblock_t	aligned_bno = roundup(bno, args->alignment);
@@ -536,7 +534,7 @@ xfs_alloc_ag_vextent(
 		if (error)
 			return error;
 
-		ASSERT(!xfs_alloc_busy_search(args->mp, args->agno,
+		ASSERT(!xfs_extent_busy_search(args->mp, args->agno,
 					      args->agbno, args->len));
 	}
 
@@ -603,7 +601,7 @@ xfs_alloc_ag_vextent_exact(
 	/*
 	 * Check for overlapping busy extents.
 	 */
-	xfs_alloc_busy_trim(args, fbno, flen, &tbno, &tlen);
+	xfs_extent_busy_trim(args, fbno, flen, &tbno, &tlen);
 
 	/*
 	 * Give up if the start of the extent is busy, or the freespace isn't
@@ -1391,7 +1389,7 @@ xfs_alloc_ag_vextent_small(
 		if (error)
 			goto error0;
 		if (fbno != NULLAGBLOCK) {
-			xfs_alloc_busy_reuse(args->mp, args->agno, fbno, 1,
+			xfs_extent_busy_reuse(args->mp, args->agno, fbno, 1,
 					     args->userdata);
 
 			if (args->userdata) {
@@ -2496,579 +2494,8 @@ xfs_free_extent(
 
 	error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno, len, 0);
 	if (!error)
-		xfs_alloc_busy_insert(tp, args.agno, args.agbno, len, 0);
+		xfs_extent_busy_insert(tp, args.agno, args.agbno, len, 0);
 error0:
 	xfs_perag_put(args.pag);
 	return error;
 }
-
-void
-xfs_alloc_busy_insert(
-	struct xfs_trans	*tp,
-	xfs_agnumber_t		agno,
-	xfs_agblock_t		bno,
-	xfs_extlen_t		len,
-	unsigned int		flags)
-{
-	struct xfs_busy_extent	*new;
-	struct xfs_busy_extent	*busyp;
-	struct xfs_perag	*pag;
-	struct rb_node		**rbp;
-	struct rb_node		*parent = NULL;
-
-	new = kmem_zalloc(sizeof(struct xfs_busy_extent), KM_MAYFAIL);
-	if (!new) {
-		/*
-		 * No Memory!  Since it is now not possible to track the free
-		 * block, make this a synchronous transaction to insure that
-		 * the block is not reused before this transaction commits.
-		 */
-		trace_xfs_alloc_busy_enomem(tp->t_mountp, agno, bno, len);
-		xfs_trans_set_sync(tp);
-		return;
-	}
-
-	new->agno = agno;
-	new->bno = bno;
-	new->length = len;
-	INIT_LIST_HEAD(&new->list);
-	new->flags = flags;
-
-	/* trace before insert to be able to see failed inserts */
-	trace_xfs_alloc_busy(tp->t_mountp, agno, bno, len);
-
-	pag = xfs_perag_get(tp->t_mountp, new->agno);
-	spin_lock(&pag->pagb_lock);
-	rbp = &pag->pagb_tree.rb_node;
-	while (*rbp) {
-		parent = *rbp;
-		busyp = rb_entry(parent, struct xfs_busy_extent, rb_node);
-
-		if (new->bno < busyp->bno) {
-			rbp = &(*rbp)->rb_left;
-			ASSERT(new->bno + new->length <= busyp->bno);
-		} else if (new->bno > busyp->bno) {
-			rbp = &(*rbp)->rb_right;
-			ASSERT(bno >= busyp->bno + busyp->length);
-		} else {
-			ASSERT(0);
-		}
-	}
-
-	rb_link_node(&new->rb_node, parent, rbp);
-	rb_insert_color(&new->rb_node, &pag->pagb_tree);
-
-	list_add(&new->list, &tp->t_busy);
-	spin_unlock(&pag->pagb_lock);
-	xfs_perag_put(pag);
-}
-
-/*
- * Search for a busy extent within the range of the extent we are about to
- * allocate.  You need to be holding the busy extent tree lock when calling
- * xfs_alloc_busy_search(). This function returns 0 for no overlapping busy
- * extent, -1 for an overlapping but not exact busy extent, and 1 for an exact
- * match. This is done so that a non-zero return indicates an overlap that
- * will require a synchronous transaction, but it can still be
- * used to distinguish between a partial or exact match.
- */
-int
-xfs_alloc_busy_search(
-	struct xfs_mount	*mp,
-	xfs_agnumber_t		agno,
-	xfs_agblock_t		bno,
-	xfs_extlen_t		len)
-{
-	struct xfs_perag	*pag;
-	struct rb_node		*rbp;
-	struct xfs_busy_extent	*busyp;
-	int			match = 0;
-
-	pag = xfs_perag_get(mp, agno);
-	spin_lock(&pag->pagb_lock);
-
-	rbp = pag->pagb_tree.rb_node;
-
-	/* find closest start bno overlap */
-	while (rbp) {
-		busyp = rb_entry(rbp, struct xfs_busy_extent, rb_node);
-		if (bno < busyp->bno) {
-			/* may overlap, but exact start block is lower */
-			if (bno + len > busyp->bno)
-				match = -1;
-			rbp = rbp->rb_left;
-		} else if (bno > busyp->bno) {
-			/* may overlap, but exact start block is higher */
-			if (bno < busyp->bno + busyp->length)
-				match = -1;
-			rbp = rbp->rb_right;
-		} else {
-			/* bno matches busyp, length determines exact match */
-			match = (busyp->length == len) ? 1 : -1;
-			break;
-		}
-	}
-	spin_unlock(&pag->pagb_lock);
-	xfs_perag_put(pag);
-	return match;
-}
-
-/*
- * The found free extent [fbno, fend] overlaps part or all of the given busy
- * extent.  If the overlap covers the beginning, the end, or all of the busy
- * extent, the overlapping portion can be made unbusy and used for the
- * allocation.  We can't split a busy extent because we can't modify a
- * transaction/CIL context busy list, but we can update an entries block
- * number or length.
- *
- * Returns true if the extent can safely be reused, or false if the search
- * needs to be restarted.
- */
-STATIC bool
-xfs_alloc_busy_update_extent(
-	struct xfs_mount	*mp,
-	struct xfs_perag	*pag,
-	struct xfs_busy_extent	*busyp,
-	xfs_agblock_t		fbno,
-	xfs_extlen_t		flen,
-	bool			userdata)
-{
-	xfs_agblock_t		fend = fbno + flen;
-	xfs_agblock_t		bbno = busyp->bno;
-	xfs_agblock_t		bend = bbno + busyp->length;
-
-	/*
-	 * This extent is currently being discarded.  Give the thread
-	 * performing the discard a chance to mark the extent unbusy
-	 * and retry.
-	 */
-	if (busyp->flags & XFS_ALLOC_BUSY_DISCARDED) {
-		spin_unlock(&pag->pagb_lock);
-		delay(1);
-		spin_lock(&pag->pagb_lock);
-		return false;
-	}
-
-	/*
-	 * If there is a busy extent overlapping a user allocation, we have
-	 * no choice but to force the log and retry the search.
-	 *
-	 * Fortunately this does not happen during normal operation, but
-	 * only if the filesystem is very low on space and has to dip into
-	 * the AGFL for normal allocations.
-	 */
-	if (userdata)
-		goto out_force_log;
-
-	if (bbno < fbno && bend > fend) {
-		/*
-		 * Case 1:
-		 *    bbno           bend
-		 *    +BBBBBBBBBBBBBBBBB+
-		 *        +---------+
-		 *        fbno   fend
-		 */
-
-		/*
-		 * We would have to split the busy extent to be able to track
-		 * it correct, which we cannot do because we would have to
-		 * modify the list of busy extents attached to the transaction
-		 * or CIL context, which is immutable.
-		 *
-		 * Force out the log to clear the busy extent and retry the
-		 * search.
-		 */
-		goto out_force_log;
-	} else if (bbno >= fbno && bend <= fend) {
-		/*
-		 * Case 2:
-		 *    bbno           bend
-		 *    +BBBBBBBBBBBBBBBBB+
-		 *    +-----------------+
-		 *    fbno           fend
-		 *
-		 * Case 3:
-		 *    bbno           bend
-		 *    +BBBBBBBBBBBBBBBBB+
-		 *    +--------------------------+
-		 *    fbno                    fend
-		 *
-		 * Case 4:
-		 *             bbno           bend
-		 *             +BBBBBBBBBBBBBBBBB+
-		 *    +--------------------------+
-		 *    fbno                    fend
-		 *
-		 * Case 5:
-		 *             bbno           bend
-		 *             +BBBBBBBBBBBBBBBBB+
-		 *    +-----------------------------------+
-		 *    fbno                             fend
-		 *
-		 */
-
-		/*
-		 * The busy extent is fully covered by the extent we are
-		 * allocating, and can simply be removed from the rbtree.
-		 * However we cannot remove it from the immutable list
-		 * tracking busy extents in the transaction or CIL context,
-		 * so set the length to zero to mark it invalid.
-		 *
-		 * We also need to restart the busy extent search from the
-		 * tree root, because erasing the node can rearrange the
-		 * tree topology.
-		 */
-		rb_erase(&busyp->rb_node, &pag->pagb_tree);
-		busyp->length = 0;
-		return false;
-	} else if (fend < bend) {
-		/*
-		 * Case 6:
-		 *              bbno           bend
-		 *              +BBBBBBBBBBBBBBBBB+
-		 *    +---------+
-		 *    fbno   fend
-		 *
-		 * Case 7:
-		 *             bbno           bend
-		 *             +BBBBBBBBBBBBBBBBB+
-		 *    +------------------+
-		 *    fbno            fend
-		 *
-		 */
-		busyp->bno = fend;
-	} else if (bbno < fbno) {
-		/*
-		 * Case 8:
-		 *    bbno           bend
-		 *    +BBBBBBBBBBBBBBBBB+
-		 *        +-------------+
-		 *        fbno       fend
-		 *
-		 * Case 9:
-		 *    bbno           bend
-		 *    +BBBBBBBBBBBBBBBBB+
-		 *        +----------------------+
-		 *        fbno                fend
-		 */
-		busyp->length = fbno - busyp->bno;
-	} else {
-		ASSERT(0);
-	}
-
-	trace_xfs_alloc_busy_reuse(mp, pag->pag_agno, fbno, flen);
-	return true;
-
-out_force_log:
-	spin_unlock(&pag->pagb_lock);
-	xfs_log_force(mp, XFS_LOG_SYNC);
-	trace_xfs_alloc_busy_force(mp, pag->pag_agno, fbno, flen);
-	spin_lock(&pag->pagb_lock);
-	return false;
-}
-
-
-/*
- * For a given extent [fbno, flen], make sure we can reuse it safely.
- */
-void
-xfs_alloc_busy_reuse(
-	struct xfs_mount	*mp,
-	xfs_agnumber_t		agno,
-	xfs_agblock_t		fbno,
-	xfs_extlen_t		flen,
-	bool			userdata)
-{
-	struct xfs_perag	*pag;
-	struct rb_node		*rbp;
-
-	ASSERT(flen > 0);
-
-	pag = xfs_perag_get(mp, agno);
-	spin_lock(&pag->pagb_lock);
-restart:
-	rbp = pag->pagb_tree.rb_node;
-	while (rbp) {
-		struct xfs_busy_extent *busyp =
-			rb_entry(rbp, struct xfs_busy_extent, rb_node);
-		xfs_agblock_t	bbno = busyp->bno;
-		xfs_agblock_t	bend = bbno + busyp->length;
-
-		if (fbno + flen <= bbno) {
-			rbp = rbp->rb_left;
-			continue;
-		} else if (fbno >= bend) {
-			rbp = rbp->rb_right;
-			continue;
-		}
-
-		if (!xfs_alloc_busy_update_extent(mp, pag, busyp, fbno, flen,
-						  userdata))
-			goto restart;
-	}
-	spin_unlock(&pag->pagb_lock);
-	xfs_perag_put(pag);
-}
-
-/*
- * For a given extent [fbno, flen], search the busy extent list to find a
- * subset of the extent that is not busy.  If *rlen is smaller than
- * args->minlen no suitable extent could be found, and the higher level
- * code needs to force out the log and retry the allocation.
- */
-STATIC void
-xfs_alloc_busy_trim(
-	struct xfs_alloc_arg	*args,
-	xfs_agblock_t		bno,
-	xfs_extlen_t		len,
-	xfs_agblock_t		*rbno,
-	xfs_extlen_t		*rlen)
-{
-	xfs_agblock_t		fbno;
-	xfs_extlen_t		flen;
-	struct rb_node		*rbp;
-
-	ASSERT(len > 0);
-
-	spin_lock(&args->pag->pagb_lock);
-restart:
-	fbno = bno;
-	flen = len;
-	rbp = args->pag->pagb_tree.rb_node;
-	while (rbp && flen >= args->minlen) {
-		struct xfs_busy_extent *busyp =
-			rb_entry(rbp, struct xfs_busy_extent, rb_node);
-		xfs_agblock_t	fend = fbno + flen;
-		xfs_agblock_t	bbno = busyp->bno;
-		xfs_agblock_t	bend = bbno + busyp->length;
-
-		if (fend <= bbno) {
-			rbp = rbp->rb_left;
-			continue;
-		} else if (fbno >= bend) {
-			rbp = rbp->rb_right;
-			continue;
-		}
-
-		/*
-		 * If this is a metadata allocation, try to reuse the busy
-		 * extent instead of trimming the allocation.
-		 */
-		if (!args->userdata &&
-		    !(busyp->flags & XFS_ALLOC_BUSY_DISCARDED)) {
-			if (!xfs_alloc_busy_update_extent(args->mp, args->pag,
-							  busyp, fbno, flen,
-							  false))
-				goto restart;
-			continue;
-		}
-
-		if (bbno <= fbno) {
-			/* start overlap */
-
-			/*
-			 * Case 1:
-			 *    bbno           bend
-			 *    +BBBBBBBBBBBBBBBBB+
-			 *        +---------+
-			 *        fbno   fend
-			 *
-			 * Case 2:
-			 *    bbno           bend
-			 *    +BBBBBBBBBBBBBBBBB+
-			 *    +-------------+
-			 *    fbno       fend
-			 *
-			 * Case 3:
-			 *    bbno           bend
-			 *    +BBBBBBBBBBBBBBBBB+
-			 *        +-------------+
-			 *        fbno       fend
-			 *
-			 * Case 4:
-			 *    bbno           bend
-			 *    +BBBBBBBBBBBBBBBBB+
-			 *    +-----------------+
-			 *    fbno           fend
-			 *
-			 * No unbusy region in extent, return failure.
-			 */
-			if (fend <= bend)
-				goto fail;
-
-			/*
-			 * Case 5:
-			 *    bbno           bend
-			 *    +BBBBBBBBBBBBBBBBB+
-			 *        +----------------------+
-			 *        fbno                fend
-			 *
-			 * Case 6:
-			 *    bbno           bend
-			 *    +BBBBBBBBBBBBBBBBB+
-			 *    +--------------------------+
-			 *    fbno                    fend
-			 *
-			 * Needs to be trimmed to:
-			 *                       +-------+
-			 *                       fbno fend
-			 */
-			fbno = bend;
-		} else if (bend >= fend) {
-			/* end overlap */
-
-			/*
-			 * Case 7:
-			 *             bbno           bend
-			 *             +BBBBBBBBBBBBBBBBB+
-			 *    +------------------+
-			 *    fbno            fend
-			 *
-			 * Case 8:
-			 *             bbno           bend
-			 *             +BBBBBBBBBBBBBBBBB+
-			 *    +--------------------------+
-			 *    fbno                    fend
-			 *
-			 * Needs to be trimmed to:
-			 *    +-------+
-			 *    fbno fend
-			 */
-			fend = bbno;
-		} else {
-			/* middle overlap */
-
-			/*
-			 * Case 9:
-			 *             bbno           bend
-			 *             +BBBBBBBBBBBBBBBBB+
-			 *    +-----------------------------------+
-			 *    fbno                             fend
-			 *
-			 * Can be trimmed to:
-			 *    +-------+        OR         +-------+
-			 *    fbno fend                   fbno fend
-			 *
-			 * Backward allocation leads to significant
-			 * fragmentation of directories, which degrades
-			 * directory performance, therefore we always want to
-			 * choose the option that produces forward allocation
-			 * patterns.
-			 * Preferring the lower bno extent will make the next
-			 * request use "fend" as the start of the next
-			 * allocation;  if the segment is no longer busy at
-			 * that point, we'll get a contiguous allocation, but
-			 * even if it is still busy, we will get a forward
-			 * allocation.
-			 * We try to avoid choosing the segment at "bend",
-			 * because that can lead to the next allocation
-			 * taking the segment at "fbno", which would be a
-			 * backward allocation.  We only use the segment at
-			 * "fbno" if it is much larger than the current
-			 * requested size, because in that case there's a
-			 * good chance subsequent allocations will be
-			 * contiguous.
-			 */
-			if (bbno - fbno >= args->maxlen) {
-				/* left candidate fits perfect */
-				fend = bbno;
-			} else if (fend - bend >= args->maxlen * 4) {
-				/* right candidate has enough free space */
-				fbno = bend;
-			} else if (bbno - fbno >= args->minlen) {
-				/* left candidate fits minimum requirement */
-				fend = bbno;
-			} else {
-				goto fail;
-			}
-		}
-
-		flen = fend - fbno;
-	}
-	spin_unlock(&args->pag->pagb_lock);
-
-	if (fbno != bno || flen != len) {
-		trace_xfs_alloc_busy_trim(args->mp, args->agno, bno, len,
-					  fbno, flen);
-	}
-	*rbno = fbno;
-	*rlen = flen;
-	return;
-fail:
-	/*
-	 * Return a zero extent length as failure indications.  All callers
-	 * re-check if the trimmed extent satisfies the minlen requirement.
-	 */
-	spin_unlock(&args->pag->pagb_lock);
-	trace_xfs_alloc_busy_trim(args->mp, args->agno, bno, len, fbno, 0);
-	*rbno = fbno;
-	*rlen = 0;
-}
-
-static void
-xfs_alloc_busy_clear_one(
-	struct xfs_mount	*mp,
-	struct xfs_perag	*pag,
-	struct xfs_busy_extent	*busyp)
-{
-	if (busyp->length) {
-		trace_xfs_alloc_busy_clear(mp, busyp->agno, busyp->bno,
-						busyp->length);
-		rb_erase(&busyp->rb_node, &pag->pagb_tree);
-	}
-
-	list_del_init(&busyp->list);
-	kmem_free(busyp);
-}
-
-/*
- * Remove all extents on the passed in list from the busy extents tree.
- * If do_discard is set skip extents that need to be discarded, and mark
- * these as undergoing a discard operation instead.
- */
-void
-xfs_alloc_busy_clear(
-	struct xfs_mount	*mp,
-	struct list_head	*list,
-	bool			do_discard)
-{
-	struct xfs_busy_extent	*busyp, *n;
-	struct xfs_perag	*pag = NULL;
-	xfs_agnumber_t		agno = NULLAGNUMBER;
-
-	list_for_each_entry_safe(busyp, n, list, list) {
-		if (busyp->agno != agno) {
-			if (pag) {
-				spin_unlock(&pag->pagb_lock);
-				xfs_perag_put(pag);
-			}
-			pag = xfs_perag_get(mp, busyp->agno);
-			spin_lock(&pag->pagb_lock);
-			agno = busyp->agno;
-		}
-
-		if (do_discard && busyp->length &&
-		    !(busyp->flags & XFS_ALLOC_BUSY_SKIP_DISCARD))
-			busyp->flags = XFS_ALLOC_BUSY_DISCARDED;
-		else
-			xfs_alloc_busy_clear_one(mp, pag, busyp);
-	}
-
-	if (pag) {
-		spin_unlock(&pag->pagb_lock);
-		xfs_perag_put(pag);
-	}
-}
-
-/*
- * Callback for list_sort to sort busy extents by the AG they reside in.
- */
-int
-xfs_busy_extent_ag_cmp(
-	void			*priv,
-	struct list_head	*a,
-	struct list_head	*b)
-{
-	return container_of(a, struct xfs_busy_extent, list)->agno -
-		container_of(b, struct xfs_busy_extent, list)->agno;
-}
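For readers following the move to xfs_extent_busy.c: the search above relies
on the invariant that busy extents in the per-AG rbtree never overlap each
other, so a single root-to-leaf descent is enough to classify a candidate
range. A standalone sketch of the same 0 / -1 / 1 convention, using a
hypothetical node type instead of the kernel's struct rb_node plumbing
(illustrative only, not part of the patch):

	/*
	 * Returns 0 if [bno, bno + len) overlaps nothing, -1 on a partial
	 * overlap, and 1 on an exact match -- mirroring the semantics of
	 * xfs_alloc_busy_search()/xfs_extent_busy_search() shown above.
	 */
	struct ext {
		struct ext	*left, *right;	/* children, ordered by ->bno */
		unsigned int	bno, len;
	};

	static int ext_search(struct ext *node, unsigned int bno,
			      unsigned int len)
	{
		int match = 0;

		while (node) {
			if (bno < node->bno) {
				/* may overlap, but exact start is lower */
				if (bno + len > node->bno)
					match = -1;
				node = node->left;
			} else if (bno > node->bno) {
				/* may overlap, but exact start is higher */
				if (bno < node->bno + node->len)
					match = -1;
				node = node->right;
			} else {
				/* start matches; length decides exactness */
				return (node->len == len) ? 1 : -1;
			}
		}
		return match;
	}
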
diff --git a/fs/xfs/xfs_alloc.h b/fs/xfs/xfs_alloc.h
index 3a7e7d8f8ded..93be4a667ca1 100644
--- a/fs/xfs/xfs_alloc.h
+++ b/fs/xfs/xfs_alloc.h
@@ -23,7 +23,6 @@ struct xfs_btree_cur;
 struct xfs_mount;
 struct xfs_perag;
 struct xfs_trans;
-struct xfs_busy_extent;
 
 extern struct workqueue_struct *xfs_alloc_wq;
 
@@ -139,33 +138,6 @@ xfs_extlen_t
 xfs_alloc_longest_free_extent(struct xfs_mount *mp,
 		struct xfs_perag *pag);
 
-#ifdef __KERNEL__
-void
-xfs_alloc_busy_insert(struct xfs_trans *tp, xfs_agnumber_t agno,
-	xfs_agblock_t bno, xfs_extlen_t len, unsigned int flags);
-
-void
-xfs_alloc_busy_clear(struct xfs_mount *mp, struct list_head *list,
-	bool do_discard);
-
-int
-xfs_alloc_busy_search(struct xfs_mount *mp, xfs_agnumber_t agno,
-	xfs_agblock_t bno, xfs_extlen_t len);
-
-void
-xfs_alloc_busy_reuse(struct xfs_mount *mp, xfs_agnumber_t agno,
-	xfs_agblock_t fbno, xfs_extlen_t flen, bool userdata);
-
-int
-xfs_busy_extent_ag_cmp(void *priv, struct list_head *a, struct list_head *b);
-
-static inline void xfs_alloc_busy_sort(struct list_head *list)
-{
-	list_sort(NULL, list, xfs_busy_extent_ag_cmp);
-}
-
-#endif	/* __KERNEL__ */
-
 /*
  * Compute and fill in value of m_ag_maxlevels.
  */
diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c
index ffb3386e45c1..f1647caace8f 100644
--- a/fs/xfs/xfs_alloc_btree.c
+++ b/fs/xfs/xfs_alloc_btree.c
@@ -18,9 +18,7 @@
 #include "xfs.h"
 #include "xfs_fs.h"
 #include "xfs_types.h"
-#include "xfs_bit.h"
 #include "xfs_log.h"
-#include "xfs_inum.h"
 #include "xfs_trans.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
@@ -32,6 +30,7 @@
 #include "xfs_inode.h"
 #include "xfs_btree.h"
 #include "xfs_alloc.h"
+#include "xfs_extent_busy.h"
 #include "xfs_error.h"
 #include "xfs_trace.h"
 
@@ -94,7 +93,7 @@ xfs_allocbt_alloc_block(
 		return 0;
 	}
 
-	xfs_alloc_busy_reuse(cur->bc_mp, cur->bc_private.a.agno, bno, 1, false);
+	xfs_extent_busy_reuse(cur->bc_mp, cur->bc_private.a.agno, bno, 1, false);
 
 	xfs_trans_agbtree_delta(cur->bc_tp, 1);
 	new->s = cpu_to_be32(bno);
@@ -119,8 +118,8 @@ xfs_allocbt_free_block(
 	if (error)
 		return error;
 
-	xfs_alloc_busy_insert(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1,
-			      XFS_ALLOC_BUSY_SKIP_DISCARD);
+	xfs_extent_busy_insert(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1,
+			       XFS_EXTENT_BUSY_SKIP_DISCARD);
 	xfs_trans_agbtree_delta(cur->bc_tp, -1);
 	return 0;
 }
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 0dbb9e70fe21..ae31c313a79e 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -16,9 +16,7 @@
  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 #include "xfs.h"
-#include "xfs_bit.h"
 #include "xfs_log.h"
-#include "xfs_inum.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
 #include "xfs_trans.h"
@@ -29,7 +27,6 @@
 #include "xfs_inode_item.h"
 #include "xfs_alloc.h"
 #include "xfs_error.h"
-#include "xfs_rw.h"
 #include "xfs_iomap.h"
 #include "xfs_vnodeops.h"
 #include "xfs_trace.h"
@@ -623,7 +620,7 @@ xfs_map_at_offset(
  * or delayed allocate extent.
  */
 STATIC int
-xfs_is_delayed_page(
+xfs_check_page_type(
 	struct page		*page,
 	unsigned int		type)
 {
@@ -637,11 +634,11 @@ xfs_is_delayed_page(
 		bh = head = page_buffers(page);
 		do {
 			if (buffer_unwritten(bh))
-				acceptable = (type == IO_UNWRITTEN);
+				acceptable += (type == IO_UNWRITTEN);
 			else if (buffer_delay(bh))
-				acceptable = (type == IO_DELALLOC);
+				acceptable += (type == IO_DELALLOC);
 			else if (buffer_dirty(bh) && buffer_mapped(bh))
-				acceptable = (type == IO_OVERWRITE);
+				acceptable += (type == IO_OVERWRITE);
 			else
 				break;
 		} while ((bh = bh->b_this_page) != head);
@@ -684,7 +681,7 @@ xfs_convert_page(
 		goto fail_unlock_page;
 	if (page->mapping != inode->i_mapping)
 		goto fail_unlock_page;
-	if (!xfs_is_delayed_page(page, (*ioendp)->io_type))
+	if (!xfs_check_page_type(page, (*ioendp)->io_type))
 		goto fail_unlock_page;
 
 	/*
@@ -834,7 +831,7 @@ xfs_aops_discard_page(
 	struct buffer_head	*bh, *head;
 	loff_t			offset = page_offset(page);
 
-	if (!xfs_is_delayed_page(page, IO_DELALLOC))
+	if (!xfs_check_page_type(page, IO_DELALLOC))
 		goto out_invalidate;
 
 	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
@@ -1146,7 +1143,14 @@ __xfs_get_blocks(
 	if (!create && direct && offset >= i_size_read(inode))
 		return 0;
 
-	if (create) {
+	/*
+	 * Direct I/O is usually done on preallocated files, so try getting
+	 * a block mapping without an exclusive lock first.  For buffered
+	 * writes we already have the exclusive iolock anyway, so avoiding
+	 * a lock roundtrip here by taking the ilock exclusive from the
+	 * beginning is a useful micro optimization.
+	 */
+	if (create && !direct) {
 		lockmode = XFS_ILOCK_EXCL;
 		xfs_ilock(ip, lockmode);
 	} else {
@@ -1168,23 +1172,45 @@ __xfs_get_blocks(
 	    (!nimaps ||
 	     (imap.br_startblock == HOLESTARTBLOCK ||
 	      imap.br_startblock == DELAYSTARTBLOCK))) {
-		if (direct) {
+		if (direct || xfs_get_extsz_hint(ip)) {
+			/*
+			 * Drop the ilock in preparation for starting the block
+			 * allocation transaction.  It will be retaken
+			 * exclusively inside xfs_iomap_write_direct for the
+			 * actual allocation.
+			 */
+			xfs_iunlock(ip, lockmode);
 			error = xfs_iomap_write_direct(ip, offset, size,
 						       &imap, nimaps);
+			if (error)
+				return -error;
+			new = 1;
 		} else {
+			/*
+			 * Delalloc reservations do not require a transaction,
+			 * we can go on without dropping the lock here. If we
+			 * are allocating a new delalloc block, make sure that
+			 * we set the new flag so that we mark the buffer new so
+			 * that we know that it is newly allocated if the write
+			 * fails.
+			 */
+			if (nimaps && imap.br_startblock == HOLESTARTBLOCK)
+				new = 1;
 			error = xfs_iomap_write_delay(ip, offset, size, &imap);
+			if (error)
+				goto out_unlock;
+
+			xfs_iunlock(ip, lockmode);
 		}
-		if (error)
-			goto out_unlock;
 
 		trace_xfs_get_blocks_alloc(ip, offset, size, 0, &imap);
 	} else if (nimaps) {
 		trace_xfs_get_blocks_found(ip, offset, size, 0, &imap);
+		xfs_iunlock(ip, lockmode);
 	} else {
 		trace_xfs_get_blocks_notfound(ip, offset, size);
 		goto out_unlock;
 	}
-	xfs_iunlock(ip, lockmode);
 
 	if (imap.br_startblock != HOLESTARTBLOCK &&
 	    imap.br_startblock != DELAYSTARTBLOCK) {
@@ -1386,52 +1412,91 @@ out_destroy_ioend:
 	return ret;
 }
 
+/*
+ * Punch out the delalloc blocks we have already allocated.
+ *
+ * Don't bother with xfs_setattr given that nothing can have made it to disk yet
+ * as the page is still locked at this point.
+ */
+STATIC void
+xfs_vm_kill_delalloc_range(
+	struct inode		*inode,
+	loff_t			start,
+	loff_t			end)
+{
+	struct xfs_inode	*ip = XFS_I(inode);
+	xfs_fileoff_t		start_fsb;
+	xfs_fileoff_t		end_fsb;
+	int			error;
+
+	start_fsb = XFS_B_TO_FSB(ip->i_mount, start);
+	end_fsb = XFS_B_TO_FSB(ip->i_mount, end);
+	if (end_fsb <= start_fsb)
+		return;
+
+	xfs_ilock(ip, XFS_ILOCK_EXCL);
+	error = xfs_bmap_punch_delalloc_range(ip, start_fsb,
+						end_fsb - start_fsb);
+	if (error) {
+		/* something screwed, just bail */
+		if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
+			xfs_alert(ip->i_mount,
+		"xfs_vm_write_failed: unable to clean up ino %lld",
+				ip->i_ino);
+		}
+	}
+	xfs_iunlock(ip, XFS_ILOCK_EXCL);
+}
+
 STATIC void
 xfs_vm_write_failed(
-	struct address_space	*mapping,
-	loff_t			to)
+	struct inode		*inode,
+	struct page		*page,
+	loff_t			pos,
+	unsigned		len)
 {
-	struct inode		*inode = mapping->host;
+	loff_t			block_offset = pos & PAGE_MASK;
+	loff_t			block_start;
+	loff_t			block_end;
+	loff_t			from = pos & (PAGE_CACHE_SIZE - 1);
+	loff_t			to = from + len;
+	struct buffer_head	*bh, *head;
 
-	if (to > inode->i_size) {
-		/*
-		 * Punch out the delalloc blocks we have already allocated.
-		 *
-		 * Don't bother with xfs_setattr given that nothing can have
-		 * made it to disk yet as the page is still locked at this
-		 * point.
-		 */
-		struct xfs_inode	*ip = XFS_I(inode);
-		xfs_fileoff_t		start_fsb;
-		xfs_fileoff_t		end_fsb;
-		int			error;
+	ASSERT(block_offset + from == pos);
 
-		truncate_pagecache(inode, to, inode->i_size);
+	head = page_buffers(page);
+	block_start = 0;
+	for (bh = head; bh != head || !block_start;
+	     bh = bh->b_this_page, block_start = block_end,
+				   block_offset += bh->b_size) {
+		block_end = block_start + bh->b_size;
 
-		/*
-		 * Check if there are any blocks that are outside of i_size
-		 * that need to be trimmed back.
-		 */
-		start_fsb = XFS_B_TO_FSB(ip->i_mount, inode->i_size) + 1;
-		end_fsb = XFS_B_TO_FSB(ip->i_mount, to);
-		if (end_fsb <= start_fsb)
-			return;
-
-		xfs_ilock(ip, XFS_ILOCK_EXCL);
-		error = xfs_bmap_punch_delalloc_range(ip, start_fsb,
-							end_fsb - start_fsb);
-		if (error) {
-			/* something screwed, just bail */
-			if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
-				xfs_alert(ip->i_mount,
-			"xfs_vm_write_failed: unable to clean up ino %lld",
-					ip->i_ino);
-			}
-		}
-		xfs_iunlock(ip, XFS_ILOCK_EXCL);
+		/* skip buffers before the write */
+		if (block_end <= from)
+			continue;
+
+		/* if the buffer is after the write, we're done */
+		if (block_start >= to)
+			break;
+
+		if (!buffer_delay(bh))
+			continue;
+
+		if (!buffer_new(bh) && block_offset < i_size_read(inode))
+			continue;
+
+		xfs_vm_kill_delalloc_range(inode, block_offset,
+					   block_offset + bh->b_size);
 	}
+
 }
 
+/*
+ * This used to call block_write_begin(), but it unlocks and releases the page
+ * on error, and we need that page to be able to punch stale delalloc blocks out
+ * on failure. hence we copy-n-waste it here and call xfs_vm_write_failed() at
+ * the appropriate point.
+ */
 STATIC int
 xfs_vm_write_begin(
 	struct file		*file,
@@ -1442,15 +1507,40 @@ xfs_vm_write_begin(
 	struct page		**pagep,
 	void			**fsdata)
 {
-	int			ret;
+	pgoff_t			index = pos >> PAGE_CACHE_SHIFT;
+	struct page		*page;
+	int			status;
 
-	ret = block_write_begin(mapping, pos, len, flags | AOP_FLAG_NOFS,
-				pagep, xfs_get_blocks);
-	if (unlikely(ret))
-		xfs_vm_write_failed(mapping, pos + len);
-	return ret;
+	ASSERT(len <= PAGE_CACHE_SIZE);
+
+	page = grab_cache_page_write_begin(mapping, index,
+					   flags | AOP_FLAG_NOFS);
+	if (!page)
+		return -ENOMEM;
+
+	status = __block_write_begin(page, pos, len, xfs_get_blocks);
+	if (unlikely(status)) {
+		struct inode	*inode = mapping->host;
+
+		xfs_vm_write_failed(inode, page, pos, len);
+		unlock_page(page);
+
+		if (pos + len > i_size_read(inode))
+			truncate_pagecache(inode, pos + len, i_size_read(inode));
+
+		page_cache_release(page);
+		page = NULL;
+	}
+
+	*pagep = page;
+	return status;
 }
 
+/*
+ * On failure, we only need to kill delalloc blocks beyond EOF because they
+ * will never be written. For blocks within EOF, generic_write_end() zeros them
+ * so they are safe to leave alone and be written with all the other valid data.
+ */
 STATIC int
 xfs_vm_write_end(
 	struct file		*file,
@@ -1463,9 +1553,19 @@ xfs_vm_write_end(
 {
 	int			ret;
 
+	ASSERT(len <= PAGE_CACHE_SIZE);
+
 	ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata);
-	if (unlikely(ret < len))
-		xfs_vm_write_failed(mapping, pos + len);
+	if (unlikely(ret < len)) {
+		struct inode	*inode = mapping->host;
+		size_t		isize = i_size_read(inode);
+		loff_t		to = pos + len;
+
+		if (to > isize) {
+			truncate_pagecache(inode, to, isize);
+			xfs_vm_kill_delalloc_range(inode, isize, to);
+		}
+	}
 	return ret;
 }
 
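For context on the "copy-n-waste" above: the generic block_write_begin()
helper that xfs_vm_write_begin() now open-codes reduces to roughly the
following (a simplified sketch of fs/buffer.c from this era, not part of
the patch). The XFS copy differs only in keeping the page around on
failure so it can punch out stale delalloc blocks before the page is
unlocked and released:

	int block_write_begin_sketch(struct address_space *mapping,
				     loff_t pos, unsigned len, unsigned flags,
				     struct page **pagep,
				     get_block_t *get_block)
	{
		pgoff_t index = pos >> PAGE_CACHE_SHIFT;
		struct page *page;
		int status;

		page = grab_cache_page_write_begin(mapping, index, flags);
		if (!page)
			return -ENOMEM;

		status = __block_write_begin(page, pos, len, get_block);
		if (unlikely(status)) {
			/*
			 * The generic helper gives up the page right here,
			 * which is exactly what XFS needs to avoid.
			 */
			unlock_page(page);
			page_cache_release(page);
			page = NULL;
		}

		*pagep = page;
		return status;
	}
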
diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c
index 65d61b948ead..a17ff01b5adf 100644
--- a/fs/xfs/xfs_attr.c
+++ b/fs/xfs/xfs_attr.c
@@ -21,7 +21,6 @@
 #include "xfs_types.h"
 #include "xfs_bit.h"
 #include "xfs_log.h"
-#include "xfs_inum.h"
 #include "xfs_trans.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
@@ -39,7 +38,6 @@
 #include "xfs_error.h"
 #include "xfs_quota.h"
 #include "xfs_trans_space.h"
-#include "xfs_rw.h"
 #include "xfs_vnodeops.h"
 #include "xfs_trace.h"
 
@@ -1987,14 +1985,12 @@ xfs_attr_rmtval_get(xfs_da_args_t *args)
 			       (map[i].br_startblock != HOLESTARTBLOCK));
 			dblkno = XFS_FSB_TO_DADDR(mp, map[i].br_startblock);
 			blkcnt = XFS_FSB_TO_BB(mp, map[i].br_blockcount);
-			error = xfs_read_buf(mp, mp->m_ddev_targp, dblkno,
-					     blkcnt, XBF_LOCK | XBF_DONT_BLOCK,
-					     &bp);
+			error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp,
+						   dblkno, blkcnt, 0, &bp);
 			if (error)
 				return(error);
 
-			tmp = (valuelen < XFS_BUF_SIZE(bp))
-				? valuelen : XFS_BUF_SIZE(bp);
+			tmp = min_t(int, valuelen, BBTOB(bp->b_length));
 			xfs_buf_iomove(bp, 0, tmp, dst, XBRW_READ);
 			xfs_buf_relse(bp);
 			dst += tmp;
@@ -2097,6 +2093,8 @@ xfs_attr_rmtval_set(xfs_da_args_t *args)
 	lblkno = args->rmtblkno;
 	valuelen = args->valuelen;
 	while (valuelen > 0) {
+		int buflen;
+
 		/*
 		 * Try to remember where we decided to put the value.
 		 */
@@ -2114,15 +2112,16 @@ xfs_attr_rmtval_set(xfs_da_args_t *args)
 		dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock),
 		blkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount);
 
-		bp = xfs_buf_get(mp->m_ddev_targp, dblkno, blkcnt,
-				 XBF_LOCK | XBF_DONT_BLOCK);
+		bp = xfs_buf_get(mp->m_ddev_targp, dblkno, blkcnt, 0);
 		if (!bp)
 			return ENOMEM;
-		tmp = (valuelen < XFS_BUF_SIZE(bp)) ? valuelen :
-							XFS_BUF_SIZE(bp);
+
+		buflen = BBTOB(bp->b_length);
+		tmp = min_t(int, valuelen, buflen);
 		xfs_buf_iomove(bp, 0, tmp, src, XBRW_WRITE);
-		if (tmp < XFS_BUF_SIZE(bp))
-			xfs_buf_zero(bp, tmp, XFS_BUF_SIZE(bp) - tmp);
+		if (tmp < buflen)
+			xfs_buf_zero(bp, tmp, buflen - tmp);
+
 		error = xfs_bwrite(bp);	/* GROT: NOTE: synchronous write */
 		xfs_buf_relse(bp);
 		if (error)
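The XFS_BUF_SIZE(bp) -> BBTOB(bp->b_length) conversions above work because
b_length counts 512-byte basic blocks rather than bytes. The unit helpers
are, approximately as defined in the XFS headers of this era:

	#define BBSHIFT		9
	#define BBSIZE		(1 << BBSHIFT)
	#define BBTOB(bbs)	((bbs) << BBSHIFT)	/* basic blocks -> bytes */
	#define BTOBB(bytes)	(((__u64)(bytes) + BBSIZE - 1) >> BBSHIFT)
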
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c
index 76d93dc953e1..7d89d800f517 100644
--- a/fs/xfs/xfs_attr_leaf.c
+++ b/fs/xfs/xfs_attr_leaf.c
@@ -20,7 +20,6 @@
 #include "xfs_types.h"
 #include "xfs_bit.h"
 #include "xfs_log.h"
-#include "xfs_inum.h"
 #include "xfs_trans.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
@@ -2983,7 +2982,7 @@ xfs_attr_leaf_freextent(xfs_trans_t **trans, xfs_inode_t *dp,
 					 map.br_blockcount);
 			bp = xfs_trans_get_buf(*trans,
 					dp->i_mount->m_ddev_targp,
-					dblkno, dblkcnt, XBF_LOCK);
+					dblkno, dblkcnt, 0);
 			if (!bp)
 				return ENOMEM;
 			xfs_trans_binval(*trans, bp);
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 85e7e327bcd8..58b815ec8c91 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -41,7 +41,6 @@
 #include "xfs_rtalloc.h"
 #include "xfs_error.h"
 #include "xfs_attr_leaf.h"
-#include "xfs_rw.h"
 #include "xfs_quota.h"
 #include "xfs_trans_space.h"
 #include "xfs_buf_item.h"
@@ -4527,7 +4526,7 @@ out_unreserve_blocks:
 		xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, alen, 0);
 out_unreserve_quota:
 	if (XFS_IS_QUOTA_ON(mp))
-		xfs_trans_unreserve_quota_nblks(NULL, ip, alen, 0, rt ?
+		xfs_trans_unreserve_quota_nblks(NULL, ip, (long)alen, 0, rt ?
 				XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS);
 	return error;
 }
@@ -5621,8 +5620,20 @@ xfs_getbmap(
 			XFS_FSB_TO_BB(mp, map[i].br_blockcount);
 		out[cur_ext].bmv_unused1 = 0;
 		out[cur_ext].bmv_unused2 = 0;
-		ASSERT(((iflags & BMV_IF_DELALLOC) != 0) ||
-		       (map[i].br_startblock != DELAYSTARTBLOCK));
+
+		/*
+		 * delayed allocation extents that start beyond EOF can
+		 * occur due to speculative EOF allocation when the
+		 * delalloc extent is larger than the largest freespace
+		 * extent at conversion time. These extents cannot be
+		 * converted by data writeback, so can exist here even
+		 * if we are not supposed to be finding delalloc
+		 * extents.
+		 */
+		if (map[i].br_startblock == DELAYSTARTBLOCK &&
+		    map[i].br_startoff <= XFS_B_TO_FSB(mp, XFS_ISIZE(ip)))
+			ASSERT((iflags & BMV_IF_DELALLOC) != 0);
+
 		if (map[i].br_startblock == HOLESTARTBLOCK &&
 		    whichfork == XFS_ATTR_FORK) {
 			/* came to the end of attribute fork */
@@ -6157,3 +6168,16 @@ next_block:
 
 	return error;
 }
+
+/*
+ * Convert the given file system block to a disk block.  We have to treat it
+ * differently based on whether the file is a real time file or not, because the
+ * bmap code does.
+ */
+xfs_daddr_t
+xfs_fsb_to_db(struct xfs_inode *ip, xfs_fsblock_t fsb)
+{
+	return (XFS_IS_REALTIME_INODE(ip) ? \
+		 (xfs_daddr_t)XFS_FSB_TO_BB((ip)->i_mount, (fsb)) : \
+		 XFS_FSB_TO_DADDR((ip)->i_mount, (fsb)));
+}
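The realtime special-casing in xfs_fsb_to_db() exists because block numbers
for realtime files are linear offsets into the separate realtime device,
while data-device block numbers encode an (AG, offset) pair that must be
decoded before it can become a disk address. Approximately as defined in
xfs_types.h of this era:

	#define XFS_FSB_TO_AGNO(mp, fsbno)	\
		((xfs_agnumber_t)((fsbno) >> (mp)->m_sb.sb_agblklog))
	#define XFS_FSB_TO_AGBNO(mp, fsbno)	\
		((xfs_agblock_t)((fsbno) & xfs_mask32lo((mp)->m_sb.sb_agblklog)))
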
diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h
index 89ee672d378a..803b56d7ce16 100644
--- a/fs/xfs/xfs_bmap.h
+++ b/fs/xfs/xfs_bmap.h
@@ -211,6 +211,9 @@ int xfs_bmap_count_blocks(struct xfs_trans *tp, struct xfs_inode *ip,
 		int whichfork, int *count);
 int xfs_bmap_punch_delalloc_range(struct xfs_inode *ip,
 		xfs_fileoff_t start_fsb, xfs_fileoff_t length);
+
+xfs_daddr_t xfs_fsb_to_db(struct xfs_inode *ip, xfs_fsblock_t fsb);
+
 #endif	/* __KERNEL__ */
 
 #endif	/* __XFS_BMAP_H__ */
diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c index e2f5d59cbeaf..862084a47a7e 100644 --- a/fs/xfs/xfs_bmap_btree.c +++ b/fs/xfs/xfs_bmap_btree.c | |||
| @@ -20,7 +20,6 @@ | |||
| 20 | #include "xfs_types.h" | 20 | #include "xfs_types.h" |
| 21 | #include "xfs_bit.h" | 21 | #include "xfs_bit.h" |
| 22 | #include "xfs_log.h" | 22 | #include "xfs_log.h" |
| 23 | #include "xfs_inum.h" | ||
| 24 | #include "xfs_trans.h" | 23 | #include "xfs_trans.h" |
| 25 | #include "xfs_sb.h" | 24 | #include "xfs_sb.h" |
| 26 | #include "xfs_ag.h" | 25 | #include "xfs_ag.h" |
diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c index 1f19f03af9d3..e53e317b1582 100644 --- a/fs/xfs/xfs_btree.c +++ b/fs/xfs/xfs_btree.c | |||
| @@ -20,7 +20,6 @@ | |||
| 20 | #include "xfs_types.h" | 20 | #include "xfs_types.h" |
| 21 | #include "xfs_bit.h" | 21 | #include "xfs_bit.h" |
| 22 | #include "xfs_log.h" | 22 | #include "xfs_log.h" |
| 23 | #include "xfs_inum.h" | ||
| 24 | #include "xfs_trans.h" | 23 | #include "xfs_trans.h" |
| 25 | #include "xfs_sb.h" | 24 | #include "xfs_sb.h" |
| 26 | #include "xfs_ag.h" | 25 | #include "xfs_ag.h" |
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 6819b5163e33..172d3cc8f8cb 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c | |||
| @@ -35,14 +35,12 @@ | |||
| 35 | #include <linux/freezer.h> | 35 | #include <linux/freezer.h> |
| 36 | 36 | ||
| 37 | #include "xfs_sb.h" | 37 | #include "xfs_sb.h" |
| 38 | #include "xfs_inum.h" | ||
| 39 | #include "xfs_log.h" | 38 | #include "xfs_log.h" |
| 40 | #include "xfs_ag.h" | 39 | #include "xfs_ag.h" |
| 41 | #include "xfs_mount.h" | 40 | #include "xfs_mount.h" |
| 42 | #include "xfs_trace.h" | 41 | #include "xfs_trace.h" |
| 43 | 42 | ||
| 44 | static kmem_zone_t *xfs_buf_zone; | 43 | static kmem_zone_t *xfs_buf_zone; |
| 45 | STATIC int xfsbufd(void *); | ||
| 46 | 44 | ||
| 47 | static struct workqueue_struct *xfslogd_workqueue; | 45 | static struct workqueue_struct *xfslogd_workqueue; |
| 48 | 46 | ||
| @@ -57,11 +55,7 @@ static struct workqueue_struct *xfslogd_workqueue; | |||
| 57 | #endif | 55 | #endif |
| 58 | 56 | ||
| 59 | #define xb_to_gfp(flags) \ | 57 | #define xb_to_gfp(flags) \ |
| 60 | ((((flags) & XBF_READ_AHEAD) ? __GFP_NORETRY : \ | 58 | ((((flags) & XBF_READ_AHEAD) ? __GFP_NORETRY : GFP_NOFS) | __GFP_NOWARN) |
| 61 | ((flags) & XBF_DONT_BLOCK) ? GFP_NOFS : GFP_KERNEL) | __GFP_NOWARN) | ||
| 62 | |||
| 63 | #define xb_to_km(flags) \ | ||
| 64 | (((flags) & XBF_DONT_BLOCK) ? KM_NOFS : KM_SLEEP) | ||
| 65 | 59 | ||
| 66 | 60 | ||
| 67 | static inline int | 61 | static inline int |
| @@ -71,11 +65,11 @@ xfs_buf_is_vmapped( | |||
| 71 | /* | 65 | /* |
| 72 | * Return true if the buffer is vmapped. | 66 | * Return true if the buffer is vmapped. |
| 73 | * | 67 | * |
| 74 | * The XBF_MAPPED flag is set if the buffer should be mapped, but the | 68 | * b_addr is null if the buffer is not mapped, but the code is clever |
| 75 | * code is clever enough to know it doesn't have to map a single page, | 69 | * enough to know it doesn't have to map a single page, so the check has |
| 76 | * so the check has to be both for XBF_MAPPED and bp->b_page_count > 1. | 70 | * to be both for b_addr and bp->b_page_count > 1. |
| 77 | */ | 71 | */ |
| 78 | return (bp->b_flags & XBF_MAPPED) && bp->b_page_count > 1; | 72 | return bp->b_addr && bp->b_page_count > 1; |
| 79 | } | 73 | } |
| 80 | 74 | ||
| 81 | static inline int | 75 | static inline int |
| @@ -144,8 +138,17 @@ void | |||
| 144 | xfs_buf_stale( | 138 | xfs_buf_stale( |
| 145 | struct xfs_buf *bp) | 139 | struct xfs_buf *bp) |
| 146 | { | 140 | { |
| 141 | ASSERT(xfs_buf_islocked(bp)); | ||
| 142 | |||
| 147 | bp->b_flags |= XBF_STALE; | 143 | bp->b_flags |= XBF_STALE; |
| 148 | xfs_buf_delwri_dequeue(bp); | 144 | |
| 145 | /* | ||
| 146 | * Clear the delwri status so that a delwri queue walker will not | ||
| 147 | * flush this buffer to disk now that it is stale. The delwri queue has | ||
| 148 | * a reference to the buffer, so this is safe to do. | ||
| 149 | */ | ||
| 150 | bp->b_flags &= ~_XBF_DELWRI_Q; | ||
| 151 | |||
| 149 | atomic_set(&(bp)->b_lru_ref, 0); | 152 | atomic_set(&(bp)->b_lru_ref, 0); |
| 150 | if (!list_empty(&bp->b_lru)) { | 153 | if (!list_empty(&bp->b_lru)) { |
| 151 | struct xfs_buftarg *btp = bp->b_target; | 154 | struct xfs_buftarg *btp = bp->b_target; |
| @@ -164,22 +167,22 @@ xfs_buf_stale( | |||
| 164 | struct xfs_buf * | 167 | struct xfs_buf * |
| 165 | xfs_buf_alloc( | 168 | xfs_buf_alloc( |
| 166 | struct xfs_buftarg *target, | 169 | struct xfs_buftarg *target, |
| 167 | xfs_off_t range_base, | 170 | xfs_daddr_t blkno, |
| 168 | size_t range_length, | 171 | size_t numblks, |
| 169 | xfs_buf_flags_t flags) | 172 | xfs_buf_flags_t flags) |
| 170 | { | 173 | { |
| 171 | struct xfs_buf *bp; | 174 | struct xfs_buf *bp; |
| 172 | 175 | ||
| 173 | bp = kmem_zone_alloc(xfs_buf_zone, xb_to_km(flags)); | 176 | bp = kmem_zone_zalloc(xfs_buf_zone, KM_NOFS); |
| 174 | if (unlikely(!bp)) | 177 | if (unlikely(!bp)) |
| 175 | return NULL; | 178 | return NULL; |
| 176 | 179 | ||
| 177 | /* | 180 | /* |
| 178 | * We don't want certain flags to appear in b_flags. | 181 | * We don't want certain flags to appear in b_flags unless they are |
| 182 | * specifically set by later operations on the buffer. | ||
| 179 | */ | 183 | */ |
| 180 | flags &= ~(XBF_LOCK|XBF_MAPPED|XBF_DONT_BLOCK|XBF_READ_AHEAD); | 184 | flags &= ~(XBF_UNMAPPED | XBF_TRYLOCK | XBF_ASYNC | XBF_READ_AHEAD); |
| 181 | 185 | ||
| 182 | memset(bp, 0, sizeof(xfs_buf_t)); | ||
| 183 | atomic_set(&bp->b_hold, 1); | 186 | atomic_set(&bp->b_hold, 1); |
| 184 | atomic_set(&bp->b_lru_ref, 1); | 187 | atomic_set(&bp->b_lru_ref, 1); |
| 185 | init_completion(&bp->b_iowait); | 188 | init_completion(&bp->b_iowait); |
| @@ -189,14 +192,22 @@ xfs_buf_alloc( | |||
| 189 | sema_init(&bp->b_sema, 0); /* held, no waiters */ | 192 | sema_init(&bp->b_sema, 0); /* held, no waiters */ |
| 190 | XB_SET_OWNER(bp); | 193 | XB_SET_OWNER(bp); |
| 191 | bp->b_target = target; | 194 | bp->b_target = target; |
| 192 | bp->b_file_offset = range_base; | 195 | |
| 193 | /* | 196 | /* |
| 194 | * Set buffer_length and count_desired to the same value initially. | 197 | * Set length and io_length to the same value initially. |
| 195 | * I/O routines should use count_desired, which will be the same in | 198 | * I/O routines should use io_length, which will be the same in |
| 196 | * most cases but may be reset (e.g. XFS recovery). | 199 | * most cases but may be reset (e.g. XFS recovery). |
| 197 | */ | 200 | */ |
| 198 | bp->b_buffer_length = bp->b_count_desired = range_length; | 201 | bp->b_length = numblks; |
| 202 | bp->b_io_length = numblks; | ||
| 199 | bp->b_flags = flags; | 203 | bp->b_flags = flags; |
| 204 | |||
| 205 | /* | ||
| 206 | * We do not set the block number here in the buffer because we have not | ||
| 207 | * finished initialising the buffer. We insert the buffer into the cache | ||
| 208 | * in this state, so this ensures that we are unable to do IO on a | ||
| 209 | * buffer that hasn't been fully initialised. | ||
| 210 | */ | ||
| 200 | bp->b_bn = XFS_BUF_DADDR_NULL; | 211 | bp->b_bn = XFS_BUF_DADDR_NULL; |
| 201 | atomic_set(&bp->b_pin_count, 0); | 212 | atomic_set(&bp->b_pin_count, 0); |
| 202 | init_waitqueue_head(&bp->b_waiters); | 213 | init_waitqueue_head(&bp->b_waiters); |
| @@ -219,13 +230,12 @@ _xfs_buf_get_pages( | |||
| 219 | { | 230 | { |
| 220 | /* Make sure that we have a page list */ | 231 | /* Make sure that we have a page list */ |
| 221 | if (bp->b_pages == NULL) { | 232 | if (bp->b_pages == NULL) { |
| 222 | bp->b_offset = xfs_buf_poff(bp->b_file_offset); | ||
| 223 | bp->b_page_count = page_count; | 233 | bp->b_page_count = page_count; |
| 224 | if (page_count <= XB_PAGES) { | 234 | if (page_count <= XB_PAGES) { |
| 225 | bp->b_pages = bp->b_page_array; | 235 | bp->b_pages = bp->b_page_array; |
| 226 | } else { | 236 | } else { |
| 227 | bp->b_pages = kmem_alloc(sizeof(struct page *) * | 237 | bp->b_pages = kmem_alloc(sizeof(struct page *) * |
| 228 | page_count, xb_to_km(flags)); | 238 | page_count, KM_NOFS); |
| 229 | if (bp->b_pages == NULL) | 239 | if (bp->b_pages == NULL) |
| 230 | return -ENOMEM; | 240 | return -ENOMEM; |
| 231 | } | 241 | } |
| @@ -288,11 +298,11 @@ xfs_buf_allocate_memory( | |||
| 288 | xfs_buf_t *bp, | 298 | xfs_buf_t *bp, |
| 289 | uint flags) | 299 | uint flags) |
| 290 | { | 300 | { |
| 291 | size_t size = bp->b_count_desired; | 301 | size_t size; |
| 292 | size_t nbytes, offset; | 302 | size_t nbytes, offset; |
| 293 | gfp_t gfp_mask = xb_to_gfp(flags); | 303 | gfp_t gfp_mask = xb_to_gfp(flags); |
| 294 | unsigned short page_count, i; | 304 | unsigned short page_count, i; |
| 295 | xfs_off_t end; | 305 | xfs_off_t start, end; |
| 296 | int error; | 306 | int error; |
| 297 | 307 | ||
| 298 | /* | 308 | /* |
| @@ -300,15 +310,15 @@ xfs_buf_allocate_memory( | |||
| 300 | * the memory from the heap - there's no need for the complexity of | 310 | * the memory from the heap - there's no need for the complexity of |
| 301 | * page arrays to keep allocation down to order 0. | 311 | * page arrays to keep allocation down to order 0. |
| 302 | */ | 312 | */ |
| 303 | if (bp->b_buffer_length < PAGE_SIZE) { | 313 | size = BBTOB(bp->b_length); |
| 304 | bp->b_addr = kmem_alloc(bp->b_buffer_length, xb_to_km(flags)); | 314 | if (size < PAGE_SIZE) { |
| 315 | bp->b_addr = kmem_alloc(size, KM_NOFS); | ||
| 305 | if (!bp->b_addr) { | 316 | if (!bp->b_addr) { |
| 306 | /* low memory - use alloc_page loop instead */ | 317 | /* low memory - use alloc_page loop instead */ |
| 307 | goto use_alloc_page; | 318 | goto use_alloc_page; |
| 308 | } | 319 | } |
| 309 | 320 | ||
| 310 | if (((unsigned long)(bp->b_addr + bp->b_buffer_length - 1) & | 321 | if (((unsigned long)(bp->b_addr + size - 1) & PAGE_MASK) != |
| 311 | PAGE_MASK) != | ||
| 312 | ((unsigned long)bp->b_addr & PAGE_MASK)) { | 322 | ((unsigned long)bp->b_addr & PAGE_MASK)) { |
| 313 | /* b_addr spans two pages - use alloc_page instead */ | 323 | /* b_addr spans two pages - use alloc_page instead */ |
| 314 | kmem_free(bp->b_addr); | 324 | kmem_free(bp->b_addr); |
| @@ -319,13 +329,14 @@ xfs_buf_allocate_memory( | |||
| 319 | bp->b_pages = bp->b_page_array; | 329 | bp->b_pages = bp->b_page_array; |
| 320 | bp->b_pages[0] = virt_to_page(bp->b_addr); | 330 | bp->b_pages[0] = virt_to_page(bp->b_addr); |
| 321 | bp->b_page_count = 1; | 331 | bp->b_page_count = 1; |
| 322 | bp->b_flags |= XBF_MAPPED | _XBF_KMEM; | 332 | bp->b_flags |= _XBF_KMEM; |
| 323 | return 0; | 333 | return 0; |
| 324 | } | 334 | } |
| 325 | 335 | ||
| 326 | use_alloc_page: | 336 | use_alloc_page: |
| 327 | end = bp->b_file_offset + bp->b_buffer_length; | 337 | start = BBTOB(bp->b_bn) >> PAGE_SHIFT; |
| 328 | page_count = xfs_buf_btoc(end) - xfs_buf_btoct(bp->b_file_offset); | 338 | end = (BBTOB(bp->b_bn + bp->b_length) + PAGE_SIZE - 1) >> PAGE_SHIFT; |
| 339 | page_count = end - start; | ||
| 329 | error = _xfs_buf_get_pages(bp, page_count, flags); | 340 | error = _xfs_buf_get_pages(bp, page_count, flags); |
| 330 | if (unlikely(error)) | 341 | if (unlikely(error)) |
| 331 | return error; | 342 | return error; |
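With b_file_offset gone, the page count above is derived purely from the disk address: the buffer needs every page touched by the byte range [BBTOB(b_bn), BBTOB(b_bn + b_length)). A minimal standalone check of that calculation, assuming 512-byte basic blocks and 4 KiB pages:

```c
#include <stdint.h>
#include <stdio.h>

#define BBSHIFT		9
#define PAGE_SHIFT	12
#define PAGE_SIZE	(1u << PAGE_SHIFT)
#define BBTOB(bbs)	((uint64_t)(bbs) << BBSHIFT)

/* Mirrors the start/end/page_count computation in xfs_buf_allocate_memory(). */
static unsigned int page_count(uint64_t blkno, uint64_t numblks)
{
	uint64_t start = BBTOB(blkno) >> PAGE_SHIFT;
	uint64_t end = (BBTOB(blkno + numblks) + PAGE_SIZE - 1) >> PAGE_SHIFT;

	return end - start;
}

int main(void)
{
	/* 8 BBs (4 KiB) starting on a page boundary fit in one page... */
	printf("%u\n", page_count(8, 8));	/* prints 1 */
	/* ...but the same 4 KiB straddling a page boundary needs two. */
	printf("%u\n", page_count(4, 8));	/* prints 2 */
	return 0;
}
```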
| @@ -388,8 +399,9 @@ _xfs_buf_map_pages( | |||
| 388 | if (bp->b_page_count == 1) { | 399 | if (bp->b_page_count == 1) { |
| 389 | /* A single page buffer is always mappable */ | 400 | /* A single page buffer is always mappable */ |
| 390 | bp->b_addr = page_address(bp->b_pages[0]) + bp->b_offset; | 401 | bp->b_addr = page_address(bp->b_pages[0]) + bp->b_offset; |
| 391 | bp->b_flags |= XBF_MAPPED; | 402 | } else if (flags & XBF_UNMAPPED) { |
| 392 | } else if (flags & XBF_MAPPED) { | 403 | bp->b_addr = NULL; |
| 404 | } else { | ||
| 393 | int retried = 0; | 405 | int retried = 0; |
| 394 | 406 | ||
| 395 | do { | 407 | do { |
| @@ -403,7 +415,6 @@ _xfs_buf_map_pages( | |||
| 403 | if (!bp->b_addr) | 415 | if (!bp->b_addr) |
| 404 | return -ENOMEM; | 416 | return -ENOMEM; |
| 405 | bp->b_addr += bp->b_offset; | 417 | bp->b_addr += bp->b_offset; |
| 406 | bp->b_flags |= XBF_MAPPED; | ||
| 407 | } | 418 | } |
| 408 | 419 | ||
| 409 | return 0; | 420 | return 0; |
| @@ -420,29 +431,27 @@ _xfs_buf_map_pages( | |||
| 420 | */ | 431 | */ |
| 421 | xfs_buf_t * | 432 | xfs_buf_t * |
| 422 | _xfs_buf_find( | 433 | _xfs_buf_find( |
| 423 | xfs_buftarg_t *btp, /* block device target */ | 434 | struct xfs_buftarg *btp, |
| 424 | xfs_off_t ioff, /* starting offset of range */ | 435 | xfs_daddr_t blkno, |
| 425 | size_t isize, /* length of range */ | 436 | size_t numblks, |
| 426 | xfs_buf_flags_t flags, | 437 | xfs_buf_flags_t flags, |
| 427 | xfs_buf_t *new_bp) | 438 | xfs_buf_t *new_bp) |
| 428 | { | 439 | { |
| 429 | xfs_off_t range_base; | 440 | size_t numbytes; |
| 430 | size_t range_length; | ||
| 431 | struct xfs_perag *pag; | 441 | struct xfs_perag *pag; |
| 432 | struct rb_node **rbp; | 442 | struct rb_node **rbp; |
| 433 | struct rb_node *parent; | 443 | struct rb_node *parent; |
| 434 | xfs_buf_t *bp; | 444 | xfs_buf_t *bp; |
| 435 | 445 | ||
| 436 | range_base = (ioff << BBSHIFT); | 446 | numbytes = BBTOB(numblks); |
| 437 | range_length = (isize << BBSHIFT); | ||
| 438 | 447 | ||
| 439 | /* Check for IOs smaller than the sector size / not sector aligned */ | 448 | /* Check for IOs smaller than the sector size / not sector aligned */ |
| 440 | ASSERT(!(range_length < (1 << btp->bt_sshift))); | 449 | ASSERT(!(numbytes < (1 << btp->bt_sshift))); |
| 441 | ASSERT(!(range_base & (xfs_off_t)btp->bt_smask)); | 450 | ASSERT(!(BBTOB(blkno) & (xfs_off_t)btp->bt_smask)); |
| 442 | 451 | ||
| 443 | /* get tree root */ | 452 | /* get tree root */ |
| 444 | pag = xfs_perag_get(btp->bt_mount, | 453 | pag = xfs_perag_get(btp->bt_mount, |
| 445 | xfs_daddr_to_agno(btp->bt_mount, ioff)); | 454 | xfs_daddr_to_agno(btp->bt_mount, blkno)); |
| 446 | 455 | ||
| 447 | /* walk tree */ | 456 | /* walk tree */ |
| 448 | spin_lock(&pag->pag_buf_lock); | 457 | spin_lock(&pag->pag_buf_lock); |
| @@ -453,20 +462,20 @@ _xfs_buf_find( | |||
| 453 | parent = *rbp; | 462 | parent = *rbp; |
| 454 | bp = rb_entry(parent, struct xfs_buf, b_rbnode); | 463 | bp = rb_entry(parent, struct xfs_buf, b_rbnode); |
| 455 | 464 | ||
| 456 | if (range_base < bp->b_file_offset) | 465 | if (blkno < bp->b_bn) |
| 457 | rbp = &(*rbp)->rb_left; | 466 | rbp = &(*rbp)->rb_left; |
| 458 | else if (range_base > bp->b_file_offset) | 467 | else if (blkno > bp->b_bn) |
| 459 | rbp = &(*rbp)->rb_right; | 468 | rbp = &(*rbp)->rb_right; |
| 460 | else { | 469 | else { |
| 461 | /* | 470 | /* |
| 462 | * found a block offset match. If the range doesn't | 471 | * found a block number match. If the range doesn't |
| 463 | * match, the only way this is allowed is if the buffer | 472 | * match, the only way this is allowed is if the buffer |
| 464 | * in the cache is stale and the transaction that made | 473 | * in the cache is stale and the transaction that made |
| 465 | * it stale has not yet committed. i.e. we are | 474 | * it stale has not yet committed. i.e. we are |
| 466 | * reallocating a busy extent. Skip this buffer and | 475 | * reallocating a busy extent. Skip this buffer and |
| 467 | * continue searching to the right for an exact match. | 476 | * continue searching to the right for an exact match. |
| 468 | */ | 477 | */ |
| 469 | if (bp->b_buffer_length != range_length) { | 478 | if (bp->b_length != numblks) { |
| 470 | ASSERT(bp->b_flags & XBF_STALE); | 479 | ASSERT(bp->b_flags & XBF_STALE); |
| 471 | rbp = &(*rbp)->rb_right; | 480 | rbp = &(*rbp)->rb_right; |
| 472 | continue; | 481 | continue; |
| @@ -511,7 +520,7 @@ found: | |||
| 511 | */ | 520 | */ |
| 512 | if (bp->b_flags & XBF_STALE) { | 521 | if (bp->b_flags & XBF_STALE) { |
| 513 | ASSERT((bp->b_flags & _XBF_DELWRI_Q) == 0); | 522 | ASSERT((bp->b_flags & _XBF_DELWRI_Q) == 0); |
| 514 | bp->b_flags &= XBF_MAPPED | _XBF_KMEM | _XBF_PAGES; | 523 | bp->b_flags &= _XBF_KMEM | _XBF_PAGES; |
| 515 | } | 524 | } |
| 516 | 525 | ||
| 517 | trace_xfs_buf_find(bp, flags, _RET_IP_); | 526 | trace_xfs_buf_find(bp, flags, _RET_IP_); |
| @@ -526,63 +535,59 @@ found: | |||
| 526 | */ | 535 | */ |
| 527 | struct xfs_buf * | 536 | struct xfs_buf * |
| 528 | xfs_buf_get( | 537 | xfs_buf_get( |
| 529 | xfs_buftarg_t *target,/* target for buffer */ | 538 | xfs_buftarg_t *target, |
| 530 | xfs_off_t ioff, /* starting offset of range */ | 539 | xfs_daddr_t blkno, |
| 531 | size_t isize, /* length of range */ | 540 | size_t numblks, |
| 532 | xfs_buf_flags_t flags) | 541 | xfs_buf_flags_t flags) |
| 533 | { | 542 | { |
| 534 | struct xfs_buf *bp; | 543 | struct xfs_buf *bp; |
| 535 | struct xfs_buf *new_bp; | 544 | struct xfs_buf *new_bp; |
| 536 | int error = 0; | 545 | int error = 0; |
| 537 | 546 | ||
| 538 | bp = _xfs_buf_find(target, ioff, isize, flags, NULL); | 547 | bp = _xfs_buf_find(target, blkno, numblks, flags, NULL); |
| 539 | if (likely(bp)) | 548 | if (likely(bp)) |
| 540 | goto found; | 549 | goto found; |
| 541 | 550 | ||
| 542 | new_bp = xfs_buf_alloc(target, ioff << BBSHIFT, isize << BBSHIFT, | 551 | new_bp = xfs_buf_alloc(target, blkno, numblks, flags); |
| 543 | flags); | ||
| 544 | if (unlikely(!new_bp)) | 552 | if (unlikely(!new_bp)) |
| 545 | return NULL; | 553 | return NULL; |
| 546 | 554 | ||
| 547 | bp = _xfs_buf_find(target, ioff, isize, flags, new_bp); | 555 | error = xfs_buf_allocate_memory(new_bp, flags); |
| 548 | if (!bp) { | 556 | if (error) { |
| 549 | kmem_zone_free(xfs_buf_zone, new_bp); | 557 | kmem_zone_free(xfs_buf_zone, new_bp); |
| 550 | return NULL; | 558 | return NULL; |
| 551 | } | 559 | } |
| 552 | 560 | ||
| 553 | if (bp == new_bp) { | 561 | bp = _xfs_buf_find(target, blkno, numblks, flags, new_bp); |
| 554 | error = xfs_buf_allocate_memory(bp, flags); | 562 | if (!bp) { |
| 555 | if (error) | 563 | xfs_buf_free(new_bp); |
| 556 | goto no_buffer; | 564 | return NULL; |
| 557 | } else | 565 | } |
| 558 | kmem_zone_free(xfs_buf_zone, new_bp); | 566 | |
| 567 | if (bp != new_bp) | ||
| 568 | xfs_buf_free(new_bp); | ||
| 559 | 569 | ||
| 560 | /* | 570 | /* |
| 561 | * Now we have a workable buffer, fill in the block number so | 571 | * Now we have a workable buffer, fill in the block number so |
| 562 | * that we can do IO on it. | 572 | * that we can do IO on it. |
| 563 | */ | 573 | */ |
| 564 | bp->b_bn = ioff; | 574 | bp->b_bn = blkno; |
| 565 | bp->b_count_desired = bp->b_buffer_length; | 575 | bp->b_io_length = bp->b_length; |
| 566 | 576 | ||
| 567 | found: | 577 | found: |
| 568 | if (!(bp->b_flags & XBF_MAPPED)) { | 578 | if (!bp->b_addr) { |
| 569 | error = _xfs_buf_map_pages(bp, flags); | 579 | error = _xfs_buf_map_pages(bp, flags); |
| 570 | if (unlikely(error)) { | 580 | if (unlikely(error)) { |
| 571 | xfs_warn(target->bt_mount, | 581 | xfs_warn(target->bt_mount, |
| 572 | "%s: failed to map pages\n", __func__); | 582 | "%s: failed to map pages\n", __func__); |
| 573 | goto no_buffer; | 583 | xfs_buf_relse(bp); |
| 584 | return NULL; | ||
| 574 | } | 585 | } |
| 575 | } | 586 | } |
| 576 | 587 | ||
| 577 | XFS_STATS_INC(xb_get); | 588 | XFS_STATS_INC(xb_get); |
| 578 | trace_xfs_buf_get(bp, flags, _RET_IP_); | 589 | trace_xfs_buf_get(bp, flags, _RET_IP_); |
| 579 | return bp; | 590 | return bp; |
| 580 | |||
| 581 | no_buffer: | ||
| 582 | if (flags & (XBF_LOCK | XBF_TRYLOCK)) | ||
| 583 | xfs_buf_unlock(bp); | ||
| 584 | xfs_buf_rele(bp); | ||
| 585 | return NULL; | ||
| 586 | } | 591 | } |
| 587 | 592 | ||
| 588 | STATIC int | 593 | STATIC int |
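The reordered xfs_buf_get() above allocates and memory-backs the new buffer before attempting the cache insert, then simply frees its copy if another thread won the race to insert the same block. A toy, single-threaded sketch of that lookup-then-insert shape (a plain linked list stands in for the per-AG rbtree, and all names here are illustrative, not the kernel API):

```c
#include <stdio.h>
#include <stdlib.h>

struct buf {
	long		key;
	struct buf	*next;
};

static struct buf *cache;

/* Return the cached buffer for key; insert new_bp on a miss (if given). */
static struct buf *find_or_insert(long key, struct buf *new_bp)
{
	struct buf *bp;

	for (bp = cache; bp; bp = bp->next)
		if (bp->key == key)
			return bp;		/* hit: caller frees new_bp */
	if (!new_bp)
		return NULL;			/* pure lookup miss */
	new_bp->next = cache;
	cache = new_bp;
	return new_bp;
}

static struct buf *buf_get(long key)
{
	struct buf *bp, *winner;

	bp = find_or_insert(key, NULL);		/* fast path: cache hit */
	if (bp)
		return bp;

	bp = calloc(1, sizeof(*bp));		/* fully initialise first... */
	if (!bp)
		return NULL;
	bp->key = key;

	winner = find_or_insert(key, bp);	/* ...then try to insert */
	if (winner != bp)
		free(bp);			/* lost the race: use the winner */
	return winner;
}

int main(void)
{
	printf("same buffer: %s\n",
	       buf_get(7) == buf_get(7) ? "yes" : "no");
	return 0;
}
```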
| @@ -590,32 +595,30 @@ _xfs_buf_read( | |||
| 590 | xfs_buf_t *bp, | 595 | xfs_buf_t *bp, |
| 591 | xfs_buf_flags_t flags) | 596 | xfs_buf_flags_t flags) |
| 592 | { | 597 | { |
| 593 | int status; | 598 | ASSERT(!(flags & XBF_WRITE)); |
| 594 | |||
| 595 | ASSERT(!(flags & (XBF_DELWRI|XBF_WRITE))); | ||
| 596 | ASSERT(bp->b_bn != XFS_BUF_DADDR_NULL); | 599 | ASSERT(bp->b_bn != XFS_BUF_DADDR_NULL); |
| 597 | 600 | ||
| 598 | bp->b_flags &= ~(XBF_WRITE | XBF_ASYNC | XBF_DELWRI | XBF_READ_AHEAD); | 601 | bp->b_flags &= ~(XBF_WRITE | XBF_ASYNC | XBF_READ_AHEAD); |
| 599 | bp->b_flags |= flags & (XBF_READ | XBF_ASYNC | XBF_READ_AHEAD); | 602 | bp->b_flags |= flags & (XBF_READ | XBF_ASYNC | XBF_READ_AHEAD); |
| 600 | 603 | ||
| 601 | status = xfs_buf_iorequest(bp); | 604 | xfs_buf_iorequest(bp); |
| 602 | if (status || bp->b_error || (flags & XBF_ASYNC)) | 605 | if (flags & XBF_ASYNC) |
| 603 | return status; | 606 | return 0; |
| 604 | return xfs_buf_iowait(bp); | 607 | return xfs_buf_iowait(bp); |
| 605 | } | 608 | } |
| 606 | 609 | ||
| 607 | xfs_buf_t * | 610 | xfs_buf_t * |
| 608 | xfs_buf_read( | 611 | xfs_buf_read( |
| 609 | xfs_buftarg_t *target, | 612 | xfs_buftarg_t *target, |
| 610 | xfs_off_t ioff, | 613 | xfs_daddr_t blkno, |
| 611 | size_t isize, | 614 | size_t numblks, |
| 612 | xfs_buf_flags_t flags) | 615 | xfs_buf_flags_t flags) |
| 613 | { | 616 | { |
| 614 | xfs_buf_t *bp; | 617 | xfs_buf_t *bp; |
| 615 | 618 | ||
| 616 | flags |= XBF_READ; | 619 | flags |= XBF_READ; |
| 617 | 620 | ||
| 618 | bp = xfs_buf_get(target, ioff, isize, flags); | 621 | bp = xfs_buf_get(target, blkno, numblks, flags); |
| 619 | if (bp) { | 622 | if (bp) { |
| 620 | trace_xfs_buf_read(bp, flags, _RET_IP_); | 623 | trace_xfs_buf_read(bp, flags, _RET_IP_); |
| 621 | 624 | ||
| @@ -627,7 +630,8 @@ xfs_buf_read( | |||
| 627 | * Read ahead call which is already satisfied, | 630 | * Read ahead call which is already satisfied, |
| 628 | * drop the buffer | 631 | * drop the buffer |
| 629 | */ | 632 | */ |
| 630 | goto no_buffer; | 633 | xfs_buf_relse(bp); |
| 634 | return NULL; | ||
| 631 | } else { | 635 | } else { |
| 632 | /* We do not want read in the flags */ | 636 | /* We do not want read in the flags */ |
| 633 | bp->b_flags &= ~XBF_READ; | 637 | bp->b_flags &= ~XBF_READ; |
| @@ -635,12 +639,6 @@ xfs_buf_read( | |||
| 635 | } | 639 | } |
| 636 | 640 | ||
| 637 | return bp; | 641 | return bp; |
| 638 | |||
| 639 | no_buffer: | ||
| 640 | if (flags & (XBF_LOCK | XBF_TRYLOCK)) | ||
| 641 | xfs_buf_unlock(bp); | ||
| 642 | xfs_buf_rele(bp); | ||
| 643 | return NULL; | ||
| 644 | } | 642 | } |
| 645 | 643 | ||
| 646 | /* | 644 | /* |
| @@ -650,14 +648,14 @@ xfs_buf_read( | |||
| 650 | void | 648 | void |
| 651 | xfs_buf_readahead( | 649 | xfs_buf_readahead( |
| 652 | xfs_buftarg_t *target, | 650 | xfs_buftarg_t *target, |
| 653 | xfs_off_t ioff, | 651 | xfs_daddr_t blkno, |
| 654 | size_t isize) | 652 | size_t numblks) |
| 655 | { | 653 | { |
| 656 | if (bdi_read_congested(target->bt_bdi)) | 654 | if (bdi_read_congested(target->bt_bdi)) |
| 657 | return; | 655 | return; |
| 658 | 656 | ||
| 659 | xfs_buf_read(target, ioff, isize, | 657 | xfs_buf_read(target, blkno, numblks, |
| 660 | XBF_TRYLOCK|XBF_ASYNC|XBF_READ_AHEAD|XBF_DONT_BLOCK); | 658 | XBF_TRYLOCK|XBF_ASYNC|XBF_READ_AHEAD); |
| 661 | } | 659 | } |
| 662 | 660 | ||
| 663 | /* | 661 | /* |
| @@ -666,16 +664,15 @@ xfs_buf_readahead( | |||
| 666 | */ | 664 | */ |
| 667 | struct xfs_buf * | 665 | struct xfs_buf * |
| 668 | xfs_buf_read_uncached( | 666 | xfs_buf_read_uncached( |
| 669 | struct xfs_mount *mp, | ||
| 670 | struct xfs_buftarg *target, | 667 | struct xfs_buftarg *target, |
| 671 | xfs_daddr_t daddr, | 668 | xfs_daddr_t daddr, |
| 672 | size_t length, | 669 | size_t numblks, |
| 673 | int flags) | 670 | int flags) |
| 674 | { | 671 | { |
| 675 | xfs_buf_t *bp; | 672 | xfs_buf_t *bp; |
| 676 | int error; | 673 | int error; |
| 677 | 674 | ||
| 678 | bp = xfs_buf_get_uncached(target, length, flags); | 675 | bp = xfs_buf_get_uncached(target, numblks, flags); |
| 679 | if (!bp) | 676 | if (!bp) |
| 680 | return NULL; | 677 | return NULL; |
| 681 | 678 | ||
| @@ -683,9 +680,9 @@ xfs_buf_read_uncached( | |||
| 683 | XFS_BUF_SET_ADDR(bp, daddr); | 680 | XFS_BUF_SET_ADDR(bp, daddr); |
| 684 | XFS_BUF_READ(bp); | 681 | XFS_BUF_READ(bp); |
| 685 | 682 | ||
| 686 | xfsbdstrat(mp, bp); | 683 | xfsbdstrat(target->bt_mount, bp); |
| 687 | error = xfs_buf_iowait(bp); | 684 | error = xfs_buf_iowait(bp); |
| 688 | if (error || bp->b_error) { | 685 | if (error) { |
| 689 | xfs_buf_relse(bp); | 686 | xfs_buf_relse(bp); |
| 690 | return NULL; | 687 | return NULL; |
| 691 | } | 688 | } |
| @@ -699,7 +696,7 @@ xfs_buf_read_uncached( | |||
| 699 | void | 696 | void |
| 700 | xfs_buf_set_empty( | 697 | xfs_buf_set_empty( |
| 701 | struct xfs_buf *bp, | 698 | struct xfs_buf *bp, |
| 702 | size_t len) | 699 | size_t numblks) |
| 703 | { | 700 | { |
| 704 | if (bp->b_pages) | 701 | if (bp->b_pages) |
| 705 | _xfs_buf_free_pages(bp); | 702 | _xfs_buf_free_pages(bp); |
| @@ -707,10 +704,9 @@ xfs_buf_set_empty( | |||
| 707 | bp->b_pages = NULL; | 704 | bp->b_pages = NULL; |
| 708 | bp->b_page_count = 0; | 705 | bp->b_page_count = 0; |
| 709 | bp->b_addr = NULL; | 706 | bp->b_addr = NULL; |
| 710 | bp->b_file_offset = 0; | 707 | bp->b_length = numblks; |
| 711 | bp->b_buffer_length = bp->b_count_desired = len; | 708 | bp->b_io_length = numblks; |
| 712 | bp->b_bn = XFS_BUF_DADDR_NULL; | 709 | bp->b_bn = XFS_BUF_DADDR_NULL; |
| 713 | bp->b_flags &= ~XBF_MAPPED; | ||
| 714 | } | 710 | } |
| 715 | 711 | ||
| 716 | static inline struct page * | 712 | static inline struct page * |
| @@ -749,7 +745,7 @@ xfs_buf_associate_memory( | |||
| 749 | bp->b_pages = NULL; | 745 | bp->b_pages = NULL; |
| 750 | bp->b_addr = mem; | 746 | bp->b_addr = mem; |
| 751 | 747 | ||
| 752 | rval = _xfs_buf_get_pages(bp, page_count, XBF_DONT_BLOCK); | 748 | rval = _xfs_buf_get_pages(bp, page_count, 0); |
| 753 | if (rval) | 749 | if (rval) |
| 754 | return rval; | 750 | return rval; |
| 755 | 751 | ||
| @@ -760,9 +756,8 @@ xfs_buf_associate_memory( | |||
| 760 | pageaddr += PAGE_SIZE; | 756 | pageaddr += PAGE_SIZE; |
| 761 | } | 757 | } |
| 762 | 758 | ||
| 763 | bp->b_count_desired = len; | 759 | bp->b_io_length = BTOBB(len); |
| 764 | bp->b_buffer_length = buflen; | 760 | bp->b_length = BTOBB(buflen); |
| 765 | bp->b_flags |= XBF_MAPPED; | ||
| 766 | 761 | ||
| 767 | return 0; | 762 | return 0; |
| 768 | } | 763 | } |
| @@ -770,17 +765,18 @@ xfs_buf_associate_memory( | |||
| 770 | xfs_buf_t * | 765 | xfs_buf_t * |
| 771 | xfs_buf_get_uncached( | 766 | xfs_buf_get_uncached( |
| 772 | struct xfs_buftarg *target, | 767 | struct xfs_buftarg *target, |
| 773 | size_t len, | 768 | size_t numblks, |
| 774 | int flags) | 769 | int flags) |
| 775 | { | 770 | { |
| 776 | unsigned long page_count = PAGE_ALIGN(len) >> PAGE_SHIFT; | 771 | unsigned long page_count; |
| 777 | int error, i; | 772 | int error, i; |
| 778 | xfs_buf_t *bp; | 773 | xfs_buf_t *bp; |
| 779 | 774 | ||
| 780 | bp = xfs_buf_alloc(target, 0, len, 0); | 775 | bp = xfs_buf_alloc(target, 0, numblks, 0); |
| 781 | if (unlikely(bp == NULL)) | 776 | if (unlikely(bp == NULL)) |
| 782 | goto fail; | 777 | goto fail; |
| 783 | 778 | ||
| 779 | page_count = PAGE_ALIGN(numblks << BBSHIFT) >> PAGE_SHIFT; | ||
| 784 | error = _xfs_buf_get_pages(bp, page_count, 0); | 780 | error = _xfs_buf_get_pages(bp, page_count, 0); |
| 785 | if (error) | 781 | if (error) |
| 786 | goto fail_free_buf; | 782 | goto fail_free_buf; |
| @@ -792,7 +788,7 @@ xfs_buf_get_uncached( | |||
| 792 | } | 788 | } |
| 793 | bp->b_flags |= _XBF_PAGES; | 789 | bp->b_flags |= _XBF_PAGES; |
| 794 | 790 | ||
| 795 | error = _xfs_buf_map_pages(bp, XBF_MAPPED); | 791 | error = _xfs_buf_map_pages(bp, 0); |
| 796 | if (unlikely(error)) { | 792 | if (unlikely(error)) { |
| 797 | xfs_warn(target->bt_mount, | 793 | xfs_warn(target->bt_mount, |
| 798 | "%s: failed to map pages\n", __func__); | 794 | "%s: failed to map pages\n", __func__); |
| @@ -855,7 +851,7 @@ xfs_buf_rele( | |||
| 855 | spin_unlock(&pag->pag_buf_lock); | 851 | spin_unlock(&pag->pag_buf_lock); |
| 856 | } else { | 852 | } else { |
| 857 | xfs_buf_lru_del(bp); | 853 | xfs_buf_lru_del(bp); |
| 858 | ASSERT(!(bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q))); | 854 | ASSERT(!(bp->b_flags & _XBF_DELWRI_Q)); |
| 859 | rb_erase(&bp->b_rbnode, &pag->pag_buf_tree); | 855 | rb_erase(&bp->b_rbnode, &pag->pag_buf_tree); |
| 860 | spin_unlock(&pag->pag_buf_lock); | 856 | spin_unlock(&pag->pag_buf_lock); |
| 861 | xfs_perag_put(pag); | 857 | xfs_perag_put(pag); |
| @@ -915,13 +911,6 @@ xfs_buf_lock( | |||
| 915 | trace_xfs_buf_lock_done(bp, _RET_IP_); | 911 | trace_xfs_buf_lock_done(bp, _RET_IP_); |
| 916 | } | 912 | } |
| 917 | 913 | ||
| 918 | /* | ||
| 919 | * Releases the lock on the buffer object. | ||
| 920 | * If the buffer is marked delwri but is not queued, do so before we | ||
| 921 | * unlock the buffer as we need to set flags correctly. We also need to | ||
| 922 | * take a reference for the delwri queue because the unlocker is going to | ||
| 923 | * drop theirs and they don't know we just queued it. | ||
| 924 | */ | ||
| 925 | void | 914 | void |
| 926 | xfs_buf_unlock( | 915 | xfs_buf_unlock( |
| 927 | struct xfs_buf *bp) | 916 | struct xfs_buf *bp) |
| @@ -1008,9 +997,8 @@ xfs_buf_ioerror_alert( | |||
| 1008 | const char *func) | 997 | const char *func) |
| 1009 | { | 998 | { |
| 1010 | xfs_alert(bp->b_target->bt_mount, | 999 | xfs_alert(bp->b_target->bt_mount, |
| 1011 | "metadata I/O error: block 0x%llx (\"%s\") error %d buf count %zd", | 1000 | "metadata I/O error: block 0x%llx (\"%s\") error %d numblks %d", |
| 1012 | (__uint64_t)XFS_BUF_ADDR(bp), func, | 1001 | (__uint64_t)XFS_BUF_ADDR(bp), func, bp->b_error, bp->b_length); |
| 1013 | bp->b_error, XFS_BUF_COUNT(bp)); | ||
| 1014 | } | 1002 | } |
| 1015 | 1003 | ||
| 1016 | int | 1004 | int |
| @@ -1019,10 +1007,11 @@ xfs_bwrite( | |||
| 1019 | { | 1007 | { |
| 1020 | int error; | 1008 | int error; |
| 1021 | 1009 | ||
| 1010 | ASSERT(xfs_buf_islocked(bp)); | ||
| 1011 | |||
| 1022 | bp->b_flags |= XBF_WRITE; | 1012 | bp->b_flags |= XBF_WRITE; |
| 1023 | bp->b_flags &= ~(XBF_ASYNC | XBF_READ); | 1013 | bp->b_flags &= ~(XBF_ASYNC | XBF_READ | _XBF_DELWRI_Q); |
| 1024 | 1014 | ||
| 1025 | xfs_buf_delwri_dequeue(bp); | ||
| 1026 | xfs_bdstrat_cb(bp); | 1015 | xfs_bdstrat_cb(bp); |
| 1027 | 1016 | ||
| 1028 | error = xfs_buf_iowait(bp); | 1017 | error = xfs_buf_iowait(bp); |
| @@ -1181,7 +1170,7 @@ _xfs_buf_ioapply( | |||
| 1181 | int rw, map_i, total_nr_pages, nr_pages; | 1170 | int rw, map_i, total_nr_pages, nr_pages; |
| 1182 | struct bio *bio; | 1171 | struct bio *bio; |
| 1183 | int offset = bp->b_offset; | 1172 | int offset = bp->b_offset; |
| 1184 | int size = bp->b_count_desired; | 1173 | int size = BBTOB(bp->b_io_length); |
| 1185 | sector_t sector = bp->b_bn; | 1174 | sector_t sector = bp->b_bn; |
| 1186 | 1175 | ||
| 1187 | total_nr_pages = bp->b_page_count; | 1176 | total_nr_pages = bp->b_page_count; |
| @@ -1229,7 +1218,7 @@ next_chunk: | |||
| 1229 | break; | 1218 | break; |
| 1230 | 1219 | ||
| 1231 | offset = 0; | 1220 | offset = 0; |
| 1232 | sector += nbytes >> BBSHIFT; | 1221 | sector += BTOBB(nbytes); |
| 1233 | size -= nbytes; | 1222 | size -= nbytes; |
| 1234 | total_nr_pages--; | 1223 | total_nr_pages--; |
| 1235 | } | 1224 | } |
| @@ -1248,13 +1237,13 @@ next_chunk: | |||
| 1248 | } | 1237 | } |
| 1249 | } | 1238 | } |
| 1250 | 1239 | ||
| 1251 | int | 1240 | void |
| 1252 | xfs_buf_iorequest( | 1241 | xfs_buf_iorequest( |
| 1253 | xfs_buf_t *bp) | 1242 | xfs_buf_t *bp) |
| 1254 | { | 1243 | { |
| 1255 | trace_xfs_buf_iorequest(bp, _RET_IP_); | 1244 | trace_xfs_buf_iorequest(bp, _RET_IP_); |
| 1256 | 1245 | ||
| 1257 | ASSERT(!(bp->b_flags & XBF_DELWRI)); | 1246 | ASSERT(!(bp->b_flags & _XBF_DELWRI_Q)); |
| 1258 | 1247 | ||
| 1259 | if (bp->b_flags & XBF_WRITE) | 1248 | if (bp->b_flags & XBF_WRITE) |
| 1260 | xfs_buf_wait_unpin(bp); | 1249 | xfs_buf_wait_unpin(bp); |
| @@ -1269,13 +1258,12 @@ xfs_buf_iorequest( | |||
| 1269 | _xfs_buf_ioend(bp, 0); | 1258 | _xfs_buf_ioend(bp, 0); |
| 1270 | 1259 | ||
| 1271 | xfs_buf_rele(bp); | 1260 | xfs_buf_rele(bp); |
| 1272 | return 0; | ||
| 1273 | } | 1261 | } |
| 1274 | 1262 | ||
| 1275 | /* | 1263 | /* |
| 1276 | * Waits for I/O to complete on the buffer supplied. | 1264 | * Waits for I/O to complete on the buffer supplied. It returns immediately if |
| 1277 | * It returns immediately if no I/O is pending. | 1265 | * no I/O is pending or there is already a pending error on the buffer. It |
| 1278 | * It returns the I/O error code, if any, or 0 if there was no error. | 1266 | * returns the I/O error code, if any, or 0 if there was no error. |
| 1279 | */ | 1267 | */ |
| 1280 | int | 1268 | int |
| 1281 | xfs_buf_iowait( | 1269 | xfs_buf_iowait( |
| @@ -1283,7 +1271,8 @@ xfs_buf_iowait( | |||
| 1283 | { | 1271 | { |
| 1284 | trace_xfs_buf_iowait(bp, _RET_IP_); | 1272 | trace_xfs_buf_iowait(bp, _RET_IP_); |
| 1285 | 1273 | ||
| 1286 | wait_for_completion(&bp->b_iowait); | 1274 | if (!bp->b_error) |
| 1275 | wait_for_completion(&bp->b_iowait); | ||
| 1287 | 1276 | ||
| 1288 | trace_xfs_buf_iowait_done(bp, _RET_IP_); | 1277 | trace_xfs_buf_iowait_done(bp, _RET_IP_); |
| 1289 | return bp->b_error; | 1278 | return bp->b_error; |
| @@ -1296,7 +1285,7 @@ xfs_buf_offset( | |||
| 1296 | { | 1285 | { |
| 1297 | struct page *page; | 1286 | struct page *page; |
| 1298 | 1287 | ||
| 1299 | if (bp->b_flags & XBF_MAPPED) | 1288 | if (bp->b_addr) |
| 1300 | return bp->b_addr + offset; | 1289 | return bp->b_addr + offset; |
| 1301 | 1290 | ||
| 1302 | offset += bp->b_offset; | 1291 | offset += bp->b_offset; |
| @@ -1315,27 +1304,30 @@ xfs_buf_iomove( | |||
| 1315 | void *data, /* data address */ | 1304 | void *data, /* data address */ |
| 1316 | xfs_buf_rw_t mode) /* read/write/zero flag */ | 1305 | xfs_buf_rw_t mode) /* read/write/zero flag */ |
| 1317 | { | 1306 | { |
| 1318 | size_t bend, cpoff, csize; | 1307 | size_t bend; |
| 1319 | struct page *page; | ||
| 1320 | 1308 | ||
| 1321 | bend = boff + bsize; | 1309 | bend = boff + bsize; |
| 1322 | while (boff < bend) { | 1310 | while (boff < bend) { |
| 1323 | page = bp->b_pages[xfs_buf_btoct(boff + bp->b_offset)]; | 1311 | struct page *page; |
| 1324 | cpoff = xfs_buf_poff(boff + bp->b_offset); | 1312 | int page_index, page_offset, csize; |
| 1325 | csize = min_t(size_t, | 1313 | |
| 1326 | PAGE_SIZE-cpoff, bp->b_count_desired-boff); | 1314 | page_index = (boff + bp->b_offset) >> PAGE_SHIFT; |
| 1315 | page_offset = (boff + bp->b_offset) & ~PAGE_MASK; | ||
| 1316 | page = bp->b_pages[page_index]; | ||
| 1317 | csize = min_t(size_t, PAGE_SIZE - page_offset, | ||
| 1318 | BBTOB(bp->b_io_length) - boff); | ||
| 1327 | 1319 | ||
| 1328 | ASSERT(((csize + cpoff) <= PAGE_SIZE)); | 1320 | ASSERT((csize + page_offset) <= PAGE_SIZE); |
| 1329 | 1321 | ||
| 1330 | switch (mode) { | 1322 | switch (mode) { |
| 1331 | case XBRW_ZERO: | 1323 | case XBRW_ZERO: |
| 1332 | memset(page_address(page) + cpoff, 0, csize); | 1324 | memset(page_address(page) + page_offset, 0, csize); |
| 1333 | break; | 1325 | break; |
| 1334 | case XBRW_READ: | 1326 | case XBRW_READ: |
| 1335 | memcpy(data, page_address(page) + cpoff, csize); | 1327 | memcpy(data, page_address(page) + page_offset, csize); |
| 1336 | break; | 1328 | break; |
| 1337 | case XBRW_WRITE: | 1329 | case XBRW_WRITE: |
| 1338 | memcpy(page_address(page) + cpoff, data, csize); | 1330 | memcpy(page_address(page) + page_offset, data, csize); |
| 1339 | } | 1331 | } |
| 1340 | 1332 | ||
| 1341 | boff += csize; | 1333 | boff += csize; |
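The rewritten copy loop above open-codes the page arithmetic that the removed xfs_buf_btoct()/xfs_buf_poff() macros used to hide; since the kernel's PAGE_MASK is ~(PAGE_SIZE - 1), the expression (x) & ~PAGE_MASK is simply the offset within a page. A minimal check of that arithmetic, assuming 4 KiB pages:

```c
#include <stdio.h>

#define PAGE_SHIFT	12
#define PAGE_SIZE	(1u << PAGE_SHIFT)
#define PAGE_MASK	(~(PAGE_SIZE - 1))	/* as in the kernel */

int main(void)
{
	unsigned int boff = 9000, b_offset = 0;	/* byte offset into the buffer */
	unsigned int page_index  = (boff + b_offset) >> PAGE_SHIFT;
	unsigned int page_offset = (boff + b_offset) & ~PAGE_MASK;

	/* 9000 = 2 * 4096 + 808, so this prints "page 2, offset 808" */
	printf("page %u, offset %u\n", page_index, page_offset);
	return 0;
}
```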
| @@ -1435,11 +1427,9 @@ xfs_free_buftarg( | |||
| 1435 | { | 1427 | { |
| 1436 | unregister_shrinker(&btp->bt_shrinker); | 1428 | unregister_shrinker(&btp->bt_shrinker); |
| 1437 | 1429 | ||
| 1438 | xfs_flush_buftarg(btp, 1); | ||
| 1439 | if (mp->m_flags & XFS_MOUNT_BARRIER) | 1430 | if (mp->m_flags & XFS_MOUNT_BARRIER) |
| 1440 | xfs_blkdev_issue_flush(btp); | 1431 | xfs_blkdev_issue_flush(btp); |
| 1441 | 1432 | ||
| 1442 | kthread_stop(btp->bt_task); | ||
| 1443 | kmem_free(btp); | 1433 | kmem_free(btp); |
| 1444 | } | 1434 | } |
| 1445 | 1435 | ||
| @@ -1491,20 +1481,6 @@ xfs_setsize_buftarg( | |||
| 1491 | return xfs_setsize_buftarg_flags(btp, blocksize, sectorsize, 1); | 1481 | return xfs_setsize_buftarg_flags(btp, blocksize, sectorsize, 1); |
| 1492 | } | 1482 | } |
| 1493 | 1483 | ||
| 1494 | STATIC int | ||
| 1495 | xfs_alloc_delwri_queue( | ||
| 1496 | xfs_buftarg_t *btp, | ||
| 1497 | const char *fsname) | ||
| 1498 | { | ||
| 1499 | INIT_LIST_HEAD(&btp->bt_delwri_queue); | ||
| 1500 | spin_lock_init(&btp->bt_delwri_lock); | ||
| 1501 | btp->bt_flags = 0; | ||
| 1502 | btp->bt_task = kthread_run(xfsbufd, btp, "xfsbufd/%s", fsname); | ||
| 1503 | if (IS_ERR(btp->bt_task)) | ||
| 1504 | return PTR_ERR(btp->bt_task); | ||
| 1505 | return 0; | ||
| 1506 | } | ||
| 1507 | |||
| 1508 | xfs_buftarg_t * | 1484 | xfs_buftarg_t * |
| 1509 | xfs_alloc_buftarg( | 1485 | xfs_alloc_buftarg( |
| 1510 | struct xfs_mount *mp, | 1486 | struct xfs_mount *mp, |
| @@ -1527,8 +1503,6 @@ xfs_alloc_buftarg( | |||
| 1527 | spin_lock_init(&btp->bt_lru_lock); | 1503 | spin_lock_init(&btp->bt_lru_lock); |
| 1528 | if (xfs_setsize_buftarg_early(btp, bdev)) | 1504 | if (xfs_setsize_buftarg_early(btp, bdev)) |
| 1529 | goto error; | 1505 | goto error; |
| 1530 | if (xfs_alloc_delwri_queue(btp, fsname)) | ||
| 1531 | goto error; | ||
| 1532 | btp->bt_shrinker.shrink = xfs_buftarg_shrink; | 1506 | btp->bt_shrinker.shrink = xfs_buftarg_shrink; |
| 1533 | btp->bt_shrinker.seeks = DEFAULT_SEEKS; | 1507 | btp->bt_shrinker.seeks = DEFAULT_SEEKS; |
| 1534 | register_shrinker(&btp->bt_shrinker); | 1508 | register_shrinker(&btp->bt_shrinker); |
| @@ -1539,125 +1513,52 @@ error: | |||
| 1539 | return NULL; | 1513 | return NULL; |
| 1540 | } | 1514 | } |
| 1541 | 1515 | ||
| 1542 | |||
| 1543 | /* | 1516 | /* |
| 1544 | * Delayed write buffer handling | 1517 | * Add a buffer to the delayed write list. |
| 1518 | * | ||
| 1519 | * This queues a buffer for writeout if it hasn't already been queued. Note that | ||
| 1520 | * neither this routine nor the buffer list submission functions perform | ||
| 1521 | * any internal synchronization. It is expected that the lists are thread-local | ||
| 1522 | * to the callers. | ||
| 1523 | * | ||
| 1524 | * Returns true if we queued up the buffer, or false if it had already | ||
| 1525 | * been on the buffer list. | ||
| 1545 | */ | 1526 | */ |
| 1546 | void | 1527 | bool |
| 1547 | xfs_buf_delwri_queue( | 1528 | xfs_buf_delwri_queue( |
| 1548 | xfs_buf_t *bp) | 1529 | struct xfs_buf *bp, |
| 1530 | struct list_head *list) | ||
| 1549 | { | 1531 | { |
| 1550 | struct xfs_buftarg *btp = bp->b_target; | 1532 | ASSERT(xfs_buf_islocked(bp)); |
| 1551 | |||
| 1552 | trace_xfs_buf_delwri_queue(bp, _RET_IP_); | ||
| 1553 | |||
| 1554 | ASSERT(!(bp->b_flags & XBF_READ)); | 1533 | ASSERT(!(bp->b_flags & XBF_READ)); |
| 1555 | 1534 | ||
| 1556 | spin_lock(&btp->bt_delwri_lock); | 1535 | /* |
| 1557 | if (!list_empty(&bp->b_list)) { | 1536 | * If the buffer is already marked delwri, it is already queued up |
| 1558 | /* if already in the queue, move it to the tail */ | 1537 | * by someone else for immediate writeout. Just ignore it in that |
| 1559 | ASSERT(bp->b_flags & _XBF_DELWRI_Q); | 1538 | * case. |
| 1560 | list_move_tail(&bp->b_list, &btp->bt_delwri_queue); | 1539 | */ |
| 1561 | } else { | 1540 | if (bp->b_flags & _XBF_DELWRI_Q) { |
| 1562 | /* start xfsbufd as it is about to have something to do */ | 1541 | trace_xfs_buf_delwri_queued(bp, _RET_IP_); |
| 1563 | if (list_empty(&btp->bt_delwri_queue)) | 1542 | return false; |
| 1564 | wake_up_process(bp->b_target->bt_task); | ||
| 1565 | |||
| 1566 | atomic_inc(&bp->b_hold); | ||
| 1567 | bp->b_flags |= XBF_DELWRI | _XBF_DELWRI_Q | XBF_ASYNC; | ||
| 1568 | list_add_tail(&bp->b_list, &btp->bt_delwri_queue); | ||
| 1569 | } | ||
| 1570 | bp->b_queuetime = jiffies; | ||
| 1571 | spin_unlock(&btp->bt_delwri_lock); | ||
| 1572 | } | ||
| 1573 | |||
| 1574 | void | ||
| 1575 | xfs_buf_delwri_dequeue( | ||
| 1576 | xfs_buf_t *bp) | ||
| 1577 | { | ||
| 1578 | int dequeued = 0; | ||
| 1579 | |||
| 1580 | spin_lock(&bp->b_target->bt_delwri_lock); | ||
| 1581 | if ((bp->b_flags & XBF_DELWRI) && !list_empty(&bp->b_list)) { | ||
| 1582 | ASSERT(bp->b_flags & _XBF_DELWRI_Q); | ||
| 1583 | list_del_init(&bp->b_list); | ||
| 1584 | dequeued = 1; | ||
| 1585 | } | 1543 | } |
| 1586 | bp->b_flags &= ~(XBF_DELWRI|_XBF_DELWRI_Q); | ||
| 1587 | spin_unlock(&bp->b_target->bt_delwri_lock); | ||
| 1588 | |||
| 1589 | if (dequeued) | ||
| 1590 | xfs_buf_rele(bp); | ||
| 1591 | |||
| 1592 | trace_xfs_buf_delwri_dequeue(bp, _RET_IP_); | ||
| 1593 | } | ||
| 1594 | |||
| 1595 | /* | ||
| 1596 | * If a delwri buffer needs to be pushed before it has aged out, then promote | ||
| 1597 | * it to the head of the delwri queue so that it will be flushed on the next | ||
| 1598 | * xfsbufd run. We do this by resetting the queuetime of the buffer to be older | ||
| 1599 | * than the age currently needed to flush the buffer. Hence the next time the | ||
| 1600 | * xfsbufd sees it is guaranteed to be considered old enough to flush. | ||
| 1601 | */ | ||
| 1602 | void | ||
| 1603 | xfs_buf_delwri_promote( | ||
| 1604 | struct xfs_buf *bp) | ||
| 1605 | { | ||
| 1606 | struct xfs_buftarg *btp = bp->b_target; | ||
| 1607 | long age = xfs_buf_age_centisecs * msecs_to_jiffies(10) + 1; | ||
| 1608 | 1544 | ||
| 1609 | ASSERT(bp->b_flags & XBF_DELWRI); | 1545 | trace_xfs_buf_delwri_queue(bp, _RET_IP_); |
| 1610 | ASSERT(bp->b_flags & _XBF_DELWRI_Q); | ||
| 1611 | 1546 | ||
| 1612 | /* | 1547 | /* |
| 1613 | * Check the buffer age before locking the delayed write queue as we | 1548 | * If a buffer gets written out synchronously or marked stale while it |
| 1614 | * don't need to promote buffers that are already past the flush age. | 1549 | * is on a delwri list we lazily remove it. To do this, the other party |
| 1550 | * clears the _XBF_DELWRI_Q flag but otherwise leaves the buffer alone. | ||
| 1551 | * It remains referenced and on the list. In a rare corner case it | ||
| 1552 | * might get re-added to a delwri list after the synchronous writeout, in | ||
| 1553 | * which case we just need to re-add the flag here. | ||
| 1615 | */ | 1554 | */ |
| 1616 | if (bp->b_queuetime < jiffies - age) | 1555 | bp->b_flags |= _XBF_DELWRI_Q; |
| 1617 | return; | 1556 | if (list_empty(&bp->b_list)) { |
| 1618 | bp->b_queuetime = jiffies - age; | 1557 | atomic_inc(&bp->b_hold); |
| 1619 | spin_lock(&btp->bt_delwri_lock); | 1558 | list_add_tail(&bp->b_list, list); |
| 1620 | list_move(&bp->b_list, &btp->bt_delwri_queue); | ||
| 1621 | spin_unlock(&btp->bt_delwri_lock); | ||
| 1622 | } | ||
| 1623 | |||
| 1624 | /* | ||
| 1625 | * Move as many buffers as specified to the supplied list | ||
| 1626 | * indicating if we skipped any buffers to prevent deadlocks. | ||
| 1627 | */ | ||
| 1628 | STATIC int | ||
| 1629 | xfs_buf_delwri_split( | ||
| 1630 | xfs_buftarg_t *target, | ||
| 1631 | struct list_head *list, | ||
| 1632 | unsigned long age) | ||
| 1633 | { | ||
| 1634 | xfs_buf_t *bp, *n; | ||
| 1635 | int skipped = 0; | ||
| 1636 | int force; | ||
| 1637 | |||
| 1638 | force = test_and_clear_bit(XBT_FORCE_FLUSH, &target->bt_flags); | ||
| 1639 | INIT_LIST_HEAD(list); | ||
| 1640 | spin_lock(&target->bt_delwri_lock); | ||
| 1641 | list_for_each_entry_safe(bp, n, &target->bt_delwri_queue, b_list) { | ||
| 1642 | ASSERT(bp->b_flags & XBF_DELWRI); | ||
| 1643 | |||
| 1644 | if (!xfs_buf_ispinned(bp) && xfs_buf_trylock(bp)) { | ||
| 1645 | if (!force && | ||
| 1646 | time_before(jiffies, bp->b_queuetime + age)) { | ||
| 1647 | xfs_buf_unlock(bp); | ||
| 1648 | break; | ||
| 1649 | } | ||
| 1650 | |||
| 1651 | bp->b_flags &= ~(XBF_DELWRI | _XBF_DELWRI_Q); | ||
| 1652 | bp->b_flags |= XBF_WRITE; | ||
| 1653 | list_move_tail(&bp->b_list, list); | ||
| 1654 | trace_xfs_buf_delwri_split(bp, _RET_IP_); | ||
| 1655 | } else | ||
| 1656 | skipped++; | ||
| 1657 | } | 1559 | } |
| 1658 | 1560 | ||
| 1659 | spin_unlock(&target->bt_delwri_lock); | 1561 | return true; |
| 1660 | return skipped; | ||
| 1661 | } | 1562 | } |
| 1662 | 1563 | ||
| 1663 | /* | 1564 | /* |
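The comment in the new xfs_buf_delwri_queue() above describes a lazy-removal protocol: staling or synchronously writing a queued buffer only clears _XBF_DELWRI_Q, and the eventual list walk is what actually unlinks the buffer and drops the queue's reference. A toy model of those semantics (the flag, refcount, and list are simplified stand-ins for the kernel structures):

```c
#include <stdbool.h>
#include <stdio.h>

#define DELWRI_Q	(1 << 0)	/* stand-in for _XBF_DELWRI_Q */

struct buf {
	int		flags;
	int		hold;		/* reference count */
	bool		queued;		/* on some delwri list */
	struct buf	*next;
	const char	*name;
};

/* Queue: set the flag, and take a reference on first addition. */
static void delwri_queue(struct buf *bp, struct buf **list)
{
	bp->flags |= DELWRI_Q;
	if (!bp->queued) {
		bp->hold++;		/* list membership holds a reference */
		bp->queued = true;
		bp->next = *list;
		*list = bp;
	}
}

/* Synchronous write or stale: clear the flag, leave the list alone. */
static void buf_stale(struct buf *bp)
{
	bp->flags &= ~DELWRI_Q;
}

/* The submit walk notices the cleared flag and unlinks lazily. */
static void delwri_submit(struct buf **list)
{
	struct buf *bp;

	while ((bp = *list) != NULL) {
		*list = bp->next;
		bp->queued = false;
		bp->hold--;		/* drop the list's reference */
		printf("%s: %s, hold=%d\n", bp->name,
		       (bp->flags & DELWRI_Q) ? "written" : "skipped (stale)",
		       bp->hold);
	}
}

int main(void)
{
	struct buf a = { .hold = 1, .name = "a" };
	struct buf b = { .hold = 1, .name = "b" };
	struct buf *list = NULL;

	delwri_queue(&a, &list);
	delwri_queue(&b, &list);
	buf_stale(&a);			/* only clears the flag */
	delwri_submit(&list);		/* unlink and reference drop happen here */
	return 0;
}
```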
| @@ -1683,99 +1584,109 @@ xfs_buf_cmp( | |||
| 1683 | return 0; | 1584 | return 0; |
| 1684 | } | 1585 | } |
| 1685 | 1586 | ||
| 1686 | STATIC int | 1587 | static int |
| 1687 | xfsbufd( | 1588 | __xfs_buf_delwri_submit( |
| 1688 | void *data) | 1589 | struct list_head *buffer_list, |
| 1590 | struct list_head *io_list, | ||
| 1591 | bool wait) | ||
| 1689 | { | 1592 | { |
| 1690 | xfs_buftarg_t *target = (xfs_buftarg_t *)data; | 1593 | struct blk_plug plug; |
| 1691 | 1594 | struct xfs_buf *bp, *n; | |
| 1692 | current->flags |= PF_MEMALLOC; | 1595 | int pinned = 0; |
| 1693 | 1596 | ||
| 1694 | set_freezable(); | 1597 | list_for_each_entry_safe(bp, n, buffer_list, b_list) { |
| 1598 | if (!wait) { | ||
| 1599 | if (xfs_buf_ispinned(bp)) { | ||
| 1600 | pinned++; | ||
| 1601 | continue; | ||
| 1602 | } | ||
| 1603 | if (!xfs_buf_trylock(bp)) | ||
| 1604 | continue; | ||
| 1605 | } else { | ||
| 1606 | xfs_buf_lock(bp); | ||
| 1607 | } | ||
| 1695 | 1608 | ||
| 1696 | do { | 1609 | /* |
| 1697 | long age = xfs_buf_age_centisecs * msecs_to_jiffies(10); | 1610 | * Someone else might have written the buffer synchronously or |
| 1698 | long tout = xfs_buf_timer_centisecs * msecs_to_jiffies(10); | 1611 | * marked it stale in the meantime. In that case only the |
| 1699 | struct list_head tmp; | 1612 | * _XBF_DELWRI_Q flag got cleared, and we have to drop the |
| 1700 | struct blk_plug plug; | 1613 | * reference and remove it from the list here. |
| 1614 | */ | ||
| 1615 | if (!(bp->b_flags & _XBF_DELWRI_Q)) { | ||
| 1616 | list_del_init(&bp->b_list); | ||
| 1617 | xfs_buf_relse(bp); | ||
| 1618 | continue; | ||
| 1619 | } | ||
| 1701 | 1620 | ||
| 1702 | if (unlikely(freezing(current))) | 1621 | list_move_tail(&bp->b_list, io_list); |
| 1703 | try_to_freeze(); | 1622 | trace_xfs_buf_delwri_split(bp, _RET_IP_); |
| 1623 | } | ||
| 1704 | 1624 | ||
| 1705 | /* sleep for a long time if there is nothing to do. */ | 1625 | list_sort(NULL, io_list, xfs_buf_cmp); |
| 1706 | if (list_empty(&target->bt_delwri_queue)) | ||
| 1707 | tout = MAX_SCHEDULE_TIMEOUT; | ||
| 1708 | schedule_timeout_interruptible(tout); | ||
| 1709 | 1626 | ||
| 1710 | xfs_buf_delwri_split(target, &tmp, age); | 1627 | blk_start_plug(&plug); |
| 1711 | list_sort(NULL, &tmp, xfs_buf_cmp); | 1628 | list_for_each_entry_safe(bp, n, io_list, b_list) { |
| 1629 | bp->b_flags &= ~(_XBF_DELWRI_Q | XBF_ASYNC); | ||
| 1630 | bp->b_flags |= XBF_WRITE; | ||
| 1712 | 1631 | ||
| 1713 | blk_start_plug(&plug); | 1632 | if (!wait) { |
| 1714 | while (!list_empty(&tmp)) { | 1633 | bp->b_flags |= XBF_ASYNC; |
| 1715 | struct xfs_buf *bp; | ||
| 1716 | bp = list_first_entry(&tmp, struct xfs_buf, b_list); | ||
| 1717 | list_del_init(&bp->b_list); | 1634 | list_del_init(&bp->b_list); |
| 1718 | xfs_bdstrat_cb(bp); | ||
| 1719 | } | 1635 | } |
| 1720 | blk_finish_plug(&plug); | 1636 | xfs_bdstrat_cb(bp); |
| 1721 | } while (!kthread_should_stop()); | 1637 | } |
| 1638 | blk_finish_plug(&plug); | ||
| 1722 | 1639 | ||
| 1723 | return 0; | 1640 | return pinned; |
| 1724 | } | 1641 | } |
| 1725 | 1642 | ||
| 1726 | /* | 1643 | /* |
| 1727 | * Go through all incore buffers, and release buffers if they belong to | 1644 | * Write out a buffer list asynchronously. |
| 1728 | * the given device. This is used in filesystem error handling to | 1645 | * |
| 1729 | * preserve the consistency of its metadata. | 1646 | * This will take the @buffer_list, write all non-locked and non-pinned buffers |
| 1647 | * out and not wait for I/O completion on any of the buffers. This interface | ||
| 1648 | * is only safely usable for callers that can track I/O completion by higher | ||
| 1649 | * level means, e.g. AIL pushing, as the @buffer_list is consumed in this | ||
| 1650 | * function. | ||
| 1730 | */ | 1651 | */ |
| 1731 | int | 1652 | int |
| 1732 | xfs_flush_buftarg( | 1653 | xfs_buf_delwri_submit_nowait( |
| 1733 | xfs_buftarg_t *target, | 1654 | struct list_head *buffer_list) |
| 1734 | int wait) | ||
| 1735 | { | 1655 | { |
| 1736 | xfs_buf_t *bp; | 1656 | LIST_HEAD (io_list); |
| 1737 | int pincount = 0; | 1657 | return __xfs_buf_delwri_submit(buffer_list, &io_list, false); |
| 1738 | LIST_HEAD(tmp_list); | 1658 | } |
| 1739 | LIST_HEAD(wait_list); | ||
| 1740 | struct blk_plug plug; | ||
| 1741 | 1659 | ||
| 1742 | flush_workqueue(xfslogd_workqueue); | 1660 | /* |
| 1661 | * Write out a buffer list synchronously. | ||
| 1662 | * | ||
| 1663 | * This will take the @buffer_list, write all buffers out and wait for I/O | ||
| 1664 | * completion on all of the buffers. @buffer_list is consumed by the function, | ||
| 1665 | * so callers must have some other way of tracking buffers if they require such | ||
| 1666 | * functionality. | ||
| 1667 | */ | ||
| 1668 | int | ||
| 1669 | xfs_buf_delwri_submit( | ||
| 1670 | struct list_head *buffer_list) | ||
| 1671 | { | ||
| 1672 | LIST_HEAD (io_list); | ||
| 1673 | int error = 0, error2; | ||
| 1674 | struct xfs_buf *bp; | ||
| 1743 | 1675 | ||
| 1744 | set_bit(XBT_FORCE_FLUSH, &target->bt_flags); | 1676 | __xfs_buf_delwri_submit(buffer_list, &io_list, true); |
| 1745 | pincount = xfs_buf_delwri_split(target, &tmp_list, 0); | ||
| 1746 | 1677 | ||
| 1747 | /* | 1678 | /* Wait for IO to complete. */ |
| 1748 | * Dropped the delayed write list lock, now walk the temporary list. | 1679 | while (!list_empty(&io_list)) { |
| 1749 | * All I/O is issued async and then if we need to wait for completion | 1680 | bp = list_first_entry(&io_list, struct xfs_buf, b_list); |
| 1750 | * we do that after issuing all the IO. | ||
| 1751 | */ | ||
| 1752 | list_sort(NULL, &tmp_list, xfs_buf_cmp); | ||
| 1753 | 1681 | ||
| 1754 | blk_start_plug(&plug); | ||
| 1755 | while (!list_empty(&tmp_list)) { | ||
| 1756 | bp = list_first_entry(&tmp_list, struct xfs_buf, b_list); | ||
| 1757 | ASSERT(target == bp->b_target); | ||
| 1758 | list_del_init(&bp->b_list); | 1682 | list_del_init(&bp->b_list); |
| 1759 | if (wait) { | 1683 | error2 = xfs_buf_iowait(bp); |
| 1760 | bp->b_flags &= ~XBF_ASYNC; | 1684 | xfs_buf_relse(bp); |
| 1761 | list_add(&bp->b_list, &wait_list); | 1685 | if (!error) |
| 1762 | } | 1686 | error = error2; |
| 1763 | xfs_bdstrat_cb(bp); | ||
| 1764 | } | ||
| 1765 | blk_finish_plug(&plug); | ||
| 1766 | |||
| 1767 | if (wait) { | ||
| 1768 | /* Wait for IO to complete. */ | ||
| 1769 | while (!list_empty(&wait_list)) { | ||
| 1770 | bp = list_first_entry(&wait_list, struct xfs_buf, b_list); | ||
| 1771 | |||
| 1772 | list_del_init(&bp->b_list); | ||
| 1773 | xfs_buf_iowait(bp); | ||
| 1774 | xfs_buf_relse(bp); | ||
| 1775 | } | ||
| 1776 | } | 1687 | } |
| 1777 | 1688 | ||
| 1778 | return pincount; | 1689 | return error; |
| 1779 | } | 1690 | } |
| 1780 | 1691 | ||
| 1781 | int __init | 1692 | int __init |
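With xfsbufd, the per-target delwri queue, and xfs_flush_buftarg() gone, delwri queues are now thread-local lists owned by the caller: buffers are queued locked via xfs_buf_delwri_queue() and flushed via xfs_buf_delwri_submit() or its _nowait variant. A sketch of the calling convention follows; it only makes sense in kernel context, example_flush() and the bufs[] array are hypothetical, and error handling is elided.

```c
static int
example_flush(
	struct xfs_buf	**bufs,
	int		nbuf)
{
	LIST_HEAD	(buffer_list);	/* private to this thread: no locking */
	int		i;

	for (i = 0; i < nbuf; i++) {
		xfs_buf_lock(bufs[i]);	/* queueing requires the buffer lock */
		xfs_buf_delwri_queue(bufs[i], &buffer_list);
		xfs_buf_unlock(bufs[i]);
	}

	/* Writes everything out, waits, and consumes buffer_list. */
	return xfs_buf_delwri_submit(&buffer_list);
}
```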
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index 5bf3be45f543..7f1d1392ce37 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h | |||
| @@ -32,11 +32,6 @@ | |||
| 32 | 32 | ||
| 33 | #define XFS_BUF_DADDR_NULL ((xfs_daddr_t) (-1LL)) | 33 | #define XFS_BUF_DADDR_NULL ((xfs_daddr_t) (-1LL)) |
| 34 | 34 | ||
| 35 | #define xfs_buf_ctob(pp) ((pp) * PAGE_CACHE_SIZE) | ||
| 36 | #define xfs_buf_btoc(dd) (((dd) + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT) | ||
| 37 | #define xfs_buf_btoct(dd) ((dd) >> PAGE_CACHE_SHIFT) | ||
| 38 | #define xfs_buf_poff(aa) ((aa) & ~PAGE_CACHE_MASK) | ||
| 39 | |||
| 40 | typedef enum { | 35 | typedef enum { |
| 41 | XBRW_READ = 1, /* transfer into target memory */ | 36 | XBRW_READ = 1, /* transfer into target memory */ |
| 42 | XBRW_WRITE = 2, /* transfer from target memory */ | 37 | XBRW_WRITE = 2, /* transfer from target memory */ |
| @@ -46,11 +41,9 @@ typedef enum { | |||
| 46 | #define XBF_READ (1 << 0) /* buffer intended for reading from device */ | 41 | #define XBF_READ (1 << 0) /* buffer intended for reading from device */ |
| 47 | #define XBF_WRITE (1 << 1) /* buffer intended for writing to device */ | 42 | #define XBF_WRITE (1 << 1) /* buffer intended for writing to device */ |
| 48 | #define XBF_READ_AHEAD (1 << 2) /* asynchronous read-ahead */ | 43 | #define XBF_READ_AHEAD (1 << 2) /* asynchronous read-ahead */ |
| 49 | #define XBF_MAPPED (1 << 3) /* buffer mapped (b_addr valid) */ | ||
| 50 | #define XBF_ASYNC (1 << 4) /* initiator will not wait for completion */ | 44 | #define XBF_ASYNC (1 << 4) /* initiator will not wait for completion */ |
| 51 | #define XBF_DONE (1 << 5) /* all pages in the buffer uptodate */ | 45 | #define XBF_DONE (1 << 5) /* all pages in the buffer uptodate */ |
| 52 | #define XBF_DELWRI (1 << 6) /* buffer has dirty pages */ | 46 | #define XBF_STALE (1 << 6) /* buffer has been staled, do not find it */ |
| 53 | #define XBF_STALE (1 << 7) /* buffer has been staled, do not find it */ | ||
| 54 | 47 | ||
| 55 | /* I/O hints for the BIO layer */ | 48 | /* I/O hints for the BIO layer */ |
| 56 | #define XBF_SYNCIO (1 << 10)/* treat this buffer as synchronous I/O */ | 49 | #define XBF_SYNCIO (1 << 10)/* treat this buffer as synchronous I/O */ |
| @@ -58,14 +51,13 @@ typedef enum { | |||
| 58 | #define XBF_FLUSH (1 << 12)/* flush the disk cache before a write */ | 51 | #define XBF_FLUSH (1 << 12)/* flush the disk cache before a write */ |
| 59 | 52 | ||
| 60 | /* flags used only as arguments to access routines */ | 53 | /* flags used only as arguments to access routines */ |
| 61 | #define XBF_LOCK (1 << 15)/* lock requested */ | ||
| 62 | #define XBF_TRYLOCK (1 << 16)/* lock requested, but do not wait */ | 54 | #define XBF_TRYLOCK (1 << 16)/* lock requested, but do not wait */ |
| 63 | #define XBF_DONT_BLOCK (1 << 17)/* do not block in current thread */ | 55 | #define XBF_UNMAPPED (1 << 17)/* do not map the buffer */ |
| 64 | 56 | ||
| 65 | /* flags used only internally */ | 57 | /* flags used only internally */ |
| 66 | #define _XBF_PAGES (1 << 20)/* backed by refcounted pages */ | 58 | #define _XBF_PAGES (1 << 20)/* backed by refcounted pages */ |
| 67 | #define _XBF_KMEM (1 << 21)/* backed by heap memory */ | 59 | #define _XBF_KMEM (1 << 21)/* backed by heap memory */ |
| 68 | #define _XBF_DELWRI_Q (1 << 22)/* buffer on delwri queue */ | 60 | #define _XBF_DELWRI_Q (1 << 22)/* buffer on a delwri queue */ |
| 69 | 61 | ||
| 70 | typedef unsigned int xfs_buf_flags_t; | 62 | typedef unsigned int xfs_buf_flags_t; |
| 71 | 63 | ||
| @@ -73,25 +65,18 @@ typedef unsigned int xfs_buf_flags_t; | |||
| 73 | { XBF_READ, "READ" }, \ | 65 | { XBF_READ, "READ" }, \ |
| 74 | { XBF_WRITE, "WRITE" }, \ | 66 | { XBF_WRITE, "WRITE" }, \ |
| 75 | { XBF_READ_AHEAD, "READ_AHEAD" }, \ | 67 | { XBF_READ_AHEAD, "READ_AHEAD" }, \ |
| 76 | { XBF_MAPPED, "MAPPED" }, \ | ||
| 77 | { XBF_ASYNC, "ASYNC" }, \ | 68 | { XBF_ASYNC, "ASYNC" }, \ |
| 78 | { XBF_DONE, "DONE" }, \ | 69 | { XBF_DONE, "DONE" }, \ |
| 79 | { XBF_DELWRI, "DELWRI" }, \ | ||
| 80 | { XBF_STALE, "STALE" }, \ | 70 | { XBF_STALE, "STALE" }, \ |
| 81 | { XBF_SYNCIO, "SYNCIO" }, \ | 71 | { XBF_SYNCIO, "SYNCIO" }, \ |
| 82 | { XBF_FUA, "FUA" }, \ | 72 | { XBF_FUA, "FUA" }, \ |
| 83 | { XBF_FLUSH, "FLUSH" }, \ | 73 | { XBF_FLUSH, "FLUSH" }, \ |
| 84 | { XBF_LOCK, "LOCK" }, /* should never be set */\ | 74 | { XBF_TRYLOCK, "TRYLOCK" }, /* should never be set */\ |
| 85 | { XBF_TRYLOCK, "TRYLOCK" }, /* ditto */\ | 75 | { XBF_UNMAPPED, "UNMAPPED" }, /* ditto */\ |
| 86 | { XBF_DONT_BLOCK, "DONT_BLOCK" }, /* ditto */\ | ||
| 87 | { _XBF_PAGES, "PAGES" }, \ | 76 | { _XBF_PAGES, "PAGES" }, \ |
| 88 | { _XBF_KMEM, "KMEM" }, \ | 77 | { _XBF_KMEM, "KMEM" }, \ |
| 89 | { _XBF_DELWRI_Q, "DELWRI_Q" } | 78 | { _XBF_DELWRI_Q, "DELWRI_Q" } |
| 90 | 79 | ||
| 91 | typedef enum { | ||
| 92 | XBT_FORCE_FLUSH = 0, | ||
| 93 | } xfs_buftarg_flags_t; | ||
| 94 | |||
| 95 | typedef struct xfs_buftarg { | 80 | typedef struct xfs_buftarg { |
| 96 | dev_t bt_dev; | 81 | dev_t bt_dev; |
| 97 | struct block_device *bt_bdev; | 82 | struct block_device *bt_bdev; |
| @@ -101,12 +86,6 @@ typedef struct xfs_buftarg { | |||
| 101 | unsigned int bt_sshift; | 86 | unsigned int bt_sshift; |
| 102 | size_t bt_smask; | 87 | size_t bt_smask; |
| 103 | 88 | ||
| 104 | /* per device delwri queue */ | ||
| 105 | struct task_struct *bt_task; | ||
| 106 | struct list_head bt_delwri_queue; | ||
| 107 | spinlock_t bt_delwri_lock; | ||
| 108 | unsigned long bt_flags; | ||
| 109 | |||
| 110 | /* LRU control structures */ | 89 | /* LRU control structures */ |
| 111 | struct shrinker bt_shrinker; | 90 | struct shrinker bt_shrinker; |
| 112 | struct list_head bt_lru; | 91 | struct list_head bt_lru; |
| @@ -128,8 +107,8 @@ typedef struct xfs_buf { | |||
| 128 | * fast-path on locking. | 107 | * fast-path on locking. |
| 129 | */ | 108 | */ |
| 130 | struct rb_node b_rbnode; /* rbtree node */ | 109 | struct rb_node b_rbnode; /* rbtree node */ |
| 131 | xfs_off_t b_file_offset; /* offset in file */ | 110 | xfs_daddr_t b_bn; /* block number for I/O */ |
| 132 | size_t b_buffer_length;/* size of buffer in bytes */ | 111 | int b_length; /* size of buffer in BBs */ |
| 133 | atomic_t b_hold; /* reference count */ | 112 | atomic_t b_hold; /* reference count */ |
| 134 | atomic_t b_lru_ref; /* lru reclaim ref count */ | 113 | atomic_t b_lru_ref; /* lru reclaim ref count */ |
| 135 | xfs_buf_flags_t b_flags; /* status flags */ | 114 | xfs_buf_flags_t b_flags; /* status flags */ |
| @@ -140,8 +119,6 @@ typedef struct xfs_buf { | |||
| 140 | struct list_head b_list; | 119 | struct list_head b_list; |
| 141 | struct xfs_perag *b_pag; /* contains rbtree root */ | 120 | struct xfs_perag *b_pag; /* contains rbtree root */ |
| 142 | xfs_buftarg_t *b_target; /* buffer target (device) */ | 121 | xfs_buftarg_t *b_target; /* buffer target (device) */ |
| 143 | xfs_daddr_t b_bn; /* block number for I/O */ | ||
| 144 | size_t b_count_desired;/* desired transfer size */ | ||
| 145 | void *b_addr; /* virtual address of buffer */ | 122 | void *b_addr; /* virtual address of buffer */ |
| 146 | struct work_struct b_iodone_work; | 123 | struct work_struct b_iodone_work; |
| 147 | xfs_buf_iodone_t b_iodone; /* I/O completion function */ | 124 | xfs_buf_iodone_t b_iodone; /* I/O completion function */ |
| @@ -150,7 +127,7 @@ typedef struct xfs_buf { | |||
| 150 | struct xfs_trans *b_transp; | 127 | struct xfs_trans *b_transp; |
| 151 | struct page **b_pages; /* array of page pointers */ | 128 | struct page **b_pages; /* array of page pointers */ |
| 152 | struct page *b_page_array[XB_PAGES]; /* inline pages */ | 129 | struct page *b_page_array[XB_PAGES]; /* inline pages */ |
| 153 | unsigned long b_queuetime; /* time buffer was queued */ | 130 | int b_io_length; /* IO size in BBs */ |
| 154 | atomic_t b_pin_count; /* pin count */ | 131 | atomic_t b_pin_count; /* pin count */ |
| 155 | atomic_t b_io_remaining; /* #outstanding I/O requests */ | 132 | atomic_t b_io_remaining; /* #outstanding I/O requests */ |
| 156 | unsigned int b_page_count; /* size of page array */ | 133 | unsigned int b_page_count; /* size of page array */ |
| @@ -163,26 +140,30 @@ typedef struct xfs_buf { | |||
| 163 | 140 | ||
| 164 | 141 | ||
| 165 | /* Finding and Reading Buffers */ | 142 | /* Finding and Reading Buffers */ |
| 166 | extern xfs_buf_t *_xfs_buf_find(xfs_buftarg_t *, xfs_off_t, size_t, | 143 | struct xfs_buf *_xfs_buf_find(struct xfs_buftarg *target, xfs_daddr_t blkno, |
| 167 | xfs_buf_flags_t, xfs_buf_t *); | 144 | size_t numblks, xfs_buf_flags_t flags, |
| 145 | struct xfs_buf *new_bp); | ||
| 168 | #define xfs_incore(buftarg,blkno,len,lockit) \ | 146 | #define xfs_incore(buftarg,blkno,len,lockit) \ |
| 169 | _xfs_buf_find(buftarg, blkno ,len, lockit, NULL) | 147 | _xfs_buf_find(buftarg, blkno ,len, lockit, NULL) |
| 170 | 148 | ||
| 171 | extern xfs_buf_t *xfs_buf_get(xfs_buftarg_t *, xfs_off_t, size_t, | 149 | struct xfs_buf *xfs_buf_get(struct xfs_buftarg *target, xfs_daddr_t blkno, |
| 172 | xfs_buf_flags_t); | 150 | size_t numblks, xfs_buf_flags_t flags); |
| 173 | extern xfs_buf_t *xfs_buf_read(xfs_buftarg_t *, xfs_off_t, size_t, | 151 | struct xfs_buf *xfs_buf_read(struct xfs_buftarg *target, xfs_daddr_t blkno, |
| 174 | xfs_buf_flags_t); | 152 | size_t numblks, xfs_buf_flags_t flags); |
| 175 | 153 | void xfs_buf_readahead(struct xfs_buftarg *target, xfs_daddr_t blkno, | |
| 176 | struct xfs_buf *xfs_buf_alloc(struct xfs_buftarg *, xfs_off_t, size_t, | 154 | size_t numblks); |
| 177 | xfs_buf_flags_t); | 155 | |
| 178 | extern void xfs_buf_set_empty(struct xfs_buf *bp, size_t len); | 156 | struct xfs_buf *xfs_buf_get_empty(struct xfs_buftarg *target, size_t numblks); |
| 179 | extern xfs_buf_t *xfs_buf_get_uncached(struct xfs_buftarg *, size_t, int); | 157 | struct xfs_buf *xfs_buf_alloc(struct xfs_buftarg *target, xfs_daddr_t blkno, |
| 180 | extern int xfs_buf_associate_memory(xfs_buf_t *, void *, size_t); | 158 | size_t numblks, xfs_buf_flags_t flags); |
| 181 | extern void xfs_buf_hold(xfs_buf_t *); | 159 | void xfs_buf_set_empty(struct xfs_buf *bp, size_t numblks); |
| 182 | extern void xfs_buf_readahead(xfs_buftarg_t *, xfs_off_t, size_t); | 160 | int xfs_buf_associate_memory(struct xfs_buf *bp, void *mem, size_t length); |
| 183 | struct xfs_buf *xfs_buf_read_uncached(struct xfs_mount *mp, | 161 | |
| 184 | struct xfs_buftarg *target, | 162 | struct xfs_buf *xfs_buf_get_uncached(struct xfs_buftarg *target, size_t numblks, |
| 185 | xfs_daddr_t daddr, size_t length, int flags); | 163 | int flags); |
| 164 | struct xfs_buf *xfs_buf_read_uncached(struct xfs_buftarg *target, | ||
| 165 | xfs_daddr_t daddr, size_t numblks, int flags); | ||
| 166 | void xfs_buf_hold(struct xfs_buf *bp); | ||
| 186 | 167 | ||
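Note the unit change running through these prototypes: lookups are now keyed by an `xfs_daddr_t` disk address plus a length in 512-byte basic blocks, rather than a byte offset and byte count. A hedged caller sketch under the new convention (`mp` and `fsbno` are assumed context, not part of this patch):

```c
/*
 * Illustrative caller of the reworked lookup API: read one
 * filesystem block, addressing it in basic blocks (BBs).
 */
struct xfs_buf	*bp;

bp = xfs_buf_read(mp->m_ddev_targp,
		  XFS_FSB_TO_DADDR(mp, fsbno),	/* disk address */
		  XFS_FSB_TO_BB(mp, 1),		/* length in BBs */
		  XBF_TRYLOCK);
if (bp) {
	/* ... inspect bp->b_addr ... */
	xfs_buf_relse(bp);
}
```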
| 187 | /* Releasing Buffers */ | 168 | /* Releasing Buffers */ |
| 188 | extern void xfs_buf_free(xfs_buf_t *); | 169 | extern void xfs_buf_free(xfs_buf_t *); |
| @@ -204,7 +185,7 @@ extern int xfs_bdstrat_cb(struct xfs_buf *); | |||
| 204 | extern void xfs_buf_ioend(xfs_buf_t *, int); | 185 | extern void xfs_buf_ioend(xfs_buf_t *, int); |
| 205 | extern void xfs_buf_ioerror(xfs_buf_t *, int); | 186 | extern void xfs_buf_ioerror(xfs_buf_t *, int); |
| 206 | extern void xfs_buf_ioerror_alert(struct xfs_buf *, const char *func); | 187 | extern void xfs_buf_ioerror_alert(struct xfs_buf *, const char *func); |
| 207 | extern int xfs_buf_iorequest(xfs_buf_t *); | 188 | extern void xfs_buf_iorequest(xfs_buf_t *); |
| 208 | extern int xfs_buf_iowait(xfs_buf_t *); | 189 | extern int xfs_buf_iowait(xfs_buf_t *); |
| 209 | extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, void *, | 190 | extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, void *, |
| 210 | xfs_buf_rw_t); | 191 | xfs_buf_rw_t); |
| @@ -220,24 +201,22 @@ static inline int xfs_buf_geterror(xfs_buf_t *bp) | |||
| 220 | extern xfs_caddr_t xfs_buf_offset(xfs_buf_t *, size_t); | 201 | extern xfs_caddr_t xfs_buf_offset(xfs_buf_t *, size_t); |
| 221 | 202 | ||
| 222 | /* Delayed Write Buffer Routines */ | 203 | /* Delayed Write Buffer Routines */ |
| 223 | extern void xfs_buf_delwri_queue(struct xfs_buf *); | 204 | extern bool xfs_buf_delwri_queue(struct xfs_buf *, struct list_head *); |
| 224 | extern void xfs_buf_delwri_dequeue(struct xfs_buf *); | 205 | extern int xfs_buf_delwri_submit(struct list_head *); |
| 225 | extern void xfs_buf_delwri_promote(struct xfs_buf *); | 206 | extern int xfs_buf_delwri_submit_nowait(struct list_head *); |
| 226 | 207 | ||
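This is the heart of the xfsbufd removal: the per-target delwri queue is replaced by caller-owned lists. A minimal sketch of the new pattern, assuming a standard on-stack list_head:

```c
/*
 * Sketch of the new on-stack delayed-write pattern (illustrative,
 * not from this patch): stage dirty buffers on a local list, then
 * submit them as one batch.
 */
LIST_HEAD(buffer_list);
int error;

xfs_buf_delwri_queue(bp, &buffer_list);		/* returns false if stale */
error = xfs_buf_delwri_submit(&buffer_list);	/* writes and waits */
```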
| 227 | /* Buffer Daemon Setup Routines */ | 208 | /* Buffer Daemon Setup Routines */ |
| 228 | extern int xfs_buf_init(void); | 209 | extern int xfs_buf_init(void); |
| 229 | extern void xfs_buf_terminate(void); | 210 | extern void xfs_buf_terminate(void); |
| 230 | 211 | ||
| 231 | #define XFS_BUF_ZEROFLAGS(bp) \ | 212 | #define XFS_BUF_ZEROFLAGS(bp) \ |
| 232 | ((bp)->b_flags &= ~(XBF_READ|XBF_WRITE|XBF_ASYNC|XBF_DELWRI| \ | 213 | ((bp)->b_flags &= ~(XBF_READ|XBF_WRITE|XBF_ASYNC| \ |
| 233 | XBF_SYNCIO|XBF_FUA|XBF_FLUSH)) | 214 | XBF_SYNCIO|XBF_FUA|XBF_FLUSH)) |
| 234 | 215 | ||
| 235 | void xfs_buf_stale(struct xfs_buf *bp); | 216 | void xfs_buf_stale(struct xfs_buf *bp); |
| 236 | #define XFS_BUF_UNSTALE(bp) ((bp)->b_flags &= ~XBF_STALE) | 217 | #define XFS_BUF_UNSTALE(bp) ((bp)->b_flags &= ~XBF_STALE) |
| 237 | #define XFS_BUF_ISSTALE(bp) ((bp)->b_flags & XBF_STALE) | 218 | #define XFS_BUF_ISSTALE(bp) ((bp)->b_flags & XBF_STALE) |
| 238 | 219 | ||
| 239 | #define XFS_BUF_ISDELAYWRITE(bp) ((bp)->b_flags & XBF_DELWRI) | ||
| 240 | |||
| 241 | #define XFS_BUF_DONE(bp) ((bp)->b_flags |= XBF_DONE) | 220 | #define XFS_BUF_DONE(bp) ((bp)->b_flags |= XBF_DONE) |
| 242 | #define XFS_BUF_UNDONE(bp) ((bp)->b_flags &= ~XBF_DONE) | 221 | #define XFS_BUF_UNDONE(bp) ((bp)->b_flags &= ~XBF_DONE) |
| 243 | #define XFS_BUF_ISDONE(bp) ((bp)->b_flags & XBF_DONE) | 222 | #define XFS_BUF_ISDONE(bp) ((bp)->b_flags & XBF_DONE) |
| @@ -256,12 +235,6 @@ void xfs_buf_stale(struct xfs_buf *bp); | |||
| 256 | 235 | ||
| 257 | #define XFS_BUF_ADDR(bp) ((bp)->b_bn) | 236 | #define XFS_BUF_ADDR(bp) ((bp)->b_bn) |
| 258 | #define XFS_BUF_SET_ADDR(bp, bno) ((bp)->b_bn = (xfs_daddr_t)(bno)) | 237 | #define XFS_BUF_SET_ADDR(bp, bno) ((bp)->b_bn = (xfs_daddr_t)(bno)) |
| 259 | #define XFS_BUF_OFFSET(bp) ((bp)->b_file_offset) | ||
| 260 | #define XFS_BUF_SET_OFFSET(bp, off) ((bp)->b_file_offset = (off)) | ||
| 261 | #define XFS_BUF_COUNT(bp) ((bp)->b_count_desired) | ||
| 262 | #define XFS_BUF_SET_COUNT(bp, cnt) ((bp)->b_count_desired = (cnt)) | ||
| 263 | #define XFS_BUF_SIZE(bp) ((bp)->b_buffer_length) | ||
| 264 | #define XFS_BUF_SET_SIZE(bp, cnt) ((bp)->b_buffer_length = (cnt)) | ||
| 265 | 238 | ||
| 266 | static inline void xfs_buf_set_ref(struct xfs_buf *bp, int lru_ref) | 239 | static inline void xfs_buf_set_ref(struct xfs_buf *bp, int lru_ref) |
| 267 | { | 240 | { |
| @@ -287,7 +260,6 @@ extern xfs_buftarg_t *xfs_alloc_buftarg(struct xfs_mount *, | |||
| 287 | extern void xfs_free_buftarg(struct xfs_mount *, struct xfs_buftarg *); | 260 | extern void xfs_free_buftarg(struct xfs_mount *, struct xfs_buftarg *); |
| 288 | extern void xfs_wait_buftarg(xfs_buftarg_t *); | 261 | extern void xfs_wait_buftarg(xfs_buftarg_t *); |
| 289 | extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int); | 262 | extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int); |
| 290 | extern int xfs_flush_buftarg(xfs_buftarg_t *, int); | ||
| 291 | 263 | ||
| 292 | #define xfs_getsize_buftarg(buftarg) block_size((buftarg)->bt_bdev) | 264 | #define xfs_getsize_buftarg(buftarg) block_size((buftarg)->bt_bdev) |
| 293 | #define xfs_readonly_buftarg(buftarg) bdev_read_only((buftarg)->bt_bdev) | 265 | #define xfs_readonly_buftarg(buftarg) bdev_read_only((buftarg)->bt_bdev) |
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index eac97ef81e2a..45df2b857d48 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c | |||
| @@ -20,7 +20,6 @@ | |||
| 20 | #include "xfs_types.h" | 20 | #include "xfs_types.h" |
| 21 | #include "xfs_bit.h" | 21 | #include "xfs_bit.h" |
| 22 | #include "xfs_log.h" | 22 | #include "xfs_log.h" |
| 23 | #include "xfs_inum.h" | ||
| 24 | #include "xfs_trans.h" | 23 | #include "xfs_trans.h" |
| 25 | #include "xfs_sb.h" | 24 | #include "xfs_sb.h" |
| 26 | #include "xfs_ag.h" | 25 | #include "xfs_ag.h" |
| @@ -123,11 +122,11 @@ xfs_buf_item_log_check( | |||
| 123 | ASSERT(bip->bli_logged != NULL); | 122 | ASSERT(bip->bli_logged != NULL); |
| 124 | 123 | ||
| 125 | bp = bip->bli_buf; | 124 | bp = bip->bli_buf; |
| 126 | ASSERT(XFS_BUF_COUNT(bp) > 0); | 125 | ASSERT(bp->b_length > 0); |
| 127 | ASSERT(bp->b_addr != NULL); | 126 | ASSERT(bp->b_addr != NULL); |
| 128 | orig = bip->bli_orig; | 127 | orig = bip->bli_orig; |
| 129 | buffer = bp->b_addr; | 128 | buffer = bp->b_addr; |
| 130 | for (x = 0; x < XFS_BUF_COUNT(bp); x++) { | 129 | for (x = 0; x < BBTOB(bp->b_length); x++) { |
| 131 | if (orig[x] != buffer[x] && !btst(bip->bli_logged, x)) { | 130 | if (orig[x] != buffer[x] && !btst(bip->bli_logged, x)) { |
| 132 | xfs_emerg(bp->b_mount, | 131 | xfs_emerg(bp->b_mount, |
| 133 | "%s: bip %x buffer %x orig %x index %d", | 132 | "%s: bip %x buffer %x orig %x index %d", |
| @@ -418,7 +417,6 @@ xfs_buf_item_unpin( | |||
| 418 | if (freed && stale) { | 417 | if (freed && stale) { |
| 419 | ASSERT(bip->bli_flags & XFS_BLI_STALE); | 418 | ASSERT(bip->bli_flags & XFS_BLI_STALE); |
| 420 | ASSERT(xfs_buf_islocked(bp)); | 419 | ASSERT(xfs_buf_islocked(bp)); |
| 421 | ASSERT(!(XFS_BUF_ISDELAYWRITE(bp))); | ||
| 422 | ASSERT(XFS_BUF_ISSTALE(bp)); | 420 | ASSERT(XFS_BUF_ISSTALE(bp)); |
| 423 | ASSERT(bip->bli_format.blf_flags & XFS_BLF_CANCEL); | 421 | ASSERT(bip->bli_format.blf_flags & XFS_BLF_CANCEL); |
| 424 | 422 | ||
| @@ -455,42 +453,42 @@ xfs_buf_item_unpin( | |||
| 455 | bp->b_iodone = NULL; | 453 | bp->b_iodone = NULL; |
| 456 | } else { | 454 | } else { |
| 457 | spin_lock(&ailp->xa_lock); | 455 | spin_lock(&ailp->xa_lock); |
| 458 | xfs_trans_ail_delete(ailp, (xfs_log_item_t *)bip); | 456 | xfs_trans_ail_delete(ailp, lip, SHUTDOWN_LOG_IO_ERROR); |
| 459 | xfs_buf_item_relse(bp); | 457 | xfs_buf_item_relse(bp); |
| 460 | ASSERT(bp->b_fspriv == NULL); | 458 | ASSERT(bp->b_fspriv == NULL); |
| 461 | } | 459 | } |
| 462 | xfs_buf_relse(bp); | 460 | xfs_buf_relse(bp); |
| 461 | } else if (freed && remove) { | ||
| 462 | xfs_buf_lock(bp); | ||
| 463 | xfs_buf_ioerror(bp, EIO); | ||
| 464 | XFS_BUF_UNDONE(bp); | ||
| 465 | xfs_buf_stale(bp); | ||
| 466 | xfs_buf_ioend(bp, 0); | ||
| 463 | } | 467 | } |
| 464 | } | 468 | } |
| 465 | 469 | ||
| 466 | /* | ||
| 467 | * This is called to attempt to lock the buffer associated with this | ||
| 468 | * buf log item. Don't sleep on the buffer lock. If we can't get | ||
| 469 | * the lock right away, return 0. If we can get the lock, take a | ||
| 470 | * reference to the buffer. If this is a delayed write buffer that | ||
| 471 | * needs AIL help to be written back, invoke the pushbuf routine | ||
| 472 | * rather than the normal success path. | ||
| 473 | */ | ||
| 474 | STATIC uint | 470 | STATIC uint |
| 475 | xfs_buf_item_trylock( | 471 | xfs_buf_item_push( |
| 476 | struct xfs_log_item *lip) | 472 | struct xfs_log_item *lip, |
| 473 | struct list_head *buffer_list) | ||
| 477 | { | 474 | { |
| 478 | struct xfs_buf_log_item *bip = BUF_ITEM(lip); | 475 | struct xfs_buf_log_item *bip = BUF_ITEM(lip); |
| 479 | struct xfs_buf *bp = bip->bli_buf; | 476 | struct xfs_buf *bp = bip->bli_buf; |
| 477 | uint rval = XFS_ITEM_SUCCESS; | ||
| 480 | 478 | ||
| 481 | if (xfs_buf_ispinned(bp)) | 479 | if (xfs_buf_ispinned(bp)) |
| 482 | return XFS_ITEM_PINNED; | 480 | return XFS_ITEM_PINNED; |
| 483 | if (!xfs_buf_trylock(bp)) | 481 | if (!xfs_buf_trylock(bp)) |
| 484 | return XFS_ITEM_LOCKED; | 482 | return XFS_ITEM_LOCKED; |
| 485 | 483 | ||
| 486 | /* take a reference to the buffer. */ | ||
| 487 | xfs_buf_hold(bp); | ||
| 488 | |||
| 489 | ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); | 484 | ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); |
| 490 | trace_xfs_buf_item_trylock(bip); | 485 | |
| 491 | if (XFS_BUF_ISDELAYWRITE(bp)) | 486 | trace_xfs_buf_item_push(bip); |
| 492 | return XFS_ITEM_PUSHBUF; | 487 | |
| 493 | return XFS_ITEM_SUCCESS; | 488 | if (!xfs_buf_delwri_queue(bp, buffer_list)) |
| 489 | rval = XFS_ITEM_FLUSHING; | ||
| 490 | xfs_buf_unlock(bp); | ||
| 491 | return rval; | ||
| 494 | } | 492 | } |
| 495 | 493 | ||
| 496 | /* | 494 | /* |
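With .iop_trylock gone, the combined xfs_buf_item_push() above reports everything through its return value. A hedged sketch of how an AIL push loop might act on those codes (the surrounding xfsaild context — `lip` and its local `buffer_list` — is assumed, not shown in this hunk):

```c
/*
 * Illustrative only: acting on the ->iop_push() return codes used in
 * this series.
 */
switch (lip->li_ops->iop_push(lip, &buffer_list)) {
case XFS_ITEM_SUCCESS:	/* queued for delayed write */
	break;
case XFS_ITEM_FLUSHING:	/* already under I/O; wait for completion */
	break;
case XFS_ITEM_PINNED:	/* force the log, then retry */
	break;
case XFS_ITEM_LOCKED:	/* held elsewhere; retry later */
	break;
}
```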
| @@ -603,49 +601,6 @@ xfs_buf_item_committed( | |||
| 603 | return lsn; | 601 | return lsn; |
| 604 | } | 602 | } |
| 605 | 603 | ||
| 606 | /* | ||
| 607 | * The buffer is locked, but is not a delayed write buffer. This happens | ||
| 608 | * if we race with IO completion and hence we don't want to try to write it | ||
| 609 | * again. Just release the buffer. | ||
| 610 | */ | ||
| 611 | STATIC void | ||
| 612 | xfs_buf_item_push( | ||
| 613 | struct xfs_log_item *lip) | ||
| 614 | { | ||
| 615 | struct xfs_buf_log_item *bip = BUF_ITEM(lip); | ||
| 616 | struct xfs_buf *bp = bip->bli_buf; | ||
| 617 | |||
| 618 | ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); | ||
| 619 | ASSERT(!XFS_BUF_ISDELAYWRITE(bp)); | ||
| 620 | |||
| 621 | trace_xfs_buf_item_push(bip); | ||
| 622 | |||
| 623 | xfs_buf_relse(bp); | ||
| 624 | } | ||
| 625 | |||
| 626 | /* | ||
| 627 | * The buffer is locked and is a delayed write buffer. Promote the buffer | ||
| 628 | * in the delayed write queue as the caller knows that they must invoke | ||
| 629 | * the xfsbufd to get this buffer written. We have to unlock the buffer | ||
| 630 | * to allow the xfsbufd to write it, too. | ||
| 631 | */ | ||
| 632 | STATIC bool | ||
| 633 | xfs_buf_item_pushbuf( | ||
| 634 | struct xfs_log_item *lip) | ||
| 635 | { | ||
| 636 | struct xfs_buf_log_item *bip = BUF_ITEM(lip); | ||
| 637 | struct xfs_buf *bp = bip->bli_buf; | ||
| 638 | |||
| 639 | ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); | ||
| 640 | ASSERT(XFS_BUF_ISDELAYWRITE(bp)); | ||
| 641 | |||
| 642 | trace_xfs_buf_item_pushbuf(bip); | ||
| 643 | |||
| 644 | xfs_buf_delwri_promote(bp); | ||
| 645 | xfs_buf_relse(bp); | ||
| 646 | return true; | ||
| 647 | } | ||
| 648 | |||
| 649 | STATIC void | 604 | STATIC void |
| 650 | xfs_buf_item_committing( | 605 | xfs_buf_item_committing( |
| 651 | struct xfs_log_item *lip, | 606 | struct xfs_log_item *lip, |
| @@ -661,11 +616,9 @@ static const struct xfs_item_ops xfs_buf_item_ops = { | |||
| 661 | .iop_format = xfs_buf_item_format, | 616 | .iop_format = xfs_buf_item_format, |
| 662 | .iop_pin = xfs_buf_item_pin, | 617 | .iop_pin = xfs_buf_item_pin, |
| 663 | .iop_unpin = xfs_buf_item_unpin, | 618 | .iop_unpin = xfs_buf_item_unpin, |
| 664 | .iop_trylock = xfs_buf_item_trylock, | ||
| 665 | .iop_unlock = xfs_buf_item_unlock, | 619 | .iop_unlock = xfs_buf_item_unlock, |
| 666 | .iop_committed = xfs_buf_item_committed, | 620 | .iop_committed = xfs_buf_item_committed, |
| 667 | .iop_push = xfs_buf_item_push, | 621 | .iop_push = xfs_buf_item_push, |
| 668 | .iop_pushbuf = xfs_buf_item_pushbuf, | ||
| 669 | .iop_committing = xfs_buf_item_committing | 622 | .iop_committing = xfs_buf_item_committing |
| 670 | }; | 623 | }; |
| 671 | 624 | ||
| @@ -703,7 +656,8 @@ xfs_buf_item_init( | |||
| 703 | * truncate any pieces. map_size is the size of the | 656 | * truncate any pieces. map_size is the size of the |
| 704 | * bitmap needed to describe the chunks of the buffer. | 657 | * bitmap needed to describe the chunks of the buffer. |
| 705 | */ | 658 | */ |
| 706 | chunks = (int)((XFS_BUF_COUNT(bp) + (XFS_BLF_CHUNK - 1)) >> XFS_BLF_SHIFT); | 659 | chunks = (int)((BBTOB(bp->b_length) + (XFS_BLF_CHUNK - 1)) >> |
| 660 | XFS_BLF_SHIFT); | ||
| 707 | map_size = (int)((chunks + NBWORD) >> BIT_TO_WORD_SHIFT); | 661 | map_size = (int)((chunks + NBWORD) >> BIT_TO_WORD_SHIFT); |
| 708 | 662 | ||
| 709 | bip = (xfs_buf_log_item_t*)kmem_zone_zalloc(xfs_buf_item_zone, | 663 | bip = (xfs_buf_log_item_t*)kmem_zone_zalloc(xfs_buf_item_zone, |
| @@ -713,7 +667,7 @@ xfs_buf_item_init( | |||
| 713 | xfs_buf_hold(bp); | 667 | xfs_buf_hold(bp); |
| 714 | bip->bli_format.blf_type = XFS_LI_BUF; | 668 | bip->bli_format.blf_type = XFS_LI_BUF; |
| 715 | bip->bli_format.blf_blkno = (__int64_t)XFS_BUF_ADDR(bp); | 669 | bip->bli_format.blf_blkno = (__int64_t)XFS_BUF_ADDR(bp); |
| 716 | bip->bli_format.blf_len = (ushort)BTOBB(XFS_BUF_COUNT(bp)); | 670 | bip->bli_format.blf_len = (ushort)bp->b_length; |
| 717 | bip->bli_format.blf_map_size = map_size; | 671 | bip->bli_format.blf_map_size = map_size; |
| 718 | 672 | ||
| 719 | #ifdef XFS_TRANS_DEBUG | 673 | #ifdef XFS_TRANS_DEBUG |
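The XFS_BUF_COUNT() conversions in this hunk all follow one rule: b_length counts 512-byte basic blocks, so byte-oriented code wraps it in BBTOB(). A small reference sketch using the standard XFS helpers:

```c
/*
 * Unit reference for the new b_length field (illustrative): BBSHIFT
 * is 9, so one basic block (BB) is 512 bytes.
 */
int	bblen = bp->b_length;		/* length in BBs */
int	bytes = BBTOB(bblen);		/* bblen << BBSHIFT */

ASSERT(BTOBB(bytes) == bblen);		/* BTOBB rounds bytes up to BBs */
```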
| @@ -725,9 +679,9 @@ xfs_buf_item_init( | |||
| 725 | * the buffer to indicate which bytes the callers have asked | 679 | * the buffer to indicate which bytes the callers have asked |
| 726 | * to have logged. | 680 | * to have logged. |
| 727 | */ | 681 | */ |
| 728 | bip->bli_orig = (char *)kmem_alloc(XFS_BUF_COUNT(bp), KM_SLEEP); | 682 | bip->bli_orig = kmem_alloc(BBTOB(bp->b_length), KM_SLEEP); |
| 729 | memcpy(bip->bli_orig, bp->b_addr, XFS_BUF_COUNT(bp)); | 683 | memcpy(bip->bli_orig, bp->b_addr, BBTOB(bp->b_length)); |
| 730 | bip->bli_logged = (char *)kmem_zalloc(XFS_BUF_COUNT(bp) / NBBY, KM_SLEEP); | 684 | bip->bli_logged = kmem_zalloc(BBTOB(bp->b_length) / NBBY, KM_SLEEP); |
| 731 | #endif | 685 | #endif |
| 732 | 686 | ||
| 733 | /* | 687 | /* |
| @@ -984,20 +938,27 @@ xfs_buf_iodone_callbacks( | |||
| 984 | * If the write was asynchronous then no one will be looking for the | 938 | * If the write was asynchronous then no one will be looking for the |
| 985 | * error. Clear the error state and write the buffer out again. | 939 | * error. Clear the error state and write the buffer out again. |
| 986 | * | 940 | * |
| 987 | * During sync or umount we'll write all pending buffers again | 941 | * XXX: This helps against transient write errors, but we need to find |
| 988 | * synchronous, which will catch these errors if they keep hanging | 942 | * a way to shut the filesystem down if the writes keep failing. |
| 989 | * around. | 943 | * |
| 944 | In practice we'll shut the filesystem down soon, as non-transient | ||
| 945 | errors tend to affect the whole device and a failing log write | ||
| 946 | * will make us give up. But we really ought to do better here. | ||
| 990 | */ | 947 | */ |
| 991 | if (XFS_BUF_ISASYNC(bp)) { | 948 | if (XFS_BUF_ISASYNC(bp)) { |
| 949 | ASSERT(bp->b_iodone != NULL); | ||
| 950 | |||
| 951 | trace_xfs_buf_item_iodone_async(bp, _RET_IP_); | ||
| 952 | |||
| 992 | xfs_buf_ioerror(bp, 0); /* errno of 0 unsets the flag */ | 953 | xfs_buf_ioerror(bp, 0); /* errno of 0 unsets the flag */ |
| 993 | 954 | ||
| 994 | if (!XFS_BUF_ISSTALE(bp)) { | 955 | if (!XFS_BUF_ISSTALE(bp)) { |
| 995 | xfs_buf_delwri_queue(bp); | 956 | bp->b_flags |= XBF_WRITE | XBF_ASYNC | XBF_DONE; |
| 996 | XFS_BUF_DONE(bp); | 957 | xfs_bdstrat_cb(bp); |
| 958 | } else { | ||
| 959 | xfs_buf_relse(bp); | ||
| 997 | } | 960 | } |
| 998 | ASSERT(bp->b_iodone != NULL); | 961 | |
| 999 | trace_xfs_buf_item_iodone_async(bp, _RET_IP_); | ||
| 1000 | xfs_buf_relse(bp); | ||
| 1001 | return; | 962 | return; |
| 1002 | } | 963 | } |
| 1003 | 964 | ||
| @@ -1045,6 +1006,6 @@ xfs_buf_iodone( | |||
| 1045 | * Either way, AIL is useless if we're forcing a shutdown. | 1006 | * Either way, AIL is useless if we're forcing a shutdown. |
| 1046 | */ | 1007 | */ |
| 1047 | spin_lock(&ailp->xa_lock); | 1008 | spin_lock(&ailp->xa_lock); |
| 1048 | xfs_trans_ail_delete(ailp, lip); | 1009 | xfs_trans_ail_delete(ailp, lip, SHUTDOWN_CORRUPT_INCORE); |
| 1049 | xfs_buf_item_free(BUF_ITEM(lip)); | 1010 | xfs_buf_item_free(BUF_ITEM(lip)); |
| 1050 | } | 1011 | } |
diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c index 7f1a6f5b05a6..015b946c5808 100644 --- a/fs/xfs/xfs_da_btree.c +++ b/fs/xfs/xfs_da_btree.c | |||
| @@ -20,7 +20,6 @@ | |||
| 20 | #include "xfs_types.h" | 20 | #include "xfs_types.h" |
| 21 | #include "xfs_bit.h" | 21 | #include "xfs_bit.h" |
| 22 | #include "xfs_log.h" | 22 | #include "xfs_log.h" |
| 23 | #include "xfs_inum.h" | ||
| 24 | #include "xfs_trans.h" | 23 | #include "xfs_trans.h" |
| 25 | #include "xfs_sb.h" | 24 | #include "xfs_sb.h" |
| 26 | #include "xfs_ag.h" | 25 | #include "xfs_ag.h" |
| @@ -2277,20 +2276,20 @@ xfs_da_buf_make(int nbuf, xfs_buf_t **bps) | |||
| 2277 | if (nbuf == 1) { | 2276 | if (nbuf == 1) { |
| 2278 | dabuf->nbuf = 1; | 2277 | dabuf->nbuf = 1; |
| 2279 | bp = bps[0]; | 2278 | bp = bps[0]; |
| 2280 | dabuf->bbcount = (short)BTOBB(XFS_BUF_COUNT(bp)); | 2279 | dabuf->bbcount = bp->b_length; |
| 2281 | dabuf->data = bp->b_addr; | 2280 | dabuf->data = bp->b_addr; |
| 2282 | dabuf->bps[0] = bp; | 2281 | dabuf->bps[0] = bp; |
| 2283 | } else { | 2282 | } else { |
| 2284 | dabuf->nbuf = nbuf; | 2283 | dabuf->nbuf = nbuf; |
| 2285 | for (i = 0, dabuf->bbcount = 0; i < nbuf; i++) { | 2284 | for (i = 0, dabuf->bbcount = 0; i < nbuf; i++) { |
| 2286 | dabuf->bps[i] = bp = bps[i]; | 2285 | dabuf->bps[i] = bp = bps[i]; |
| 2287 | dabuf->bbcount += BTOBB(XFS_BUF_COUNT(bp)); | 2286 | dabuf->bbcount += bp->b_length; |
| 2288 | } | 2287 | } |
| 2289 | dabuf->data = kmem_alloc(BBTOB(dabuf->bbcount), KM_SLEEP); | 2288 | dabuf->data = kmem_alloc(BBTOB(dabuf->bbcount), KM_SLEEP); |
| 2290 | for (i = off = 0; i < nbuf; i++, off += XFS_BUF_COUNT(bp)) { | 2289 | for (i = off = 0; i < nbuf; i++, off += BBTOB(bp->b_length)) { |
| 2291 | bp = bps[i]; | 2290 | bp = bps[i]; |
| 2292 | memcpy((char *)dabuf->data + off, bp->b_addr, | 2291 | memcpy((char *)dabuf->data + off, bp->b_addr, |
| 2293 | XFS_BUF_COUNT(bp)); | 2292 | BBTOB(bp->b_length)); |
| 2294 | } | 2293 | } |
| 2295 | } | 2294 | } |
| 2296 | return dabuf; | 2295 | return dabuf; |
| @@ -2310,10 +2309,10 @@ xfs_da_buf_clean(xfs_dabuf_t *dabuf) | |||
| 2310 | ASSERT(dabuf->nbuf > 1); | 2309 | ASSERT(dabuf->nbuf > 1); |
| 2311 | dabuf->dirty = 0; | 2310 | dabuf->dirty = 0; |
| 2312 | for (i = off = 0; i < dabuf->nbuf; | 2311 | for (i = off = 0; i < dabuf->nbuf; |
| 2313 | i++, off += XFS_BUF_COUNT(bp)) { | 2312 | i++, off += BBTOB(bp->b_length)) { |
| 2314 | bp = dabuf->bps[i]; | 2313 | bp = dabuf->bps[i]; |
| 2315 | memcpy(bp->b_addr, dabuf->data + off, | 2314 | memcpy(bp->b_addr, dabuf->data + off, |
| 2316 | XFS_BUF_COUNT(bp)); | 2315 | BBTOB(bp->b_length)); |
| 2317 | } | 2316 | } |
| 2318 | } | 2317 | } |
| 2319 | } | 2318 | } |
| @@ -2356,10 +2355,10 @@ xfs_da_log_buf(xfs_trans_t *tp, xfs_dabuf_t *dabuf, uint first, uint last) | |||
| 2356 | } | 2355 | } |
| 2357 | dabuf->dirty = 1; | 2356 | dabuf->dirty = 1; |
| 2358 | ASSERT(first <= last); | 2357 | ASSERT(first <= last); |
| 2359 | for (i = off = 0; i < dabuf->nbuf; i++, off += XFS_BUF_COUNT(bp)) { | 2358 | for (i = off = 0; i < dabuf->nbuf; i++, off += BBTOB(bp->b_length)) { |
| 2360 | bp = dabuf->bps[i]; | 2359 | bp = dabuf->bps[i]; |
| 2361 | f = off; | 2360 | f = off; |
| 2362 | l = f + XFS_BUF_COUNT(bp) - 1; | 2361 | l = f + BBTOB(bp->b_length) - 1; |
| 2363 | if (f < first) | 2362 | if (f < first) |
| 2364 | f = first; | 2363 | f = first; |
| 2365 | if (l > last) | 2364 | if (l > last) |
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c index 1137bbc5eccb..e00de08dc8ac 100644 --- a/fs/xfs/xfs_dfrag.c +++ b/fs/xfs/xfs_dfrag.c | |||
| @@ -18,9 +18,7 @@ | |||
| 18 | #include "xfs.h" | 18 | #include "xfs.h" |
| 19 | #include "xfs_fs.h" | 19 | #include "xfs_fs.h" |
| 20 | #include "xfs_types.h" | 20 | #include "xfs_types.h" |
| 21 | #include "xfs_bit.h" | ||
| 22 | #include "xfs_log.h" | 21 | #include "xfs_log.h" |
| 23 | #include "xfs_inum.h" | ||
| 24 | #include "xfs_trans.h" | 22 | #include "xfs_trans.h" |
| 25 | #include "xfs_sb.h" | 23 | #include "xfs_sb.h" |
| 26 | #include "xfs_ag.h" | 24 | #include "xfs_ag.h" |
diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/xfs_dir2.c index a2e27010c7fb..67a250c36d41 100644 --- a/fs/xfs/xfs_dir2.c +++ b/fs/xfs/xfs_dir2.c | |||
| @@ -18,7 +18,6 @@ | |||
| 18 | #include "xfs.h" | 18 | #include "xfs.h" |
| 19 | #include "xfs_fs.h" | 19 | #include "xfs_fs.h" |
| 20 | #include "xfs_types.h" | 20 | #include "xfs_types.h" |
| 21 | #include "xfs_bit.h" | ||
| 22 | #include "xfs_log.h" | 21 | #include "xfs_log.h" |
| 23 | #include "xfs_inum.h" | 22 | #include "xfs_inum.h" |
| 24 | #include "xfs_trans.h" | 23 | #include "xfs_trans.h" |
diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c index d3b63aefd01d..586732f2d80d 100644 --- a/fs/xfs/xfs_dir2_block.c +++ b/fs/xfs/xfs_dir2_block.c | |||
| @@ -19,7 +19,6 @@ | |||
| 19 | #include "xfs_fs.h" | 19 | #include "xfs_fs.h" |
| 20 | #include "xfs_types.h" | 20 | #include "xfs_types.h" |
| 21 | #include "xfs_log.h" | 21 | #include "xfs_log.h" |
| 22 | #include "xfs_inum.h" | ||
| 23 | #include "xfs_trans.h" | 22 | #include "xfs_trans.h" |
| 24 | #include "xfs_sb.h" | 23 | #include "xfs_sb.h" |
| 25 | #include "xfs_ag.h" | 24 | #include "xfs_ag.h" |
diff --git a/fs/xfs/xfs_dir2_data.c b/fs/xfs/xfs_dir2_data.c index 5bbe2a8a023f..2046988e9eb2 100644 --- a/fs/xfs/xfs_dir2_data.c +++ b/fs/xfs/xfs_dir2_data.c | |||
| @@ -19,7 +19,6 @@ | |||
| 19 | #include "xfs_fs.h" | 19 | #include "xfs_fs.h" |
| 20 | #include "xfs_types.h" | 20 | #include "xfs_types.h" |
| 21 | #include "xfs_log.h" | 21 | #include "xfs_log.h" |
| 22 | #include "xfs_inum.h" | ||
| 23 | #include "xfs_trans.h" | 22 | #include "xfs_trans.h" |
| 24 | #include "xfs_sb.h" | 23 | #include "xfs_sb.h" |
| 25 | #include "xfs_ag.h" | 24 | #include "xfs_ag.h" |
diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c index 66e108f561a3..397ffbcbab1d 100644 --- a/fs/xfs/xfs_dir2_leaf.c +++ b/fs/xfs/xfs_dir2_leaf.c | |||
| @@ -20,7 +20,6 @@ | |||
| 20 | #include "xfs_types.h" | 20 | #include "xfs_types.h" |
| 21 | #include "xfs_bit.h" | 21 | #include "xfs_bit.h" |
| 22 | #include "xfs_log.h" | 22 | #include "xfs_log.h" |
| 23 | #include "xfs_inum.h" | ||
| 24 | #include "xfs_trans.h" | 23 | #include "xfs_trans.h" |
| 25 | #include "xfs_sb.h" | 24 | #include "xfs_sb.h" |
| 26 | #include "xfs_ag.h" | 25 | #include "xfs_ag.h" |
diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c index 0179a41d9e5a..b0f26780449d 100644 --- a/fs/xfs/xfs_dir2_node.c +++ b/fs/xfs/xfs_dir2_node.c | |||
| @@ -19,7 +19,6 @@ | |||
| 19 | #include "xfs_fs.h" | 19 | #include "xfs_fs.h" |
| 20 | #include "xfs_types.h" | 20 | #include "xfs_types.h" |
| 21 | #include "xfs_log.h" | 21 | #include "xfs_log.h" |
| 22 | #include "xfs_inum.h" | ||
| 23 | #include "xfs_trans.h" | 22 | #include "xfs_trans.h" |
| 24 | #include "xfs_sb.h" | 23 | #include "xfs_sb.h" |
| 25 | #include "xfs_ag.h" | 24 | #include "xfs_ag.h" |
diff --git a/fs/xfs/xfs_dir2_sf.c b/fs/xfs/xfs_dir2_sf.c index 79d05e84e296..19bf0c5e38f4 100644 --- a/fs/xfs/xfs_dir2_sf.c +++ b/fs/xfs/xfs_dir2_sf.c | |||
| @@ -19,7 +19,6 @@ | |||
| 19 | #include "xfs_fs.h" | 19 | #include "xfs_fs.h" |
| 20 | #include "xfs_types.h" | 20 | #include "xfs_types.h" |
| 21 | #include "xfs_log.h" | 21 | #include "xfs_log.h" |
| 22 | #include "xfs_inum.h" | ||
| 23 | #include "xfs_trans.h" | 22 | #include "xfs_trans.h" |
| 24 | #include "xfs_sb.h" | 23 | #include "xfs_sb.h" |
| 25 | #include "xfs_ag.h" | 24 | #include "xfs_ag.h" |
diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c index 1ad3a4b8ca40..f9c3fe304a17 100644 --- a/fs/xfs/xfs_discard.c +++ b/fs/xfs/xfs_discard.c | |||
| @@ -17,7 +17,6 @@ | |||
| 17 | */ | 17 | */ |
| 18 | #include "xfs.h" | 18 | #include "xfs.h" |
| 19 | #include "xfs_sb.h" | 19 | #include "xfs_sb.h" |
| 20 | #include "xfs_inum.h" | ||
| 21 | #include "xfs_log.h" | 20 | #include "xfs_log.h" |
| 22 | #include "xfs_ag.h" | 21 | #include "xfs_ag.h" |
| 23 | #include "xfs_mount.h" | 22 | #include "xfs_mount.h" |
| @@ -30,6 +29,7 @@ | |||
| 30 | #include "xfs_inode.h" | 29 | #include "xfs_inode.h" |
| 31 | #include "xfs_alloc.h" | 30 | #include "xfs_alloc.h" |
| 32 | #include "xfs_error.h" | 31 | #include "xfs_error.h" |
| 32 | #include "xfs_extent_busy.h" | ||
| 33 | #include "xfs_discard.h" | 33 | #include "xfs_discard.h" |
| 34 | #include "xfs_trace.h" | 34 | #include "xfs_trace.h" |
| 35 | 35 | ||
| @@ -118,7 +118,7 @@ xfs_trim_extents( | |||
| 118 | * If any blocks in the range are still busy, skip the | 118 | * If any blocks in the range are still busy, skip the |
| 119 | * discard and try again the next time. | 119 | * discard and try again the next time. |
| 120 | */ | 120 | */ |
| 121 | if (xfs_alloc_busy_search(mp, agno, fbno, flen)) { | 121 | if (xfs_extent_busy_search(mp, agno, fbno, flen)) { |
| 122 | trace_xfs_discard_busy(mp, agno, fbno, flen); | 122 | trace_xfs_discard_busy(mp, agno, fbno, flen); |
| 123 | goto next_extent; | 123 | goto next_extent; |
| 124 | } | 124 | } |
| @@ -212,7 +212,7 @@ xfs_discard_extents( | |||
| 212 | struct xfs_mount *mp, | 212 | struct xfs_mount *mp, |
| 213 | struct list_head *list) | 213 | struct list_head *list) |
| 214 | { | 214 | { |
| 215 | struct xfs_busy_extent *busyp; | 215 | struct xfs_extent_busy *busyp; |
| 216 | int error = 0; | 216 | int error = 0; |
| 217 | 217 | ||
| 218 | list_for_each_entry(busyp, list, list) { | 218 | list_for_each_entry(busyp, list, list) { |
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c index 1155208fa830..bf27fcca4843 100644 --- a/fs/xfs/xfs_dquot.c +++ b/fs/xfs/xfs_dquot.c | |||
| @@ -19,7 +19,6 @@ | |||
| 19 | #include "xfs_fs.h" | 19 | #include "xfs_fs.h" |
| 20 | #include "xfs_bit.h" | 20 | #include "xfs_bit.h" |
| 21 | #include "xfs_log.h" | 21 | #include "xfs_log.h" |
| 22 | #include "xfs_inum.h" | ||
| 23 | #include "xfs_trans.h" | 22 | #include "xfs_trans.h" |
| 24 | #include "xfs_sb.h" | 23 | #include "xfs_sb.h" |
| 25 | #include "xfs_ag.h" | 24 | #include "xfs_ag.h" |
| @@ -857,7 +856,7 @@ xfs_qm_dqflush_done( | |||
| 857 | /* xfs_trans_ail_delete() drops the AIL lock. */ | 856 | /* xfs_trans_ail_delete() drops the AIL lock. */ |
| 858 | spin_lock(&ailp->xa_lock); | 857 | spin_lock(&ailp->xa_lock); |
| 859 | if (lip->li_lsn == qip->qli_flush_lsn) | 858 | if (lip->li_lsn == qip->qli_flush_lsn) |
| 860 | xfs_trans_ail_delete(ailp, lip); | 859 | xfs_trans_ail_delete(ailp, lip, SHUTDOWN_CORRUPT_INCORE); |
| 861 | else | 860 | else |
| 862 | spin_unlock(&ailp->xa_lock); | 861 | spin_unlock(&ailp->xa_lock); |
| 863 | } | 862 | } |
| @@ -878,8 +877,8 @@ xfs_qm_dqflush_done( | |||
| 878 | */ | 877 | */ |
| 879 | int | 878 | int |
| 880 | xfs_qm_dqflush( | 879 | xfs_qm_dqflush( |
| 881 | xfs_dquot_t *dqp, | 880 | struct xfs_dquot *dqp, |
| 882 | uint flags) | 881 | struct xfs_buf **bpp) |
| 883 | { | 882 | { |
| 884 | struct xfs_mount *mp = dqp->q_mount; | 883 | struct xfs_mount *mp = dqp->q_mount; |
| 885 | struct xfs_buf *bp; | 884 | struct xfs_buf *bp; |
| @@ -891,25 +890,30 @@ xfs_qm_dqflush( | |||
| 891 | 890 | ||
| 892 | trace_xfs_dqflush(dqp); | 891 | trace_xfs_dqflush(dqp); |
| 893 | 892 | ||
| 894 | /* | 893 | *bpp = NULL; |
| 895 | * If not dirty, or it's pinned and we are not supposed to block, nada. | 894 | |
| 896 | */ | ||
| 897 | if (!XFS_DQ_IS_DIRTY(dqp) || | ||
| 898 | ((flags & SYNC_TRYLOCK) && atomic_read(&dqp->q_pincount) > 0)) { | ||
| 899 | xfs_dqfunlock(dqp); | ||
| 900 | return 0; | ||
| 901 | } | ||
| 902 | xfs_qm_dqunpin_wait(dqp); | 895 | xfs_qm_dqunpin_wait(dqp); |
| 903 | 896 | ||
| 904 | /* | 897 | /* |
| 905 | * This may have been unpinned because the filesystem is shutting | 898 | * This may have been unpinned because the filesystem is shutting |
| 906 | * down forcibly. If that's the case we must not write this dquot | 899 | * down forcibly. If that's the case we must not write this dquot |
| 907 | * to disk, because the log record didn't make it to disk! | 900 | * to disk, because the log record didn't make it to disk. |
| 901 | * | ||
| 902 | * We also have to remove the log item from the AIL in this case, | ||
| 903 | * as we wait for an empty AIL as part of the unmount process. | ||
| 908 | */ | 904 | */ |
| 909 | if (XFS_FORCED_SHUTDOWN(mp)) { | 905 | if (XFS_FORCED_SHUTDOWN(mp)) { |
| 906 | struct xfs_log_item *lip = &dqp->q_logitem.qli_item; | ||
| 910 | dqp->dq_flags &= ~XFS_DQ_DIRTY; | 907 | dqp->dq_flags &= ~XFS_DQ_DIRTY; |
| 911 | xfs_dqfunlock(dqp); | 908 | |
| 912 | return XFS_ERROR(EIO); | 909 | spin_lock(&mp->m_ail->xa_lock); |
| 910 | if (lip->li_flags & XFS_LI_IN_AIL) | ||
| 911 | xfs_trans_ail_delete(mp->m_ail, lip, | ||
| 912 | SHUTDOWN_CORRUPT_INCORE); | ||
| 913 | else | ||
| 914 | spin_unlock(&mp->m_ail->xa_lock); | ||
| 915 | error = XFS_ERROR(EIO); | ||
| 916 | goto out_unlock; | ||
| 913 | } | 917 | } |
| 914 | 918 | ||
| 915 | /* | 919 | /* |
| @@ -917,11 +921,8 @@ xfs_qm_dqflush( | |||
| 917 | */ | 921 | */ |
| 918 | error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, dqp->q_blkno, | 922 | error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, dqp->q_blkno, |
| 919 | mp->m_quotainfo->qi_dqchunklen, 0, &bp); | 923 | mp->m_quotainfo->qi_dqchunklen, 0, &bp); |
| 920 | if (error) { | 924 | if (error) |
| 921 | ASSERT(error != ENOENT); | 925 | goto out_unlock; |
| 922 | xfs_dqfunlock(dqp); | ||
| 923 | return error; | ||
| 924 | } | ||
| 925 | 926 | ||
| 926 | /* | 927 | /* |
| 927 | * Calculate the location of the dquot inside the buffer. | 928 | * Calculate the location of the dquot inside the buffer. |
| @@ -967,20 +968,13 @@ xfs_qm_dqflush( | |||
| 967 | xfs_log_force(mp, 0); | 968 | xfs_log_force(mp, 0); |
| 968 | } | 969 | } |
| 969 | 970 | ||
| 970 | if (flags & SYNC_WAIT) | ||
| 971 | error = xfs_bwrite(bp); | ||
| 972 | else | ||
| 973 | xfs_buf_delwri_queue(bp); | ||
| 974 | |||
| 975 | xfs_buf_relse(bp); | ||
| 976 | |||
| 977 | trace_xfs_dqflush_done(dqp); | 971 | trace_xfs_dqflush_done(dqp); |
| 972 | *bpp = bp; | ||
| 973 | return 0; | ||
| 978 | 974 | ||
| 979 | /* | 975 | out_unlock: |
| 980 | * dqp is still locked, but caller is free to unlock it now. | 976 | xfs_dqfunlock(dqp); |
| 981 | */ | 977 | return XFS_ERROR(EIO); |
| 982 | return error; | ||
| 983 | |||
| 984 | } | 978 | } |
| 985 | 979 | ||
| 986 | /* | 980 | /* |
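xfs_qm_dqflush() no longer issues the write itself: on success it returns the locked, dirty buffer through bpp and leaves the I/O scheduling to the caller. A hedged sketch of the resulting caller pattern (mirroring the dquot log item push further down; `buffer_list` is assumed context):

```c
/*
 * Illustrative caller of the reworked xfs_qm_dqflush(): the flushed
 * buffer comes back via 'bp' and is queued on a caller-owned delwri
 * list.
 */
struct xfs_buf	*bp;
int		error;

error = xfs_qm_dqflush(dqp, &bp);
if (!error) {
	xfs_buf_delwri_queue(bp, buffer_list);
	xfs_buf_relse(bp);
}
```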
| @@ -1011,39 +1005,6 @@ xfs_dqlock2( | |||
| 1011 | } | 1005 | } |
| 1012 | } | 1006 | } |
| 1013 | 1007 | ||
| 1014 | /* | ||
| 1015 | * Give the buffer a little push if it is incore and | ||
| 1016 | * wait on the flush lock. | ||
| 1017 | */ | ||
| 1018 | void | ||
| 1019 | xfs_dqflock_pushbuf_wait( | ||
| 1020 | xfs_dquot_t *dqp) | ||
| 1021 | { | ||
| 1022 | xfs_mount_t *mp = dqp->q_mount; | ||
| 1023 | xfs_buf_t *bp; | ||
| 1024 | |||
| 1025 | /* | ||
| 1026 | * Check to see if the dquot has been flushed delayed | ||
| 1027 | * write. If so, grab its buffer and send it | ||
| 1028 | * out immediately. We'll be able to acquire | ||
| 1029 | * the flush lock when the I/O completes. | ||
| 1030 | */ | ||
| 1031 | bp = xfs_incore(mp->m_ddev_targp, dqp->q_blkno, | ||
| 1032 | mp->m_quotainfo->qi_dqchunklen, XBF_TRYLOCK); | ||
| 1033 | if (!bp) | ||
| 1034 | goto out_lock; | ||
| 1035 | |||
| 1036 | if (XFS_BUF_ISDELAYWRITE(bp)) { | ||
| 1037 | if (xfs_buf_ispinned(bp)) | ||
| 1038 | xfs_log_force(mp, 0); | ||
| 1039 | xfs_buf_delwri_promote(bp); | ||
| 1040 | wake_up_process(bp->b_target->bt_task); | ||
| 1041 | } | ||
| 1042 | xfs_buf_relse(bp); | ||
| 1043 | out_lock: | ||
| 1044 | xfs_dqflock(dqp); | ||
| 1045 | } | ||
| 1046 | |||
| 1047 | int __init | 1008 | int __init |
| 1048 | xfs_qm_init(void) | 1009 | xfs_qm_init(void) |
| 1049 | { | 1010 | { |
diff --git a/fs/xfs/xfs_dquot.h b/fs/xfs/xfs_dquot.h index ef9190bd8b30..7d20af27346d 100644 --- a/fs/xfs/xfs_dquot.h +++ b/fs/xfs/xfs_dquot.h | |||
| @@ -141,7 +141,7 @@ static inline xfs_dquot_t *xfs_inode_dquot(struct xfs_inode *ip, int type) | |||
| 141 | extern int xfs_qm_dqread(struct xfs_mount *, xfs_dqid_t, uint, | 141 | extern int xfs_qm_dqread(struct xfs_mount *, xfs_dqid_t, uint, |
| 142 | uint, struct xfs_dquot **); | 142 | uint, struct xfs_dquot **); |
| 143 | extern void xfs_qm_dqdestroy(xfs_dquot_t *); | 143 | extern void xfs_qm_dqdestroy(xfs_dquot_t *); |
| 144 | extern int xfs_qm_dqflush(xfs_dquot_t *, uint); | 144 | extern int xfs_qm_dqflush(struct xfs_dquot *, struct xfs_buf **); |
| 145 | extern void xfs_qm_dqunpin_wait(xfs_dquot_t *); | 145 | extern void xfs_qm_dqunpin_wait(xfs_dquot_t *); |
| 146 | extern void xfs_qm_adjust_dqtimers(xfs_mount_t *, | 146 | extern void xfs_qm_adjust_dqtimers(xfs_mount_t *, |
| 147 | xfs_disk_dquot_t *); | 147 | xfs_disk_dquot_t *); |
| @@ -152,7 +152,6 @@ extern int xfs_qm_dqget(xfs_mount_t *, xfs_inode_t *, | |||
| 152 | extern void xfs_qm_dqput(xfs_dquot_t *); | 152 | extern void xfs_qm_dqput(xfs_dquot_t *); |
| 153 | 153 | ||
| 154 | extern void xfs_dqlock2(struct xfs_dquot *, struct xfs_dquot *); | 154 | extern void xfs_dqlock2(struct xfs_dquot *, struct xfs_dquot *); |
| 155 | extern void xfs_dqflock_pushbuf_wait(struct xfs_dquot *dqp); | ||
| 156 | 155 | ||
| 157 | static inline struct xfs_dquot *xfs_qm_dqhold(struct xfs_dquot *dqp) | 156 | static inline struct xfs_dquot *xfs_qm_dqhold(struct xfs_dquot *dqp) |
| 158 | { | 157 | { |
diff --git a/fs/xfs/xfs_dquot_item.c b/fs/xfs/xfs_dquot_item.c index 34baeae45265..57aa4b03720c 100644 --- a/fs/xfs/xfs_dquot_item.c +++ b/fs/xfs/xfs_dquot_item.c | |||
| @@ -17,9 +17,7 @@ | |||
| 17 | */ | 17 | */ |
| 18 | #include "xfs.h" | 18 | #include "xfs.h" |
| 19 | #include "xfs_fs.h" | 19 | #include "xfs_fs.h" |
| 20 | #include "xfs_bit.h" | ||
| 21 | #include "xfs_log.h" | 20 | #include "xfs_log.h" |
| 22 | #include "xfs_inum.h" | ||
| 23 | #include "xfs_trans.h" | 21 | #include "xfs_trans.h" |
| 24 | #include "xfs_sb.h" | 22 | #include "xfs_sb.h" |
| 25 | #include "xfs_ag.h" | 23 | #include "xfs_ag.h" |
| @@ -108,38 +106,6 @@ xfs_qm_dquot_logitem_unpin( | |||
| 108 | wake_up(&dqp->q_pinwait); | 106 | wake_up(&dqp->q_pinwait); |
| 109 | } | 107 | } |
| 110 | 108 | ||
| 111 | /* | ||
| 112 | * Given the logitem, this writes the corresponding dquot entry to disk | ||
| 113 | * asynchronously. This is called with the dquot entry securely locked; | ||
| 114 | * we simply get xfs_qm_dqflush() to do the work, and unlock the dquot | ||
| 115 | * at the end. | ||
| 116 | */ | ||
| 117 | STATIC void | ||
| 118 | xfs_qm_dquot_logitem_push( | ||
| 119 | struct xfs_log_item *lip) | ||
| 120 | { | ||
| 121 | struct xfs_dquot *dqp = DQUOT_ITEM(lip)->qli_dquot; | ||
| 122 | int error; | ||
| 123 | |||
| 124 | ASSERT(XFS_DQ_IS_LOCKED(dqp)); | ||
| 125 | ASSERT(!completion_done(&dqp->q_flush)); | ||
| 126 | |||
| 127 | /* | ||
| 128 | * Since we were able to lock the dquot's flush lock and | ||
| 129 | * we found it on the AIL, the dquot must be dirty. This | ||
| 130 | * is because the dquot is removed from the AIL while still | ||
| 131 | * holding the flush lock in xfs_dqflush_done(). Thus, if | ||
| 132 | * we found it in the AIL and were able to obtain the flush | ||
| 133 | * lock without sleeping, then there must not have been | ||
| 134 | * anyone in the process of flushing the dquot. | ||
| 135 | */ | ||
| 136 | error = xfs_qm_dqflush(dqp, SYNC_TRYLOCK); | ||
| 137 | if (error) | ||
| 138 | xfs_warn(dqp->q_mount, "%s: push error %d on dqp %p", | ||
| 139 | __func__, error, dqp); | ||
| 140 | xfs_dqunlock(dqp); | ||
| 141 | } | ||
| 142 | |||
| 143 | STATIC xfs_lsn_t | 109 | STATIC xfs_lsn_t |
| 144 | xfs_qm_dquot_logitem_committed( | 110 | xfs_qm_dquot_logitem_committed( |
| 145 | struct xfs_log_item *lip, | 111 | struct xfs_log_item *lip, |
| @@ -171,67 +137,15 @@ xfs_qm_dqunpin_wait( | |||
| 171 | wait_event(dqp->q_pinwait, (atomic_read(&dqp->q_pincount) == 0)); | 137 | wait_event(dqp->q_pinwait, (atomic_read(&dqp->q_pincount) == 0)); |
| 172 | } | 138 | } |
| 173 | 139 | ||
| 174 | /* | ||
| 175 | * This is called when IOP_TRYLOCK returns XFS_ITEM_PUSHBUF to indicate that | ||
| 176 | * the dquot is locked by us, but the flush lock isn't. So, here we are | ||
| 177 | * going to see if the relevant dquot buffer is incore, waiting on DELWRI. | ||
| 178 | * If so, we want to push it out to help us take this item off the AIL as soon | ||
| 179 | * as possible. | ||
| 180 | * | ||
| 181 | * We must not be holding the AIL lock at this point. Calling incore() to | ||
| 182 | * search the buffer cache can be a time consuming thing, and AIL lock is a | ||
| 183 | * spinlock. | ||
| 184 | */ | ||
| 185 | STATIC bool | ||
| 186 | xfs_qm_dquot_logitem_pushbuf( | ||
| 187 | struct xfs_log_item *lip) | ||
| 188 | { | ||
| 189 | struct xfs_dq_logitem *qlip = DQUOT_ITEM(lip); | ||
| 190 | struct xfs_dquot *dqp = qlip->qli_dquot; | ||
| 191 | struct xfs_buf *bp; | ||
| 192 | bool ret = true; | ||
| 193 | |||
| 194 | ASSERT(XFS_DQ_IS_LOCKED(dqp)); | ||
| 195 | |||
| 196 | /* | ||
| 197 | * If flushlock isn't locked anymore, chances are that the | ||
| 198 | * inode flush completed and the inode was taken off the AIL. | ||
| 199 | * So, just get out. | ||
| 200 | */ | ||
| 201 | if (completion_done(&dqp->q_flush) || | ||
| 202 | !(lip->li_flags & XFS_LI_IN_AIL)) { | ||
| 203 | xfs_dqunlock(dqp); | ||
| 204 | return true; | ||
| 205 | } | ||
| 206 | |||
| 207 | bp = xfs_incore(dqp->q_mount->m_ddev_targp, qlip->qli_format.qlf_blkno, | ||
| 208 | dqp->q_mount->m_quotainfo->qi_dqchunklen, XBF_TRYLOCK); | ||
| 209 | xfs_dqunlock(dqp); | ||
| 210 | if (!bp) | ||
| 211 | return true; | ||
| 212 | if (XFS_BUF_ISDELAYWRITE(bp)) | ||
| 213 | xfs_buf_delwri_promote(bp); | ||
| 214 | if (xfs_buf_ispinned(bp)) | ||
| 215 | ret = false; | ||
| 216 | xfs_buf_relse(bp); | ||
| 217 | return ret; | ||
| 218 | } | ||
| 219 | |||
| 220 | /* | ||
| 221 | * This is called to attempt to lock the dquot associated with this | ||
| 222 | * dquot log item. Don't sleep on the dquot lock or the flush lock. | ||
| 223 | * If the flush lock is already held, indicating that the dquot has | ||
| 224 | * been or is in the process of being flushed, then see if we can | ||
| 225 | * find the dquot's buffer in the buffer cache without sleeping. If | ||
| 226 | * we can and it is marked delayed write, then we want to send it out. | ||
| 227 | * We delay doing so until the push routine, though, to avoid sleeping | ||
| 228 | * in any device strategy routines. | ||
| 229 | */ | ||
| 230 | STATIC uint | 140 | STATIC uint |
| 231 | xfs_qm_dquot_logitem_trylock( | 141 | xfs_qm_dquot_logitem_push( |
| 232 | struct xfs_log_item *lip) | 142 | struct xfs_log_item *lip, |
| 143 | struct list_head *buffer_list) | ||
| 233 | { | 144 | { |
| 234 | struct xfs_dquot *dqp = DQUOT_ITEM(lip)->qli_dquot; | 145 | struct xfs_dquot *dqp = DQUOT_ITEM(lip)->qli_dquot; |
| 146 | struct xfs_buf *bp = NULL; | ||
| 147 | uint rval = XFS_ITEM_SUCCESS; | ||
| 148 | int error; | ||
| 235 | 149 | ||
| 236 | if (atomic_read(&dqp->q_pincount) > 0) | 150 | if (atomic_read(&dqp->q_pincount) > 0) |
| 237 | return XFS_ITEM_PINNED; | 151 | return XFS_ITEM_PINNED; |
| @@ -239,16 +153,41 @@ xfs_qm_dquot_logitem_trylock( | |||
| 239 | if (!xfs_dqlock_nowait(dqp)) | 153 | if (!xfs_dqlock_nowait(dqp)) |
| 240 | return XFS_ITEM_LOCKED; | 154 | return XFS_ITEM_LOCKED; |
| 241 | 155 | ||
| 156 | /* | ||
| 157 | * Re-check the pincount now that we stabilized the value by | ||
| 158 | * taking the quota lock. | ||
| 159 | */ | ||
| 160 | if (atomic_read(&dqp->q_pincount) > 0) { | ||
| 161 | rval = XFS_ITEM_PINNED; | ||
| 162 | goto out_unlock; | ||
| 163 | } | ||
| 164 | |||
| 165 | /* | ||
| 166 | * Someone else is already flushing the dquot. Nothing we can do | ||
| 167 | * here but wait for the flush to finish and remove the item from | ||
| 168 | * the AIL. | ||
| 169 | */ | ||
| 242 | if (!xfs_dqflock_nowait(dqp)) { | 170 | if (!xfs_dqflock_nowait(dqp)) { |
| 243 | /* | 171 | rval = XFS_ITEM_FLUSHING; |
| 244 | * dquot has already been flushed to the backing buffer, | 172 | goto out_unlock; |
| 245 | * leave it locked, pushbuf routine will unlock it. | ||
| 246 | */ | ||
| 247 | return XFS_ITEM_PUSHBUF; | ||
| 248 | } | 173 | } |
| 249 | 174 | ||
| 250 | ASSERT(lip->li_flags & XFS_LI_IN_AIL); | 175 | spin_unlock(&lip->li_ailp->xa_lock); |
| 251 | return XFS_ITEM_SUCCESS; | 176 | |
| 177 | error = xfs_qm_dqflush(dqp, &bp); | ||
| 178 | if (error) { | ||
| 179 | xfs_warn(dqp->q_mount, "%s: push error %d on dqp %p", | ||
| 180 | __func__, error, dqp); | ||
| 181 | } else { | ||
| 182 | if (!xfs_buf_delwri_queue(bp, buffer_list)) | ||
| 183 | rval = XFS_ITEM_FLUSHING; | ||
| 184 | xfs_buf_relse(bp); | ||
| 185 | } | ||
| 186 | |||
| 187 | spin_lock(&lip->li_ailp->xa_lock); | ||
| 188 | out_unlock: | ||
| 189 | xfs_dqunlock(dqp); | ||
| 190 | return rval; | ||
| 252 | } | 191 | } |
| 253 | 192 | ||
| 254 | /* | 193 | /* |
| @@ -299,11 +238,9 @@ static const struct xfs_item_ops xfs_dquot_item_ops = { | |||
| 299 | .iop_format = xfs_qm_dquot_logitem_format, | 238 | .iop_format = xfs_qm_dquot_logitem_format, |
| 300 | .iop_pin = xfs_qm_dquot_logitem_pin, | 239 | .iop_pin = xfs_qm_dquot_logitem_pin, |
| 301 | .iop_unpin = xfs_qm_dquot_logitem_unpin, | 240 | .iop_unpin = xfs_qm_dquot_logitem_unpin, |
| 302 | .iop_trylock = xfs_qm_dquot_logitem_trylock, | ||
| 303 | .iop_unlock = xfs_qm_dquot_logitem_unlock, | 241 | .iop_unlock = xfs_qm_dquot_logitem_unlock, |
| 304 | .iop_committed = xfs_qm_dquot_logitem_committed, | 242 | .iop_committed = xfs_qm_dquot_logitem_committed, |
| 305 | .iop_push = xfs_qm_dquot_logitem_push, | 243 | .iop_push = xfs_qm_dquot_logitem_push, |
| 306 | .iop_pushbuf = xfs_qm_dquot_logitem_pushbuf, | ||
| 307 | .iop_committing = xfs_qm_dquot_logitem_committing | 244 | .iop_committing = xfs_qm_dquot_logitem_committing |
| 308 | }; | 245 | }; |
| 309 | 246 | ||
| @@ -398,11 +335,13 @@ xfs_qm_qoff_logitem_unpin( | |||
| 398 | } | 335 | } |
| 399 | 336 | ||
| 400 | /* | 337 | /* |
| 401 | * Quotaoff items have no locking, so just return success. | 338 | * There isn't much you can do to push a quotaoff item. It is simply |
| 339 | * stuck waiting for the log to be flushed to disk. | ||
| 402 | */ | 340 | */ |
| 403 | STATIC uint | 341 | STATIC uint |
| 404 | xfs_qm_qoff_logitem_trylock( | 342 | xfs_qm_qoff_logitem_push( |
| 405 | struct xfs_log_item *lip) | 343 | struct xfs_log_item *lip, |
| 344 | struct list_head *buffer_list) | ||
| 406 | { | 345 | { |
| 407 | return XFS_ITEM_LOCKED; | 346 | return XFS_ITEM_LOCKED; |
| 408 | } | 347 | } |
| @@ -429,17 +368,6 @@ xfs_qm_qoff_logitem_committed( | |||
| 429 | return lsn; | 368 | return lsn; |
| 430 | } | 369 | } |
| 431 | 370 | ||
| 432 | /* | ||
| 433 | * There isn't much you can do to push on a quotaoff item. It is simply | ||
| 434 | * stuck waiting for the log to be flushed to disk. | ||
| 435 | */ | ||
| 436 | STATIC void | ||
| 437 | xfs_qm_qoff_logitem_push( | ||
| 438 | struct xfs_log_item *lip) | ||
| 439 | { | ||
| 440 | } | ||
| 441 | |||
| 442 | |||
| 443 | STATIC xfs_lsn_t | 371 | STATIC xfs_lsn_t |
| 444 | xfs_qm_qoffend_logitem_committed( | 372 | xfs_qm_qoffend_logitem_committed( |
| 445 | struct xfs_log_item *lip, | 373 | struct xfs_log_item *lip, |
| @@ -454,7 +382,7 @@ xfs_qm_qoffend_logitem_committed( | |||
| 454 | * xfs_trans_ail_delete() drops the AIL lock. | 382 | * xfs_trans_ail_delete() drops the AIL lock. |
| 455 | */ | 383 | */ |
| 456 | spin_lock(&ailp->xa_lock); | 384 | spin_lock(&ailp->xa_lock); |
| 457 | xfs_trans_ail_delete(ailp, (xfs_log_item_t *)qfs); | 385 | xfs_trans_ail_delete(ailp, &qfs->qql_item, SHUTDOWN_LOG_IO_ERROR); |
| 458 | 386 | ||
| 459 | kmem_free(qfs); | 387 | kmem_free(qfs); |
| 460 | kmem_free(qfe); | 388 | kmem_free(qfe); |
| @@ -487,7 +415,6 @@ static const struct xfs_item_ops xfs_qm_qoffend_logitem_ops = { | |||
| 487 | .iop_format = xfs_qm_qoff_logitem_format, | 415 | .iop_format = xfs_qm_qoff_logitem_format, |
| 488 | .iop_pin = xfs_qm_qoff_logitem_pin, | 416 | .iop_pin = xfs_qm_qoff_logitem_pin, |
| 489 | .iop_unpin = xfs_qm_qoff_logitem_unpin, | 417 | .iop_unpin = xfs_qm_qoff_logitem_unpin, |
| 490 | .iop_trylock = xfs_qm_qoff_logitem_trylock, | ||
| 491 | .iop_unlock = xfs_qm_qoff_logitem_unlock, | 418 | .iop_unlock = xfs_qm_qoff_logitem_unlock, |
| 492 | .iop_committed = xfs_qm_qoffend_logitem_committed, | 419 | .iop_committed = xfs_qm_qoffend_logitem_committed, |
| 493 | .iop_push = xfs_qm_qoff_logitem_push, | 420 | .iop_push = xfs_qm_qoff_logitem_push, |
| @@ -502,7 +429,6 @@ static const struct xfs_item_ops xfs_qm_qoff_logitem_ops = { | |||
| 502 | .iop_format = xfs_qm_qoff_logitem_format, | 429 | .iop_format = xfs_qm_qoff_logitem_format, |
| 503 | .iop_pin = xfs_qm_qoff_logitem_pin, | 430 | .iop_pin = xfs_qm_qoff_logitem_pin, |
| 504 | .iop_unpin = xfs_qm_qoff_logitem_unpin, | 431 | .iop_unpin = xfs_qm_qoff_logitem_unpin, |
| 505 | .iop_trylock = xfs_qm_qoff_logitem_trylock, | ||
| 506 | .iop_unlock = xfs_qm_qoff_logitem_unlock, | 432 | .iop_unlock = xfs_qm_qoff_logitem_unlock, |
| 507 | .iop_committed = xfs_qm_qoff_logitem_committed, | 433 | .iop_committed = xfs_qm_qoff_logitem_committed, |
| 508 | .iop_push = xfs_qm_qoff_logitem_push, | 434 | .iop_push = xfs_qm_qoff_logitem_push, |
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c index 39f06336b99d..610456054dc2 100644 --- a/fs/xfs/xfs_error.c +++ b/fs/xfs/xfs_error.c | |||
| @@ -19,7 +19,6 @@ | |||
| 19 | #include "xfs_fs.h" | 19 | #include "xfs_fs.h" |
| 20 | #include "xfs_types.h" | 20 | #include "xfs_types.h" |
| 21 | #include "xfs_log.h" | 21 | #include "xfs_log.h" |
| 22 | #include "xfs_inum.h" | ||
| 23 | #include "xfs_trans.h" | 22 | #include "xfs_trans.h" |
| 24 | #include "xfs_sb.h" | 23 | #include "xfs_sb.h" |
| 25 | #include "xfs_ag.h" | 24 | #include "xfs_ag.h" |
diff --git a/fs/xfs/xfs_export.c b/fs/xfs/xfs_export.c index 558910f5e3c0..2d25d19c4ea1 100644 --- a/fs/xfs/xfs_export.c +++ b/fs/xfs/xfs_export.c | |||
| @@ -17,7 +17,6 @@ | |||
| 17 | */ | 17 | */ |
| 18 | #include "xfs.h" | 18 | #include "xfs.h" |
| 19 | #include "xfs_types.h" | 19 | #include "xfs_types.h" |
| 20 | #include "xfs_inum.h" | ||
| 21 | #include "xfs_log.h" | 20 | #include "xfs_log.h" |
| 22 | #include "xfs_trans.h" | 21 | #include "xfs_trans.h" |
| 23 | #include "xfs_sb.h" | 22 | #include "xfs_sb.h" |
diff --git a/fs/xfs/xfs_extent_busy.c b/fs/xfs/xfs_extent_busy.c new file mode 100644 index 000000000000..85e9f87a1a7c --- /dev/null +++ b/fs/xfs/xfs_extent_busy.c | |||
| @@ -0,0 +1,603 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc. | ||
| 3 | * Copyright (c) 2010 David Chinner. | ||
| 4 | * Copyright (c) 2011 Christoph Hellwig. | ||
| 5 | * All Rights Reserved. | ||
| 6 | * | ||
| 7 | * This program is free software; you can redistribute it and/or | ||
| 8 | * modify it under the terms of the GNU General Public License as | ||
| 9 | * published by the Free Software Foundation. | ||
| 10 | * | ||
| 11 | * This program is distributed in the hope that it would be useful, | ||
| 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 14 | * GNU General Public License for more details. | ||
| 15 | * | ||
| 16 | * You should have received a copy of the GNU General Public License | ||
| 17 | * along with this program; if not, write the Free Software Foundation, | ||
| 18 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
| 19 | */ | ||
| 20 | #include "xfs.h" | ||
| 21 | #include "xfs_fs.h" | ||
| 22 | #include "xfs_types.h" | ||
| 23 | #include "xfs_log.h" | ||
| 24 | #include "xfs_trans.h" | ||
| 25 | #include "xfs_sb.h" | ||
| 26 | #include "xfs_ag.h" | ||
| 27 | #include "xfs_mount.h" | ||
| 28 | #include "xfs_bmap_btree.h" | ||
| 29 | #include "xfs_alloc.h" | ||
| 30 | #include "xfs_inode.h" | ||
| 31 | #include "xfs_extent_busy.h" | ||
| 32 | #include "xfs_trace.h" | ||
| 33 | |||
| 34 | void | ||
| 35 | xfs_extent_busy_insert( | ||
| 36 | struct xfs_trans *tp, | ||
| 37 | xfs_agnumber_t agno, | ||
| 38 | xfs_agblock_t bno, | ||
| 39 | xfs_extlen_t len, | ||
| 40 | unsigned int flags) | ||
| 41 | { | ||
| 42 | struct xfs_extent_busy *new; | ||
| 43 | struct xfs_extent_busy *busyp; | ||
| 44 | struct xfs_perag *pag; | ||
| 45 | struct rb_node **rbp; | ||
| 46 | struct rb_node *parent = NULL; | ||
| 47 | |||
| 48 | new = kmem_zalloc(sizeof(struct xfs_extent_busy), KM_MAYFAIL); | ||
| 49 | if (!new) { | ||
| 50 | /* | ||
| 51 | * No Memory! Since it is now not possible to track the free | ||
| 52 | * block, make this a synchronous transaction to ensure that | ||
| 53 | * the block is not reused before this transaction commits. | ||
| 54 | */ | ||
| 55 | trace_xfs_extent_busy_enomem(tp->t_mountp, agno, bno, len); | ||
| 56 | xfs_trans_set_sync(tp); | ||
| 57 | return; | ||
| 58 | } | ||
| 59 | |||
| 60 | new->agno = agno; | ||
| 61 | new->bno = bno; | ||
| 62 | new->length = len; | ||
| 63 | INIT_LIST_HEAD(&new->list); | ||
| 64 | new->flags = flags; | ||
| 65 | |||
| 66 | /* trace before insert to be able to see failed inserts */ | ||
| 67 | trace_xfs_extent_busy(tp->t_mountp, agno, bno, len); | ||
| 68 | |||
| 69 | pag = xfs_perag_get(tp->t_mountp, new->agno); | ||
| 70 | spin_lock(&pag->pagb_lock); | ||
| 71 | rbp = &pag->pagb_tree.rb_node; | ||
| 72 | while (*rbp) { | ||
| 73 | parent = *rbp; | ||
| 74 | busyp = rb_entry(parent, struct xfs_extent_busy, rb_node); | ||
| 75 | |||
| 76 | if (new->bno < busyp->bno) { | ||
| 77 | rbp = &(*rbp)->rb_left; | ||
| 78 | ASSERT(new->bno + new->length <= busyp->bno); | ||
| 79 | } else if (new->bno > busyp->bno) { | ||
| 80 | rbp = &(*rbp)->rb_right; | ||
| 81 | ASSERT(bno >= busyp->bno + busyp->length); | ||
| 82 | } else { | ||
| 83 | ASSERT(0); | ||
| 84 | } | ||
| 85 | } | ||
| 86 | |||
| 87 | rb_link_node(&new->rb_node, parent, rbp); | ||
| 88 | rb_insert_color(&new->rb_node, &pag->pagb_tree); | ||
| 89 | |||
| 90 | list_add(&new->list, &tp->t_busy); | ||
| 91 | spin_unlock(&pag->pagb_lock); | ||
| 92 | xfs_perag_put(pag); | ||
| 93 | } | ||
| 94 | |||
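A brief usage note for the function above (illustrative; in this series the call sits in the low-level allocator free path): every freed extent is recorded as busy so it cannot be reallocated, and hence overwritten, before the freeing transaction commits.

```c
/*
 * Illustrative only: record a just-freed AG extent as busy from
 * within the freeing transaction.  agno/bno/len describe the extent;
 * a flags value of 0 requests default behaviour.
 */
xfs_extent_busy_insert(tp, agno, bno, len, 0);
```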
| 95 | /* | ||
| 96 | * Search for a busy extent within the range of the extent we are about to | ||
| 97 | * allocate. The busy extent tree lock is taken internally, so callers | ||
| 98 | * must not already hold it. This function returns 0 for no overlapping | ||
| 99 | * busy extent, -1 for an overlapping but not exact busy extent, and 1 for | ||
| 100 | * an exact match. A non-zero return thus indicates an overlap that will | ||
| 101 | * require a synchronous transaction, while still letting the caller | ||
| 102 | * distinguish between a partial and an exact match. | ||
| 103 | */ | ||
| 104 | int | ||
| 105 | xfs_extent_busy_search( | ||
| 106 | struct xfs_mount *mp, | ||
| 107 | xfs_agnumber_t agno, | ||
| 108 | xfs_agblock_t bno, | ||
| 109 | xfs_extlen_t len) | ||
| 110 | { | ||
| 111 | struct xfs_perag *pag; | ||
| 112 | struct rb_node *rbp; | ||
| 113 | struct xfs_extent_busy *busyp; | ||
| 114 | int match = 0; | ||
| 115 | |||
| 116 | pag = xfs_perag_get(mp, agno); | ||
| 117 | spin_lock(&pag->pagb_lock); | ||
| 118 | |||
| 119 | rbp = pag->pagb_tree.rb_node; | ||
| 120 | |||
| 121 | /* find closest start bno overlap */ | ||
| 122 | while (rbp) { | ||
| 123 | busyp = rb_entry(rbp, struct xfs_extent_busy, rb_node); | ||
| 124 | if (bno < busyp->bno) { | ||
| 125 | /* may overlap, but exact start block is lower */ | ||
| 126 | if (bno + len > busyp->bno) | ||
| 127 | match = -1; | ||
| 128 | rbp = rbp->rb_left; | ||
| 129 | } else if (bno > busyp->bno) { | ||
| 130 | /* may overlap, but exact start block is higher */ | ||
| 131 | if (bno < busyp->bno + busyp->length) | ||
| 132 | match = -1; | ||
| 133 | rbp = rbp->rb_right; | ||
| 134 | } else { | ||
| 135 | /* bno matches busyp, length determines exact match */ | ||
| 136 | match = (busyp->length == len) ? 1 : -1; | ||
| 137 | break; | ||
| 138 | } | ||
| 139 | } | ||
| 140 | spin_unlock(&pag->pagb_lock); | ||
| 141 | xfs_perag_put(pag); | ||
| 142 | return match; | ||
| 143 | } | ||
| 144 | |||
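A hedged sketch, not taken from this patch, of how a caller might consume the tri-state return; the surrounding transaction handling (tp, mp, agno, bno, len) is assumed:

	switch (xfs_extent_busy_search(mp, agno, bno, len)) {
	case 0:		/* no overlap: safe to reuse the range */
		break;
	case -1:	/* partial overlap */
	case 1:		/* exact match */
		/* unsafe until the freeing transaction is on disk */
		xfs_trans_set_sync(tp);
		break;
	}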
| 145 | /* | ||
| 146 | * The found free extent [fbno, fend] overlaps part or all of the given busy | ||
| 147 | * extent. If the overlap covers the beginning, the end, or all of the busy | ||
| 148 | * extent, the overlapping portion can be made unbusy and used for the | ||
| 149 | * allocation. We can't split a busy extent because we can't modify a | ||
| 150 | * transaction/CIL context busy list, but we can update an entry's block | ||
| 151 | * number or length. | ||
| 152 | * | ||
| 153 | * Returns true if the extent can safely be reused, or false if the search | ||
| 154 | * needs to be restarted. | ||
| 155 | */ | ||
| 156 | STATIC bool | ||
| 157 | xfs_extent_busy_update_extent( | ||
| 158 | struct xfs_mount *mp, | ||
| 159 | struct xfs_perag *pag, | ||
| 160 | struct xfs_extent_busy *busyp, | ||
| 161 | xfs_agblock_t fbno, | ||
| 162 | xfs_extlen_t flen, | ||
| 163 | bool userdata) | ||
| 164 | { | ||
| 165 | xfs_agblock_t fend = fbno + flen; | ||
| 166 | xfs_agblock_t bbno = busyp->bno; | ||
| 167 | xfs_agblock_t bend = bbno + busyp->length; | ||
| 168 | |||
| 169 | /* | ||
| 170 | * This extent is currently being discarded. Give the thread | ||
| 171 | * performing the discard a chance to mark the extent unbusy | ||
| 172 | * and retry. | ||
| 173 | */ | ||
| 174 | if (busyp->flags & XFS_EXTENT_BUSY_DISCARDED) { | ||
| 175 | spin_unlock(&pag->pagb_lock); | ||
| 176 | delay(1); | ||
| 177 | spin_lock(&pag->pagb_lock); | ||
| 178 | return false; | ||
| 179 | } | ||
| 180 | |||
| 181 | /* | ||
| 182 | * If there is a busy extent overlapping a user allocation, we have | ||
| 183 | * no choice but to force the log and retry the search. | ||
| 184 | * | ||
| 185 | * Fortunately this does not happen during normal operation, but | ||
| 186 | * only if the filesystem is very low on space and has to dip into | ||
| 187 | * the AGFL for normal allocations. | ||
| 188 | */ | ||
| 189 | if (userdata) | ||
| 190 | goto out_force_log; | ||
| 191 | |||
| 192 | if (bbno < fbno && bend > fend) { | ||
| 193 | /* | ||
| 194 | * Case 1: | ||
| 195 | * bbno bend | ||
| 196 | * +BBBBBBBBBBBBBBBBB+ | ||
| 197 | * +---------+ | ||
| 198 | * fbno fend | ||
| 199 | */ | ||
| 200 | |||
| 201 | /* | ||
| 202 | * We would have to split the busy extent to be able to track | ||
| 203 | * it correctly, which we cannot do because we would have to | ||
| 204 | * modify the list of busy extents attached to the transaction | ||
| 205 | * or CIL context, which is immutable. | ||
| 206 | * | ||
| 207 | * Force out the log to clear the busy extent and retry the | ||
| 208 | * search. | ||
| 209 | */ | ||
| 210 | goto out_force_log; | ||
| 211 | } else if (bbno >= fbno && bend <= fend) { | ||
| 212 | /* | ||
| 213 | * Case 2: | ||
| 214 | * bbno bend | ||
| 215 | * +BBBBBBBBBBBBBBBBB+ | ||
| 216 | * +-----------------+ | ||
| 217 | * fbno fend | ||
| 218 | * | ||
| 219 | * Case 3: | ||
| 220 | * bbno bend | ||
| 221 | * +BBBBBBBBBBBBBBBBB+ | ||
| 222 | * +--------------------------+ | ||
| 223 | * fbno fend | ||
| 224 | * | ||
| 225 | * Case 4: | ||
| 226 | * bbno bend | ||
| 227 | * +BBBBBBBBBBBBBBBBB+ | ||
| 228 | * +--------------------------+ | ||
| 229 | * fbno fend | ||
| 230 | * | ||
| 231 | * Case 5: | ||
| 232 | * bbno bend | ||
| 233 | * +BBBBBBBBBBBBBBBBB+ | ||
| 234 | * +-----------------------------------+ | ||
| 235 | * fbno fend | ||
| 236 | * | ||
| 237 | */ | ||
| 238 | |||
| 239 | /* | ||
| 240 | * The busy extent is fully covered by the extent we are | ||
| 241 | * allocating, and can simply be removed from the rbtree. | ||
| 242 | * However, we cannot remove it from the immutable list | ||
| 243 | * tracking busy extents in the transaction or CIL context, | ||
| 244 | * so set the length to zero to mark it invalid. | ||
| 245 | * | ||
| 246 | * We also need to restart the busy extent search from the | ||
| 247 | * tree root, because erasing the node can rearrange the | ||
| 248 | * tree topology. | ||
| 249 | */ | ||
| 250 | rb_erase(&busyp->rb_node, &pag->pagb_tree); | ||
| 251 | busyp->length = 0; | ||
| 252 | return false; | ||
| 253 | } else if (fend < bend) { | ||
| 254 | /* | ||
| 255 | * Case 6: | ||
| 256 | * bbno bend | ||
| 257 | * +BBBBBBBBBBBBBBBBB+ | ||
| 258 | * +---------+ | ||
| 259 | * fbno fend | ||
| 260 | * | ||
| 261 | * Case 7: | ||
| 262 | * bbno bend | ||
| 263 | * +BBBBBBBBBBBBBBBBB+ | ||
| 264 | * +------------------+ | ||
| 265 | * fbno fend | ||
| 266 | * | ||
| 267 | */ | ||
| 268 | busyp->bno = fend; | ||
| 269 | } else if (bbno < fbno) { | ||
| 270 | /* | ||
| 271 | * Case 8: | ||
| 272 | * bbno bend | ||
| 273 | * +BBBBBBBBBBBBBBBBB+ | ||
| 274 | * +-------------+ | ||
| 275 | * fbno fend | ||
| 276 | * | ||
| 277 | * Case 9: | ||
| 278 | * bbno bend | ||
| 279 | * +BBBBBBBBBBBBBBBBB+ | ||
| 280 | * +----------------------+ | ||
| 281 | * fbno fend | ||
| 282 | */ | ||
| 283 | busyp->length = fbno - busyp->bno; | ||
| 284 | } else { | ||
| 285 | ASSERT(0); | ||
| 286 | } | ||
| 287 | |||
| 288 | trace_xfs_extent_busy_reuse(mp, pag->pag_agno, fbno, flen); | ||
| 289 | return true; | ||
| 290 | |||
| 291 | out_force_log: | ||
| 292 | spin_unlock(&pag->pagb_lock); | ||
| 293 | xfs_log_force(mp, XFS_LOG_SYNC); | ||
| 294 | trace_xfs_extent_busy_force(mp, pag->pag_agno, fbno, flen); | ||
| 295 | spin_lock(&pag->pagb_lock); | ||
| 296 | return false; | ||
| 297 | } | ||
| 298 | |||
| 299 | |||
| 300 | /* | ||
| 301 | * For a given extent [fbno, flen], make sure we can reuse it safely. | ||
| 302 | */ | ||
| 303 | void | ||
| 304 | xfs_extent_busy_reuse( | ||
| 305 | struct xfs_mount *mp, | ||
| 306 | xfs_agnumber_t agno, | ||
| 307 | xfs_agblock_t fbno, | ||
| 308 | xfs_extlen_t flen, | ||
| 309 | bool userdata) | ||
| 310 | { | ||
| 311 | struct xfs_perag *pag; | ||
| 312 | struct rb_node *rbp; | ||
| 313 | |||
| 314 | ASSERT(flen > 0); | ||
| 315 | |||
| 316 | pag = xfs_perag_get(mp, agno); | ||
| 317 | spin_lock(&pag->pagb_lock); | ||
| 318 | restart: | ||
| 319 | rbp = pag->pagb_tree.rb_node; | ||
| 320 | while (rbp) { | ||
| 321 | struct xfs_extent_busy *busyp = | ||
| 322 | rb_entry(rbp, struct xfs_extent_busy, rb_node); | ||
| 323 | xfs_agblock_t bbno = busyp->bno; | ||
| 324 | xfs_agblock_t bend = bbno + busyp->length; | ||
| 325 | |||
| 326 | if (fbno + flen <= bbno) { | ||
| 327 | rbp = rbp->rb_left; | ||
| 328 | continue; | ||
| 329 | } else if (fbno >= bend) { | ||
| 330 | rbp = rbp->rb_right; | ||
| 331 | continue; | ||
| 332 | } | ||
| 333 | |||
| 334 | if (!xfs_extent_busy_update_extent(mp, pag, busyp, fbno, flen, | ||
| 335 | userdata)) | ||
| 336 | goto restart; | ||
| 337 | } | ||
| 338 | spin_unlock(&pag->pagb_lock); | ||
| 339 | xfs_perag_put(pag); | ||
| 340 | } | ||
| 341 | |||
| 342 | /* | ||
| 343 | * For a given extent [fbno, flen], search the busy extent list to find a | ||
| 344 | * subset of the extent that is not busy. If *rlen is smaller than | ||
| 345 | * args->minlen, no suitable extent could be found, and the higher level | ||
| 346 | * code needs to force out the log and retry the allocation. | ||
| 347 | */ | ||
| 348 | void | ||
| 349 | xfs_extent_busy_trim( | ||
| 350 | struct xfs_alloc_arg *args, | ||
| 351 | xfs_agblock_t bno, | ||
| 352 | xfs_extlen_t len, | ||
| 353 | xfs_agblock_t *rbno, | ||
| 354 | xfs_extlen_t *rlen) | ||
| 355 | { | ||
| 356 | xfs_agblock_t fbno; | ||
| 357 | xfs_extlen_t flen; | ||
| 358 | struct rb_node *rbp; | ||
| 359 | |||
| 360 | ASSERT(len > 0); | ||
| 361 | |||
| 362 | spin_lock(&args->pag->pagb_lock); | ||
| 363 | restart: | ||
| 364 | fbno = bno; | ||
| 365 | flen = len; | ||
| 366 | rbp = args->pag->pagb_tree.rb_node; | ||
| 367 | while (rbp && flen >= args->minlen) { | ||
| 368 | struct xfs_extent_busy *busyp = | ||
| 369 | rb_entry(rbp, struct xfs_extent_busy, rb_node); | ||
| 370 | xfs_agblock_t fend = fbno + flen; | ||
| 371 | xfs_agblock_t bbno = busyp->bno; | ||
| 372 | xfs_agblock_t bend = bbno + busyp->length; | ||
| 373 | |||
| 374 | if (fend <= bbno) { | ||
| 375 | rbp = rbp->rb_left; | ||
| 376 | continue; | ||
| 377 | } else if (fbno >= bend) { | ||
| 378 | rbp = rbp->rb_right; | ||
| 379 | continue; | ||
| 380 | } | ||
| 381 | |||
| 382 | /* | ||
| 383 | * If this is a metadata allocation, try to reuse the busy | ||
| 384 | * extent instead of trimming the allocation. | ||
| 385 | */ | ||
| 386 | if (!args->userdata && | ||
| 387 | !(busyp->flags & XFS_EXTENT_BUSY_DISCARDED)) { | ||
| 388 | if (!xfs_extent_busy_update_extent(args->mp, args->pag, | ||
| 389 | busyp, fbno, flen, | ||
| 390 | false)) | ||
| 391 | goto restart; | ||
| 392 | continue; | ||
| 393 | } | ||
| 394 | |||
| 395 | if (bbno <= fbno) { | ||
| 396 | /* start overlap */ | ||
| 397 | |||
| 398 | /* | ||
| 399 | * Case 1: | ||
| 400 | * bbno bend | ||
| 401 | * +BBBBBBBBBBBBBBBBB+ | ||
| 402 | * +---------+ | ||
| 403 | * fbno fend | ||
| 404 | * | ||
| 405 | * Case 2: | ||
| 406 | * bbno bend | ||
| 407 | * +BBBBBBBBBBBBBBBBB+ | ||
| 408 | * +-------------+ | ||
| 409 | * fbno fend | ||
| 410 | * | ||
| 411 | * Case 3: | ||
| 412 | * bbno bend | ||
| 413 | * +BBBBBBBBBBBBBBBBB+ | ||
| 414 | * +-------------+ | ||
| 415 | * fbno fend | ||
| 416 | * | ||
| 417 | * Case 4: | ||
| 418 | * bbno bend | ||
| 419 | * +BBBBBBBBBBBBBBBBB+ | ||
| 420 | * +-----------------+ | ||
| 421 | * fbno fend | ||
| 422 | * | ||
| 423 | * No unbusy region in extent, return failure. | ||
| 424 | */ | ||
| 425 | if (fend <= bend) | ||
| 426 | goto fail; | ||
| 427 | |||
| 428 | /* | ||
| 429 | * Case 5: | ||
| 430 | * bbno bend | ||
| 431 | * +BBBBBBBBBBBBBBBBB+ | ||
| 432 | * +----------------------+ | ||
| 433 | * fbno fend | ||
| 434 | * | ||
| 435 | * Case 6: | ||
| 436 | * bbno bend | ||
| 437 | * +BBBBBBBBBBBBBBBBB+ | ||
| 438 | * +--------------------------+ | ||
| 439 | * fbno fend | ||
| 440 | * | ||
| 441 | * Needs to be trimmed to: | ||
| 442 | * +-------+ | ||
| 443 | * fbno fend | ||
| 444 | */ | ||
| 445 | fbno = bend; | ||
| 446 | } else if (bend >= fend) { | ||
| 447 | /* end overlap */ | ||
| 448 | |||
| 449 | /* | ||
| 450 | * Case 7: | ||
| 451 | * bbno bend | ||
| 452 | * +BBBBBBBBBBBBBBBBB+ | ||
| 453 | * +------------------+ | ||
| 454 | * fbno fend | ||
| 455 | * | ||
| 456 | * Case 8: | ||
| 457 | * bbno bend | ||
| 458 | * +BBBBBBBBBBBBBBBBB+ | ||
| 459 | * +--------------------------+ | ||
| 460 | * fbno fend | ||
| 461 | * | ||
| 462 | * Needs to be trimmed to: | ||
| 463 | * +-------+ | ||
| 464 | * fbno fend | ||
| 465 | */ | ||
| 466 | fend = bbno; | ||
| 467 | } else { | ||
| 468 | /* middle overlap */ | ||
| 469 | |||
| 470 | /* | ||
| 471 | * Case 9: | ||
| 472 | * bbno bend | ||
| 473 | * +BBBBBBBBBBBBBBBBB+ | ||
| 474 | * +-----------------------------------+ | ||
| 475 | * fbno fend | ||
| 476 | * | ||
| 477 | * Can be trimmed to: | ||
| 478 | * +-------+ OR +-------+ | ||
| 479 | * fbno fend fbno fend | ||
| 480 | * | ||
| 481 | * Backward allocation leads to significant | ||
| 482 | * fragmentation of directories, which degrades | ||
| 483 | * directory performance; we therefore always want to | ||
| 484 | * choose the option that produces forward allocation | ||
| 485 | * patterns. | ||
| 486 | * Preferring the lower bno extent will make the next | ||
| 487 | * request use "fend" as the start of the next | ||
| 488 | * allocation; if the segment is no longer busy at | ||
| 489 | * that point, we'll get a contiguous allocation, but | ||
| 490 | * even if it is still busy, we will get a forward | ||
| 491 | * allocation. | ||
| 492 | * We try to avoid choosing the segment at "bend", | ||
| 493 | * because that can lead to the next allocation | ||
| 494 | * taking the segment at "fbno", which would be a | ||
| 495 | * backward allocation. We only use the segment at | ||
| 496 | * "fbno" if it is much larger than the current | ||
| 497 | * requested size, because in that case there's a | ||
| 498 | * good chance subsequent allocations will be | ||
| 499 | * contiguous. | ||
| 500 | */ | ||
| 501 | if (bbno - fbno >= args->maxlen) { | ||
| 502 | /* left candidate fits perfect */ | ||
| 503 | fend = bbno; | ||
| 504 | } else if (fend - bend >= args->maxlen * 4) { | ||
| 505 | /* right candidate has enough free space */ | ||
| 506 | fbno = bend; | ||
| 507 | } else if (bbno - fbno >= args->minlen) { | ||
| 508 | /* left candidate fits minimum requirement */ | ||
| 509 | fend = bbno; | ||
| 510 | } else { | ||
| 511 | goto fail; | ||
| 512 | } | ||
| 513 | } | ||
| 514 | |||
| 515 | flen = fend - fbno; | ||
| 516 | } | ||
| 517 | spin_unlock(&args->pag->pagb_lock); | ||
| 518 | |||
| 519 | if (fbno != bno || flen != len) { | ||
| 520 | trace_xfs_extent_busy_trim(args->mp, args->agno, bno, len, | ||
| 521 | fbno, flen); | ||
| 522 | } | ||
| 523 | *rbno = fbno; | ||
| 524 | *rlen = flen; | ||
| 525 | return; | ||
| 526 | fail: | ||
| 527 | /* | ||
| 528 | * Return a zero extent length as the failure indication. All callers | ||
| 529 | * re-check whether the trimmed extent satisfies the minlen requirement. | ||
| 530 | */ | ||
| 531 | spin_unlock(&args->pag->pagb_lock); | ||
| 532 | trace_xfs_extent_busy_trim(args->mp, args->agno, bno, len, fbno, 0); | ||
| 533 | *rbno = fbno; | ||
| 534 | *rlen = 0; | ||
| 535 | } | ||
| 536 | |||
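As the comment above xfs_extent_busy_trim() notes, callers must re-check the trimmed length against minlen. A hedged sketch of that retry pattern, assuming an xfs_alloc_arg args and a candidate extent [fbno, flen); it is not lifted from this patch, and the restart label is hypothetical:

	xfs_agblock_t	tbno;
	xfs_extlen_t	tlen;

	xfs_extent_busy_trim(args, fbno, flen, &tbno, &tlen);
	if (tlen < args->minlen) {
		/* no usable unbusy subset: push busy extents to disk, retry */
		xfs_log_force(args->mp, XFS_LOG_SYNC);
		goto restart_allocation;	/* hypothetical label */
	}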
| 537 | STATIC void | ||
| 538 | xfs_extent_busy_clear_one( | ||
| 539 | struct xfs_mount *mp, | ||
| 540 | struct xfs_perag *pag, | ||
| 541 | struct xfs_extent_busy *busyp) | ||
| 542 | { | ||
| 543 | if (busyp->length) { | ||
| 544 | trace_xfs_extent_busy_clear(mp, busyp->agno, busyp->bno, | ||
| 545 | busyp->length); | ||
| 546 | rb_erase(&busyp->rb_node, &pag->pagb_tree); | ||
| 547 | } | ||
| 548 | |||
| 549 | list_del_init(&busyp->list); | ||
| 550 | kmem_free(busyp); | ||
| 551 | } | ||
| 552 | |||
| 553 | /* | ||
| 554 | * Remove all extents on the passed in list from the busy extents tree. | ||
| 555 | * If do_discard is set, skip extents that need to be discarded, and mark | ||
| 556 | * these as undergoing a discard operation instead. | ||
| 557 | */ | ||
| 558 | void | ||
| 559 | xfs_extent_busy_clear( | ||
| 560 | struct xfs_mount *mp, | ||
| 561 | struct list_head *list, | ||
| 562 | bool do_discard) | ||
| 563 | { | ||
| 564 | struct xfs_extent_busy *busyp, *n; | ||
| 565 | struct xfs_perag *pag = NULL; | ||
| 566 | xfs_agnumber_t agno = NULLAGNUMBER; | ||
| 567 | |||
| 568 | list_for_each_entry_safe(busyp, n, list, list) { | ||
| 569 | if (busyp->agno != agno) { | ||
| 570 | if (pag) { | ||
| 571 | spin_unlock(&pag->pagb_lock); | ||
| 572 | xfs_perag_put(pag); | ||
| 573 | } | ||
| 574 | pag = xfs_perag_get(mp, busyp->agno); | ||
| 575 | spin_lock(&pag->pagb_lock); | ||
| 576 | agno = busyp->agno; | ||
| 577 | } | ||
| 578 | |||
| 579 | if (do_discard && busyp->length && | ||
| 580 | !(busyp->flags & XFS_EXTENT_BUSY_SKIP_DISCARD)) | ||
| 581 | busyp->flags = XFS_EXTENT_BUSY_DISCARDED; | ||
| 582 | else | ||
| 583 | xfs_extent_busy_clear_one(mp, pag, busyp); | ||
| 584 | } | ||
| 585 | |||
| 586 | if (pag) { | ||
| 587 | spin_unlock(&pag->pagb_lock); | ||
| 588 | xfs_perag_put(pag); | ||
| 589 | } | ||
| 590 | } | ||
| 591 | |||
| 592 | /* | ||
| 593 | * Callback for list_sort to sort busy extents by the AG they reside in. | ||
| 594 | */ | ||
| 595 | int | ||
| 596 | xfs_extent_busy_ag_cmp( | ||
| 597 | void *priv, | ||
| 598 | struct list_head *a, | ||
| 599 | struct list_head *b) | ||
| 600 | { | ||
| 601 | return container_of(a, struct xfs_extent_busy, list)->agno - | ||
| 602 | container_of(b, struct xfs_extent_busy, list)->agno; | ||
| 603 | } | ||
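xfs_extent_busy_clear() above only drops and reacquires the perag lock when the AG number changes, so callers walking a whole busy list are expected to sort it by AG first. A hedged sketch; discard_wanted is a hypothetical flag standing in for whatever the caller decides about online discard:

	xfs_extent_busy_sort(&tp->t_busy);	/* see the new header below */
	xfs_extent_busy_clear(mp, &tp->t_busy, discard_wanted);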
diff --git a/fs/xfs/xfs_extent_busy.h b/fs/xfs/xfs_extent_busy.h new file mode 100644 index 000000000000..985412d65ba5 --- /dev/null +++ b/fs/xfs/xfs_extent_busy.h | |||
| @@ -0,0 +1,69 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc. | ||
| 3 | * Copyright (c) 2010 David Chinner. | ||
| 4 | * Copyright (c) 2011 Christoph Hellwig. | ||
| 5 | * All Rights Reserved. | ||
| 6 | * | ||
| 7 | * This program is free software; you can redistribute it and/or | ||
| 8 | * modify it under the terms of the GNU General Public License as | ||
| 9 | * published by the Free Software Foundation. | ||
| 10 | * | ||
| 11 | * This program is distributed in the hope that it would be useful, | ||
| 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 14 | * GNU General Public License for more details. | ||
| 15 | * | ||
| 16 | * You should have received a copy of the GNU General Public License | ||
| 17 | * along with this program; if not, write the Free Software Foundation, | ||
| 18 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
| 19 | */ | ||
| 20 | #ifndef __XFS_EXTENT_BUSY_H__ | ||
| 21 | #define __XFS_EXTENT_BUSY_H__ | ||
| 22 | |||
| 23 | /* | ||
| 24 | * Busy block/extent entry. Indexed by an rbtree in perag to mark blocks that | ||
| 25 | * have been freed but whose transactions aren't committed to disk yet. | ||
| 26 | * | ||
| 27 | * Note that busy extents are also tracked on the owning transaction's | ||
| 28 | * t_busy list until it commits. See xfs_extent_busy_insert() for details. | ||
| 29 | */ | ||
| 30 | struct xfs_extent_busy { | ||
| 31 | struct rb_node rb_node; /* ag by-bno indexed search tree */ | ||
| 32 | struct list_head list; /* transaction busy extent list */ | ||
| 33 | xfs_agnumber_t agno; | ||
| 34 | xfs_agblock_t bno; | ||
| 35 | xfs_extlen_t length; | ||
| 36 | unsigned int flags; | ||
| 37 | #define XFS_EXTENT_BUSY_DISCARDED 0x01 /* undergoing a discard op. */ | ||
| 38 | #define XFS_EXTENT_BUSY_SKIP_DISCARD 0x02 /* do not discard */ | ||
| 39 | }; | ||
| 40 | |||
| 41 | void | ||
| 42 | xfs_extent_busy_insert(struct xfs_trans *tp, xfs_agnumber_t agno, | ||
| 43 | xfs_agblock_t bno, xfs_extlen_t len, unsigned int flags); | ||
| 44 | |||
| 45 | void | ||
| 46 | xfs_extent_busy_clear(struct xfs_mount *mp, struct list_head *list, | ||
| 47 | bool do_discard); | ||
| 48 | |||
| 49 | int | ||
| 50 | xfs_extent_busy_search(struct xfs_mount *mp, xfs_agnumber_t agno, | ||
| 51 | xfs_agblock_t bno, xfs_extlen_t len); | ||
| 52 | |||
| 53 | void | ||
| 54 | xfs_extent_busy_reuse(struct xfs_mount *mp, xfs_agnumber_t agno, | ||
| 55 | xfs_agblock_t fbno, xfs_extlen_t flen, bool userdata); | ||
| 56 | |||
| 57 | void | ||
| 58 | xfs_extent_busy_trim(struct xfs_alloc_arg *args, xfs_agblock_t bno, | ||
| 59 | xfs_extlen_t len, xfs_agblock_t *rbno, xfs_extlen_t *rlen); | ||
| 60 | |||
| 61 | int | ||
| 62 | xfs_extent_busy_ag_cmp(void *priv, struct list_head *a, struct list_head *b); | ||
| 63 | |||
| 64 | static inline void xfs_extent_busy_sort(struct list_head *list) | ||
| 65 | { | ||
| 66 | list_sort(NULL, list, xfs_extent_busy_ag_cmp); | ||
| 67 | } | ||
| 68 | |||
| 69 | #endif /* __XFS_EXTENT_BUSY_H__ */ | ||
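A hedged note on the flag bits, not from this patch: XFS_EXTENT_BUSY_DISCARDED is set internally by xfs_extent_busy_clear() while a discard is in flight, whereas XFS_EXTENT_BUSY_SKIP_DISCARD is something a caller passes in, for example when freeing metadata it is likely to reallocate immediately:

	/* freed btree block: don't discard, we may reuse it right away */
	xfs_extent_busy_insert(tp, agno, bno, len, XFS_EXTENT_BUSY_SKIP_DISCARD);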
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c index 35c2aff38b20..feb36d7551ae 100644 --- a/fs/xfs/xfs_extfree_item.c +++ b/fs/xfs/xfs_extfree_item.c | |||
| @@ -19,7 +19,6 @@ | |||
| 19 | #include "xfs_fs.h" | 19 | #include "xfs_fs.h" |
| 20 | #include "xfs_types.h" | 20 | #include "xfs_types.h" |
| 21 | #include "xfs_log.h" | 21 | #include "xfs_log.h" |
| 22 | #include "xfs_inum.h" | ||
| 23 | #include "xfs_trans.h" | 22 | #include "xfs_trans.h" |
| 24 | #include "xfs_buf_item.h" | 23 | #include "xfs_buf_item.h" |
| 25 | #include "xfs_sb.h" | 24 | #include "xfs_sb.h" |
| @@ -64,7 +63,8 @@ __xfs_efi_release( | |||
| 64 | if (!test_and_clear_bit(XFS_EFI_COMMITTED, &efip->efi_flags)) { | 63 | if (!test_and_clear_bit(XFS_EFI_COMMITTED, &efip->efi_flags)) { |
| 65 | spin_lock(&ailp->xa_lock); | 64 | spin_lock(&ailp->xa_lock); |
| 66 | /* xfs_trans_ail_delete() drops the AIL lock. */ | 65 | /* xfs_trans_ail_delete() drops the AIL lock. */ |
| 67 | xfs_trans_ail_delete(ailp, &efip->efi_item); | 66 | xfs_trans_ail_delete(ailp, &efip->efi_item, |
| 67 | SHUTDOWN_LOG_IO_ERROR); | ||
| 68 | xfs_efi_item_free(efip); | 68 | xfs_efi_item_free(efip); |
| 69 | } | 69 | } |
| 70 | } | 70 | } |
| @@ -147,22 +147,20 @@ xfs_efi_item_unpin( | |||
| 147 | } | 147 | } |
| 148 | 148 | ||
| 149 | /* | 149 | /* |
| 150 | * Efi items have no locking or pushing. However, since EFIs are | 150 | * Efi items have no locking or pushing. However, since EFIs are pulled from |
| 151 | * pulled from the AIL when their corresponding EFDs are committed | 151 | * the AIL when their corresponding EFDs are committed to disk, their situation |
| 152 | * to disk, their situation is very similar to being pinned. Return | 152 | * is very similar to being pinned. Return XFS_ITEM_PINNED so that the caller |
| 153 | * XFS_ITEM_PINNED so that the caller will eventually flush the log. | 153 | * will eventually flush the log. This should help in getting the EFI out of |
| 154 | * This should help in getting the EFI out of the AIL. | 154 | * the AIL. |
| 155 | */ | 155 | */ |
| 156 | STATIC uint | 156 | STATIC uint |
| 157 | xfs_efi_item_trylock( | 157 | xfs_efi_item_push( |
| 158 | struct xfs_log_item *lip) | 158 | struct xfs_log_item *lip, |
| 159 | struct list_head *buffer_list) | ||
| 159 | { | 160 | { |
| 160 | return XFS_ITEM_PINNED; | 161 | return XFS_ITEM_PINNED; |
| 161 | } | 162 | } |
| 162 | 163 | ||
| 163 | /* | ||
| 164 | * Efi items have no locking, so just return. | ||
| 165 | */ | ||
| 166 | STATIC void | 164 | STATIC void |
| 167 | xfs_efi_item_unlock( | 165 | xfs_efi_item_unlock( |
| 168 | struct xfs_log_item *lip) | 166 | struct xfs_log_item *lip) |
| @@ -190,17 +188,6 @@ xfs_efi_item_committed( | |||
| 190 | } | 188 | } |
| 191 | 189 | ||
| 192 | /* | 190 | /* |
| 193 | * There isn't much you can do to push on an efi item. It is simply | ||
| 194 | * stuck waiting for all of its corresponding efd items to be | ||
| 195 | * committed to disk. | ||
| 196 | */ | ||
| 197 | STATIC void | ||
| 198 | xfs_efi_item_push( | ||
| 199 | struct xfs_log_item *lip) | ||
| 200 | { | ||
| 201 | } | ||
| 202 | |||
| 203 | /* | ||
| 204 | * The EFI dependency tracking op doesn't do squat. It can't because | 191 | * The EFI dependency tracking op doesn't do squat. It can't because |
| 205 | * it doesn't know where the free extent is coming from. The dependency | 192 | * it doesn't know where the free extent is coming from. The dependency |
| 206 | * tracking has to be handled by the "enclosing" metadata object. For | 193 | * tracking has to be handled by the "enclosing" metadata object. For |
| @@ -222,7 +209,6 @@ static const struct xfs_item_ops xfs_efi_item_ops = { | |||
| 222 | .iop_format = xfs_efi_item_format, | 209 | .iop_format = xfs_efi_item_format, |
| 223 | .iop_pin = xfs_efi_item_pin, | 210 | .iop_pin = xfs_efi_item_pin, |
| 224 | .iop_unpin = xfs_efi_item_unpin, | 211 | .iop_unpin = xfs_efi_item_unpin, |
| 225 | .iop_trylock = xfs_efi_item_trylock, | ||
| 226 | .iop_unlock = xfs_efi_item_unlock, | 212 | .iop_unlock = xfs_efi_item_unlock, |
| 227 | .iop_committed = xfs_efi_item_committed, | 213 | .iop_committed = xfs_efi_item_committed, |
| 228 | .iop_push = xfs_efi_item_push, | 214 | .iop_push = xfs_efi_item_push, |
| @@ -404,19 +390,17 @@ xfs_efd_item_unpin( | |||
| 404 | } | 390 | } |
| 405 | 391 | ||
| 406 | /* | 392 | /* |
| 407 | * Efd items have no locking, so just return success. | 393 | * There isn't much you can do to push on an efd item. It is simply stuck |
| 394 | * waiting for the log to be flushed to disk. | ||
| 408 | */ | 395 | */ |
| 409 | STATIC uint | 396 | STATIC uint |
| 410 | xfs_efd_item_trylock( | 397 | xfs_efd_item_push( |
| 411 | struct xfs_log_item *lip) | 398 | struct xfs_log_item *lip, |
| 399 | struct list_head *buffer_list) | ||
| 412 | { | 400 | { |
| 413 | return XFS_ITEM_LOCKED; | 401 | return XFS_ITEM_PINNED; |
| 414 | } | 402 | } |
| 415 | 403 | ||
| 416 | /* | ||
| 417 | * Efd items have no locking or pushing, so return failure | ||
| 418 | * so that the caller doesn't bother with us. | ||
| 419 | */ | ||
| 420 | STATIC void | 404 | STATIC void |
| 421 | xfs_efd_item_unlock( | 405 | xfs_efd_item_unlock( |
| 422 | struct xfs_log_item *lip) | 406 | struct xfs_log_item *lip) |
| @@ -451,16 +435,6 @@ xfs_efd_item_committed( | |||
| 451 | } | 435 | } |
| 452 | 436 | ||
| 453 | /* | 437 | /* |
| 454 | * There isn't much you can do to push on an efd item. It is simply | ||
| 455 | * stuck waiting for the log to be flushed to disk. | ||
| 456 | */ | ||
| 457 | STATIC void | ||
| 458 | xfs_efd_item_push( | ||
| 459 | struct xfs_log_item *lip) | ||
| 460 | { | ||
| 461 | } | ||
| 462 | |||
| 463 | /* | ||
| 464 | * The EFD dependency tracking op doesn't do squat. It can't because | 438 | * The EFD dependency tracking op doesn't do squat. It can't because |
| 465 | * it doesn't know where the free extent is coming from. The dependency | 439 | * it doesn't know where the free extent is coming from. The dependency |
| 466 | * tracking has to be handled by the "enclosing" metadata object. For | 440 | * tracking has to be handled by the "enclosing" metadata object. For |
| @@ -482,7 +456,6 @@ static const struct xfs_item_ops xfs_efd_item_ops = { | |||
| 482 | .iop_format = xfs_efd_item_format, | 456 | .iop_format = xfs_efd_item_format, |
| 483 | .iop_pin = xfs_efd_item_pin, | 457 | .iop_pin = xfs_efd_item_pin, |
| 484 | .iop_unpin = xfs_efd_item_unpin, | 458 | .iop_unpin = xfs_efd_item_unpin, |
| 485 | .iop_trylock = xfs_efd_item_trylock, | ||
| 486 | .iop_unlock = xfs_efd_item_unlock, | 459 | .iop_unlock = xfs_efd_item_unlock, |
| 487 | .iop_committed = xfs_efd_item_committed, | 460 | .iop_committed = xfs_efd_item_committed, |
| 488 | .iop_push = xfs_efd_item_push, | 461 | .iop_push = xfs_efd_item_push, |
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 54a67dd9ac0a..8d214b87f6bb 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c | |||
| @@ -17,9 +17,7 @@ | |||
| 17 | */ | 17 | */ |
| 18 | #include "xfs.h" | 18 | #include "xfs.h" |
| 19 | #include "xfs_fs.h" | 19 | #include "xfs_fs.h" |
| 20 | #include "xfs_bit.h" | ||
| 21 | #include "xfs_log.h" | 20 | #include "xfs_log.h" |
| 22 | #include "xfs_inum.h" | ||
| 23 | #include "xfs_sb.h" | 21 | #include "xfs_sb.h" |
| 24 | #include "xfs_ag.h" | 22 | #include "xfs_ag.h" |
| 25 | #include "xfs_trans.h" | 23 | #include "xfs_trans.h" |
| @@ -396,114 +394,96 @@ xfs_file_splice_write( | |||
| 396 | } | 394 | } |
| 397 | 395 | ||
| 398 | /* | 396 | /* |
| 399 | * This routine is called to handle zeroing any space in the last | 397 | * This routine is called to handle zeroing any space in the last block of the |
| 400 | * block of the file that is beyond the EOF. We do this since the | 398 | * file that is beyond the EOF. We do this since the size is being increased |
| 401 | * size is being increased without writing anything to that block | 399 | * without writing anything to that block and we don't want to read the |
| 402 | * and we don't want anyone to read the garbage on the disk. | 400 | * garbage on the disk. |
| 403 | */ | 401 | */ |
| 404 | STATIC int /* error (positive) */ | 402 | STATIC int /* error (positive) */ |
| 405 | xfs_zero_last_block( | 403 | xfs_zero_last_block( |
| 406 | xfs_inode_t *ip, | 404 | struct xfs_inode *ip, |
| 407 | xfs_fsize_t offset, | 405 | xfs_fsize_t offset, |
| 408 | xfs_fsize_t isize) | 406 | xfs_fsize_t isize) |
| 409 | { | 407 | { |
| 410 | xfs_fileoff_t last_fsb; | 408 | struct xfs_mount *mp = ip->i_mount; |
| 411 | xfs_mount_t *mp = ip->i_mount; | 409 | xfs_fileoff_t last_fsb = XFS_B_TO_FSBT(mp, isize); |
| 412 | int nimaps; | 410 | int zero_offset = XFS_B_FSB_OFFSET(mp, isize); |
| 413 | int zero_offset; | 411 | int zero_len; |
| 414 | int zero_len; | 412 | int nimaps = 1; |
| 415 | int error = 0; | 413 | int error = 0; |
| 416 | xfs_bmbt_irec_t imap; | 414 | struct xfs_bmbt_irec imap; |
| 417 | |||
| 418 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); | ||
| 419 | |||
| 420 | zero_offset = XFS_B_FSB_OFFSET(mp, isize); | ||
| 421 | if (zero_offset == 0) { | ||
| 422 | /* | ||
| 423 | * There are no extra bytes in the last block on disk to | ||
| 424 | * zero, so return. | ||
| 425 | */ | ||
| 426 | return 0; | ||
| 427 | } | ||
| 428 | 415 | ||
| 429 | last_fsb = XFS_B_TO_FSBT(mp, isize); | 416 | xfs_ilock(ip, XFS_ILOCK_EXCL); |
| 430 | nimaps = 1; | ||
| 431 | error = xfs_bmapi_read(ip, last_fsb, 1, &imap, &nimaps, 0); | 417 | error = xfs_bmapi_read(ip, last_fsb, 1, &imap, &nimaps, 0); |
| 418 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | ||
| 432 | if (error) | 419 | if (error) |
| 433 | return error; | 420 | return error; |
| 421 | |||
| 434 | ASSERT(nimaps > 0); | 422 | ASSERT(nimaps > 0); |
| 423 | |||
| 435 | /* | 424 | /* |
| 436 | * If the block underlying isize is just a hole, then there | 425 | * If the block underlying isize is just a hole, then there |
| 437 | * is nothing to zero. | 426 | * is nothing to zero. |
| 438 | */ | 427 | */ |
| 439 | if (imap.br_startblock == HOLESTARTBLOCK) { | 428 | if (imap.br_startblock == HOLESTARTBLOCK) |
| 440 | return 0; | 429 | return 0; |
| 441 | } | ||
| 442 | /* | ||
| 443 | * Zero the part of the last block beyond the EOF, and write it | ||
| 444 | * out sync. We need to drop the ilock while we do this so we | ||
| 445 | * don't deadlock when the buffer cache calls back to us. | ||
| 446 | */ | ||
| 447 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | ||
| 448 | 430 | ||
| 449 | zero_len = mp->m_sb.sb_blocksize - zero_offset; | 431 | zero_len = mp->m_sb.sb_blocksize - zero_offset; |
| 450 | if (isize + zero_len > offset) | 432 | if (isize + zero_len > offset) |
| 451 | zero_len = offset - isize; | 433 | zero_len = offset - isize; |
| 452 | error = xfs_iozero(ip, isize, zero_len); | 434 | return xfs_iozero(ip, isize, zero_len); |
| 453 | |||
| 454 | xfs_ilock(ip, XFS_ILOCK_EXCL); | ||
| 455 | ASSERT(error >= 0); | ||
| 456 | return error; | ||
| 457 | } | 435 | } |
| 458 | 436 | ||
| 459 | /* | 437 | /* |
| 460 | * Zero any on disk space between the current EOF and the new, | 438 | * Zero any on disk space between the current EOF and the new, larger EOF. |
| 461 | * larger EOF. This handles the normal case of zeroing the remainder | 439 | * |
| 462 | * of the last block in the file and the unusual case of zeroing blocks | 440 | * This handles the normal case of zeroing the remainder of the last block in |
| 463 | * out beyond the size of the file. This second case only happens | 441 | * the file and the unusual case of zeroing blocks out beyond the size of the |
| 464 | * with fixed size extents and when the system crashes before the inode | 442 | * file. This second case only happens with fixed size extents and when the |
| 465 | * size was updated but after blocks were allocated. If fill is set, | 443 | * system crashes before the inode size was updated but after blocks were |
| 466 | * then any holes in the range are filled and zeroed. If not, the holes | 444 | * allocated. |
| 467 | * are left alone as holes. | 445 | * |
| 446 | * Expects the iolock to be held exclusive, and will take the ilock internally. | ||
| 468 | */ | 447 | */ |
| 469 | |||
| 470 | int /* error (positive) */ | 448 | int /* error (positive) */ |
| 471 | xfs_zero_eof( | 449 | xfs_zero_eof( |
| 472 | xfs_inode_t *ip, | 450 | struct xfs_inode *ip, |
| 473 | xfs_off_t offset, /* starting I/O offset */ | 451 | xfs_off_t offset, /* starting I/O offset */ |
| 474 | xfs_fsize_t isize) /* current inode size */ | 452 | xfs_fsize_t isize) /* current inode size */ |
| 475 | { | 453 | { |
| 476 | xfs_mount_t *mp = ip->i_mount; | 454 | struct xfs_mount *mp = ip->i_mount; |
| 477 | xfs_fileoff_t start_zero_fsb; | 455 | xfs_fileoff_t start_zero_fsb; |
| 478 | xfs_fileoff_t end_zero_fsb; | 456 | xfs_fileoff_t end_zero_fsb; |
| 479 | xfs_fileoff_t zero_count_fsb; | 457 | xfs_fileoff_t zero_count_fsb; |
| 480 | xfs_fileoff_t last_fsb; | 458 | xfs_fileoff_t last_fsb; |
| 481 | xfs_fileoff_t zero_off; | 459 | xfs_fileoff_t zero_off; |
| 482 | xfs_fsize_t zero_len; | 460 | xfs_fsize_t zero_len; |
| 483 | int nimaps; | 461 | int nimaps; |
| 484 | int error = 0; | 462 | int error = 0; |
| 485 | xfs_bmbt_irec_t imap; | 463 | struct xfs_bmbt_irec imap; |
| 486 | 464 | ||
| 487 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL)); | 465 | ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); |
| 488 | ASSERT(offset > isize); | 466 | ASSERT(offset > isize); |
| 489 | 467 | ||
| 490 | /* | 468 | /* |
| 491 | * First handle zeroing the block on which isize resides. | 469 | * First handle zeroing the block on which isize resides. |
| 470 | * | ||
| 492 | * We only zero a part of that block so it is handled specially. | 471 | * We only zero a part of that block so it is handled specially. |
| 493 | */ | 472 | */ |
| 494 | error = xfs_zero_last_block(ip, offset, isize); | 473 | if (XFS_B_FSB_OFFSET(mp, isize) != 0) { |
| 495 | if (error) { | 474 | error = xfs_zero_last_block(ip, offset, isize); |
| 496 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL)); | 475 | if (error) |
| 497 | return error; | 476 | return error; |
| 498 | } | 477 | } |
| 499 | 478 | ||
| 500 | /* | 479 | /* |
| 501 | * Calculate the range between the new size and the old | 480 | * Calculate the range between the new size and the old where blocks |
| 502 | * where blocks needing to be zeroed may exist. To get the | 481 | * needing to be zeroed may exist. |
| 503 | * block where the last byte in the file currently resides, | 482 | * |
| 504 | * we need to subtract one from the size and truncate back | 483 | * To get the block where the last byte in the file currently resides, |
| 505 | * to a block boundary. We subtract 1 in case the size is | 484 | * we need to subtract one from the size and truncate back to a block |
| 506 | * exactly on a block boundary. | 485 | * boundary. We subtract 1 in case the size is exactly on a block |
| 486 | * boundary. | ||
| 507 | */ | 487 | */ |
| 508 | last_fsb = isize ? XFS_B_TO_FSBT(mp, isize - 1) : (xfs_fileoff_t)-1; | 488 | last_fsb = isize ? XFS_B_TO_FSBT(mp, isize - 1) : (xfs_fileoff_t)-1; |
| 509 | start_zero_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)isize); | 489 | start_zero_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)isize); |
| @@ -521,23 +501,18 @@ xfs_zero_eof( | |||
| 521 | while (start_zero_fsb <= end_zero_fsb) { | 501 | while (start_zero_fsb <= end_zero_fsb) { |
| 522 | nimaps = 1; | 502 | nimaps = 1; |
| 523 | zero_count_fsb = end_zero_fsb - start_zero_fsb + 1; | 503 | zero_count_fsb = end_zero_fsb - start_zero_fsb + 1; |
| 504 | |||
| 505 | xfs_ilock(ip, XFS_ILOCK_EXCL); | ||
| 524 | error = xfs_bmapi_read(ip, start_zero_fsb, zero_count_fsb, | 506 | error = xfs_bmapi_read(ip, start_zero_fsb, zero_count_fsb, |
| 525 | &imap, &nimaps, 0); | 507 | &imap, &nimaps, 0); |
| 526 | if (error) { | 508 | xfs_iunlock(ip, XFS_ILOCK_EXCL); |
| 527 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL)); | 509 | if (error) |
| 528 | return error; | 510 | return error; |
| 529 | } | 511 | |
| 530 | ASSERT(nimaps > 0); | 512 | ASSERT(nimaps > 0); |
| 531 | 513 | ||
| 532 | if (imap.br_state == XFS_EXT_UNWRITTEN || | 514 | if (imap.br_state == XFS_EXT_UNWRITTEN || |
| 533 | imap.br_startblock == HOLESTARTBLOCK) { | 515 | imap.br_startblock == HOLESTARTBLOCK) { |
| 534 | /* | ||
| 535 | * This loop handles initializing pages that were | ||
| 536 | * partially initialized by the code below this | ||
| 537 | * loop. It basically zeroes the part of the page | ||
| 538 | * that sits on a hole and sets the page as P_HOLE | ||
| 539 | * and calls remapf if it is a mapped file. | ||
| 540 | */ | ||
| 541 | start_zero_fsb = imap.br_startoff + imap.br_blockcount; | 516 | start_zero_fsb = imap.br_startoff + imap.br_blockcount; |
| 542 | ASSERT(start_zero_fsb <= (end_zero_fsb + 1)); | 517 | ASSERT(start_zero_fsb <= (end_zero_fsb + 1)); |
| 543 | continue; | 518 | continue; |
| @@ -545,11 +520,7 @@ xfs_zero_eof( | |||
| 545 | 520 | ||
| 546 | /* | 521 | /* |
| 547 | * There are blocks we need to zero. | 522 | * There are blocks we need to zero. |
| 548 | * Drop the inode lock while we're doing the I/O. | ||
| 549 | * We'll still have the iolock to protect us. | ||
| 550 | */ | 523 | */ |
| 551 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | ||
| 552 | |||
| 553 | zero_off = XFS_FSB_TO_B(mp, start_zero_fsb); | 524 | zero_off = XFS_FSB_TO_B(mp, start_zero_fsb); |
| 554 | zero_len = XFS_FSB_TO_B(mp, imap.br_blockcount); | 525 | zero_len = XFS_FSB_TO_B(mp, imap.br_blockcount); |
| 555 | 526 | ||
| @@ -557,22 +528,14 @@ xfs_zero_eof( | |||
| 557 | zero_len = offset - zero_off; | 528 | zero_len = offset - zero_off; |
| 558 | 529 | ||
| 559 | error = xfs_iozero(ip, zero_off, zero_len); | 530 | error = xfs_iozero(ip, zero_off, zero_len); |
| 560 | if (error) { | 531 | if (error) |
| 561 | goto out_lock; | 532 | return error; |
| 562 | } | ||
| 563 | 533 | ||
| 564 | start_zero_fsb = imap.br_startoff + imap.br_blockcount; | 534 | start_zero_fsb = imap.br_startoff + imap.br_blockcount; |
| 565 | ASSERT(start_zero_fsb <= (end_zero_fsb + 1)); | 535 | ASSERT(start_zero_fsb <= (end_zero_fsb + 1)); |
| 566 | |||
| 567 | xfs_ilock(ip, XFS_ILOCK_EXCL); | ||
| 568 | } | 536 | } |
| 569 | 537 | ||
| 570 | return 0; | 538 | return 0; |
| 571 | |||
| 572 | out_lock: | ||
| 573 | xfs_ilock(ip, XFS_ILOCK_EXCL); | ||
| 574 | ASSERT(error >= 0); | ||
| 575 | return error; | ||
| 576 | } | 539 | } |
| 577 | 540 | ||
| 578 | /* | 541 | /* |
| @@ -593,35 +556,29 @@ xfs_file_aio_write_checks( | |||
| 593 | struct xfs_inode *ip = XFS_I(inode); | 556 | struct xfs_inode *ip = XFS_I(inode); |
| 594 | int error = 0; | 557 | int error = 0; |
| 595 | 558 | ||
| 596 | xfs_rw_ilock(ip, XFS_ILOCK_EXCL); | ||
| 597 | restart: | 559 | restart: |
| 598 | error = generic_write_checks(file, pos, count, S_ISBLK(inode->i_mode)); | 560 | error = generic_write_checks(file, pos, count, S_ISBLK(inode->i_mode)); |
| 599 | if (error) { | 561 | if (error) |
| 600 | xfs_rw_iunlock(ip, XFS_ILOCK_EXCL); | ||
| 601 | return error; | 562 | return error; |
| 602 | } | ||
| 603 | 563 | ||
| 604 | /* | 564 | /* |
| 605 | * If the offset is beyond the size of the file, we need to zero any | 565 | * If the offset is beyond the size of the file, we need to zero any |
| 606 | * blocks that fall between the existing EOF and the start of this | 566 | * blocks that fall between the existing EOF and the start of this |
| 607 | * write. If zeroing is needed and we are currently holding the | 567 | * write. If zeroing is needed and we are currently holding the |
| 608 | * iolock shared, we need to update it to exclusive which involves | 568 | * iolock shared, we need to update it to exclusive which implies |
| 609 | * dropping all locks and relocking to maintain correct locking order. | 569 | * having to redo all checks before. |
| 610 | * If we do this, restart the function to ensure all checks and values | ||
| 611 | * are still valid. | ||
| 612 | */ | 570 | */ |
| 613 | if (*pos > i_size_read(inode)) { | 571 | if (*pos > i_size_read(inode)) { |
| 614 | if (*iolock == XFS_IOLOCK_SHARED) { | 572 | if (*iolock == XFS_IOLOCK_SHARED) { |
| 615 | xfs_rw_iunlock(ip, XFS_ILOCK_EXCL | *iolock); | 573 | xfs_rw_iunlock(ip, *iolock); |
| 616 | *iolock = XFS_IOLOCK_EXCL; | 574 | *iolock = XFS_IOLOCK_EXCL; |
| 617 | xfs_rw_ilock(ip, XFS_ILOCK_EXCL | *iolock); | 575 | xfs_rw_ilock(ip, *iolock); |
| 618 | goto restart; | 576 | goto restart; |
| 619 | } | 577 | } |
| 620 | error = -xfs_zero_eof(ip, *pos, i_size_read(inode)); | 578 | error = -xfs_zero_eof(ip, *pos, i_size_read(inode)); |
| 579 | if (error) | ||
| 580 | return error; | ||
| 621 | } | 581 | } |
| 622 | xfs_rw_iunlock(ip, XFS_ILOCK_EXCL); | ||
| 623 | if (error) | ||
| 624 | return error; | ||
| 625 | 582 | ||
| 626 | /* | 583 | /* |
| 627 | * Updating the timestamps will grab the ilock again from | 584 | * Updating the timestamps will grab the ilock again from |
| @@ -638,7 +595,6 @@ restart: | |||
| 638 | * people from modifying setuid and setgid binaries. | 595 | * people from modifying setuid and setgid binaries. |
| 639 | */ | 596 | */ |
| 640 | return file_remove_suid(file); | 597 | return file_remove_suid(file); |
| 641 | |||
| 642 | } | 598 | } |
| 643 | 599 | ||
| 644 | /* | 600 | /* |
| @@ -1007,8 +963,149 @@ xfs_vm_page_mkwrite( | |||
| 1007 | return block_page_mkwrite(vma, vmf, xfs_get_blocks); | 963 | return block_page_mkwrite(vma, vmf, xfs_get_blocks); |
| 1008 | } | 964 | } |
| 1009 | 965 | ||
| 966 | STATIC loff_t | ||
| 967 | xfs_seek_data( | ||
| 968 | struct file *file, | ||
| 969 | loff_t start, | ||
| 970 | u32 type) | ||
| 971 | { | ||
| 972 | struct inode *inode = file->f_mapping->host; | ||
| 973 | struct xfs_inode *ip = XFS_I(inode); | ||
| 974 | struct xfs_mount *mp = ip->i_mount; | ||
| 975 | struct xfs_bmbt_irec map[2]; | ||
| 976 | int nmap = 2; | ||
| 977 | loff_t uninitialized_var(offset); | ||
| 978 | xfs_fsize_t isize; | ||
| 979 | xfs_fileoff_t fsbno; | ||
| 980 | xfs_filblks_t end; | ||
| 981 | uint lock; | ||
| 982 | int error; | ||
| 983 | |||
| 984 | lock = xfs_ilock_map_shared(ip); | ||
| 985 | |||
| 986 | isize = i_size_read(inode); | ||
| 987 | if (start >= isize) { | ||
| 988 | error = ENXIO; | ||
| 989 | goto out_unlock; | ||
| 990 | } | ||
| 991 | |||
| 992 | fsbno = XFS_B_TO_FSBT(mp, start); | ||
| 993 | |||
| 994 | /* | ||
| 995 | * Try to read extents from the first block indicated | ||
| 996 | * by fsbno to the end block of the file. | ||
| 997 | */ | ||
| 998 | end = XFS_B_TO_FSB(mp, isize); | ||
| 999 | |||
| 1000 | error = xfs_bmapi_read(ip, fsbno, end - fsbno, map, &nmap, | ||
| 1001 | XFS_BMAPI_ENTIRE); | ||
| 1002 | if (error) | ||
| 1003 | goto out_unlock; | ||
| 1004 | |||
| 1005 | /* | ||
| 1006 | * Treat an unwritten extent as a data extent, since it might | ||
| 1007 | * contain dirty data in the page cache. | ||
| 1008 | */ | ||
| 1009 | if (map[0].br_startblock != HOLESTARTBLOCK) { | ||
| 1010 | offset = max_t(loff_t, start, | ||
| 1011 | XFS_FSB_TO_B(mp, map[0].br_startoff)); | ||
| 1012 | } else { | ||
| 1013 | if (nmap == 1) { | ||
| 1014 | error = ENXIO; | ||
| 1015 | goto out_unlock; | ||
| 1016 | } | ||
| 1017 | |||
| 1018 | offset = max_t(loff_t, start, | ||
| 1019 | XFS_FSB_TO_B(mp, map[1].br_startoff)); | ||
| 1020 | } | ||
| 1021 | |||
| 1022 | if (offset != file->f_pos) | ||
| 1023 | file->f_pos = offset; | ||
| 1024 | |||
| 1025 | out_unlock: | ||
| 1026 | xfs_iunlock_map_shared(ip, lock); | ||
| 1027 | |||
| 1028 | if (error) | ||
| 1029 | return -error; | ||
| 1030 | return offset; | ||
| 1031 | } | ||
| 1032 | |||
| 1033 | STATIC loff_t | ||
| 1034 | xfs_seek_hole( | ||
| 1035 | struct file *file, | ||
| 1036 | loff_t start, | ||
| 1037 | u32 type) | ||
| 1038 | { | ||
| 1039 | struct inode *inode = file->f_mapping->host; | ||
| 1040 | struct xfs_inode *ip = XFS_I(inode); | ||
| 1041 | struct xfs_mount *mp = ip->i_mount; | ||
| 1042 | loff_t uninitialized_var(offset); | ||
| 1043 | loff_t holeoff; | ||
| 1044 | xfs_fsize_t isize; | ||
| 1045 | xfs_fileoff_t fsbno; | ||
| 1046 | uint lock; | ||
| 1047 | int error; | ||
| 1048 | |||
| 1049 | if (XFS_FORCED_SHUTDOWN(mp)) | ||
| 1050 | return -XFS_ERROR(EIO); | ||
| 1051 | |||
| 1052 | lock = xfs_ilock_map_shared(ip); | ||
| 1053 | |||
| 1054 | isize = i_size_read(inode); | ||
| 1055 | if (start >= isize) { | ||
| 1056 | error = ENXIO; | ||
| 1057 | goto out_unlock; | ||
| 1058 | } | ||
| 1059 | |||
| 1060 | fsbno = XFS_B_TO_FSBT(mp, start); | ||
| 1061 | error = xfs_bmap_first_unused(NULL, ip, 1, &fsbno, XFS_DATA_FORK); | ||
| 1062 | if (error) | ||
| 1063 | goto out_unlock; | ||
| 1064 | |||
| 1065 | holeoff = XFS_FSB_TO_B(mp, fsbno); | ||
| 1066 | if (holeoff <= start) | ||
| 1067 | offset = start; | ||
| 1068 | else { | ||
| 1069 | /* | ||
| 1070 | * xfs_bmap_first_unused() could return a value bigger than | ||
| 1071 | * isize if there are no more holes past the supplied offset. | ||
| 1072 | */ | ||
| 1073 | offset = min_t(loff_t, holeoff, isize); | ||
| 1074 | } | ||
| 1075 | |||
| 1076 | if (offset != file->f_pos) | ||
| 1077 | file->f_pos = offset; | ||
| 1078 | |||
| 1079 | out_unlock: | ||
| 1080 | xfs_iunlock_map_shared(ip, lock); | ||
| 1081 | |||
| 1082 | if (error) | ||
| 1083 | return -error; | ||
| 1084 | return offset; | ||
| 1085 | } | ||
| 1086 | |||
| 1087 | STATIC loff_t | ||
| 1088 | xfs_file_llseek( | ||
| 1089 | struct file *file, | ||
| 1090 | loff_t offset, | ||
| 1091 | int origin) | ||
| 1092 | { | ||
| 1093 | switch (origin) { | ||
| 1094 | case SEEK_END: | ||
| 1095 | case SEEK_CUR: | ||
| 1096 | case SEEK_SET: | ||
| 1097 | return generic_file_llseek(file, offset, origin); | ||
| 1098 | case SEEK_DATA: | ||
| 1099 | return xfs_seek_data(file, offset, origin); | ||
| 1100 | case SEEK_HOLE: | ||
| 1101 | return xfs_seek_hole(file, offset, origin); | ||
| 1102 | default: | ||
| 1103 | return -EINVAL; | ||
| 1104 | } | ||
| 1105 | } | ||
| 1106 | |||
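From userspace, the new cases follow the generic SEEK_DATA/SEEK_HOLE semantics: lseek() returns the next data or hole offset at or after the given position, or fails with ENXIO past EOF. A minimal sketch, assuming a file descriptor opened on an XFS file:

	#define _GNU_SOURCE
	#include <stdio.h>
	#include <unistd.h>

	/* print the first data and hole offsets at or after pos */
	static void probe(int fd, off_t pos)
	{
		off_t data = lseek(fd, pos, SEEK_DATA);
		off_t hole = lseek(fd, pos, SEEK_HOLE);

		printf("data at %lld, hole at %lld\n",
		       (long long)data, (long long)hole);
	}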
| 1010 | const struct file_operations xfs_file_operations = { | 1107 | const struct file_operations xfs_file_operations = { |
| 1011 | .llseek = generic_file_llseek, | 1108 | .llseek = xfs_file_llseek, |
| 1012 | .read = do_sync_read, | 1109 | .read = do_sync_read, |
| 1013 | .write = do_sync_write, | 1110 | .write = do_sync_write, |
| 1014 | .aio_read = xfs_file_aio_read, | 1111 | .aio_read = xfs_file_aio_read, |
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index 1c6fdeb702ff..c25b094efbf7 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c | |||
| @@ -18,8 +18,6 @@ | |||
| 18 | #include "xfs.h" | 18 | #include "xfs.h" |
| 19 | #include "xfs_fs.h" | 19 | #include "xfs_fs.h" |
| 20 | #include "xfs_types.h" | 20 | #include "xfs_types.h" |
| 21 | #include "xfs_bit.h" | ||
| 22 | #include "xfs_inum.h" | ||
| 23 | #include "xfs_log.h" | 21 | #include "xfs_log.h" |
| 24 | #include "xfs_trans.h" | 22 | #include "xfs_trans.h" |
| 25 | #include "xfs_sb.h" | 23 | #include "xfs_sb.h" |
| @@ -39,7 +37,6 @@ | |||
| 39 | #include "xfs_itable.h" | 37 | #include "xfs_itable.h" |
| 40 | #include "xfs_trans_space.h" | 38 | #include "xfs_trans_space.h" |
| 41 | #include "xfs_rtalloc.h" | 39 | #include "xfs_rtalloc.h" |
| 42 | #include "xfs_rw.h" | ||
| 43 | #include "xfs_filestream.h" | 40 | #include "xfs_filestream.h" |
| 44 | #include "xfs_trace.h" | 41 | #include "xfs_trace.h" |
| 45 | 42 | ||
| @@ -147,9 +144,9 @@ xfs_growfs_data_private( | |||
| 147 | if ((error = xfs_sb_validate_fsb_count(&mp->m_sb, nb))) | 144 | if ((error = xfs_sb_validate_fsb_count(&mp->m_sb, nb))) |
| 148 | return error; | 145 | return error; |
| 149 | dpct = pct - mp->m_sb.sb_imax_pct; | 146 | dpct = pct - mp->m_sb.sb_imax_pct; |
| 150 | bp = xfs_buf_read_uncached(mp, mp->m_ddev_targp, | 147 | bp = xfs_buf_read_uncached(mp->m_ddev_targp, |
| 151 | XFS_FSB_TO_BB(mp, nb) - XFS_FSS_TO_BB(mp, 1), | 148 | XFS_FSB_TO_BB(mp, nb) - XFS_FSS_TO_BB(mp, 1), |
| 152 | BBTOB(XFS_FSS_TO_BB(mp, 1)), 0); | 149 | XFS_FSS_TO_BB(mp, 1), 0); |
| 153 | if (!bp) | 150 | if (!bp) |
| 154 | return EIO; | 151 | return EIO; |
| 155 | xfs_buf_relse(bp); | 152 | xfs_buf_relse(bp); |
| @@ -193,7 +190,7 @@ xfs_growfs_data_private( | |||
| 193 | */ | 190 | */ |
| 194 | bp = xfs_buf_get(mp->m_ddev_targp, | 191 | bp = xfs_buf_get(mp->m_ddev_targp, |
| 195 | XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)), | 192 | XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)), |
| 196 | XFS_FSS_TO_BB(mp, 1), XBF_LOCK | XBF_MAPPED); | 193 | XFS_FSS_TO_BB(mp, 1), 0); |
| 197 | if (!bp) { | 194 | if (!bp) { |
| 198 | error = ENOMEM; | 195 | error = ENOMEM; |
| 199 | goto error0; | 196 | goto error0; |
| @@ -230,7 +227,7 @@ xfs_growfs_data_private( | |||
| 230 | */ | 227 | */ |
| 231 | bp = xfs_buf_get(mp->m_ddev_targp, | 228 | bp = xfs_buf_get(mp->m_ddev_targp, |
| 232 | XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)), | 229 | XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)), |
| 233 | XFS_FSS_TO_BB(mp, 1), XBF_LOCK | XBF_MAPPED); | 230 | XFS_FSS_TO_BB(mp, 1), 0); |
| 234 | if (!bp) { | 231 | if (!bp) { |
| 235 | error = ENOMEM; | 232 | error = ENOMEM; |
| 236 | goto error0; | 233 | goto error0; |
| @@ -259,8 +256,7 @@ xfs_growfs_data_private( | |||
| 259 | */ | 256 | */ |
| 260 | bp = xfs_buf_get(mp->m_ddev_targp, | 257 | bp = xfs_buf_get(mp->m_ddev_targp, |
| 261 | XFS_AGB_TO_DADDR(mp, agno, XFS_BNO_BLOCK(mp)), | 258 | XFS_AGB_TO_DADDR(mp, agno, XFS_BNO_BLOCK(mp)), |
| 262 | BTOBB(mp->m_sb.sb_blocksize), | 259 | BTOBB(mp->m_sb.sb_blocksize), 0); |
| 263 | XBF_LOCK | XBF_MAPPED); | ||
| 264 | if (!bp) { | 260 | if (!bp) { |
| 265 | error = ENOMEM; | 261 | error = ENOMEM; |
| 266 | goto error0; | 262 | goto error0; |
| @@ -286,8 +282,7 @@ xfs_growfs_data_private( | |||
| 286 | */ | 282 | */ |
| 287 | bp = xfs_buf_get(mp->m_ddev_targp, | 283 | bp = xfs_buf_get(mp->m_ddev_targp, |
| 288 | XFS_AGB_TO_DADDR(mp, agno, XFS_CNT_BLOCK(mp)), | 284 | XFS_AGB_TO_DADDR(mp, agno, XFS_CNT_BLOCK(mp)), |
| 289 | BTOBB(mp->m_sb.sb_blocksize), | 285 | BTOBB(mp->m_sb.sb_blocksize), 0); |
| 290 | XBF_LOCK | XBF_MAPPED); | ||
| 291 | if (!bp) { | 286 | if (!bp) { |
| 292 | error = ENOMEM; | 287 | error = ENOMEM; |
| 293 | goto error0; | 288 | goto error0; |
| @@ -314,8 +309,7 @@ xfs_growfs_data_private( | |||
| 314 | */ | 309 | */ |
| 315 | bp = xfs_buf_get(mp->m_ddev_targp, | 310 | bp = xfs_buf_get(mp->m_ddev_targp, |
| 316 | XFS_AGB_TO_DADDR(mp, agno, XFS_IBT_BLOCK(mp)), | 311 | XFS_AGB_TO_DADDR(mp, agno, XFS_IBT_BLOCK(mp)), |
| 317 | BTOBB(mp->m_sb.sb_blocksize), | 312 | BTOBB(mp->m_sb.sb_blocksize), 0); |
| 318 | XBF_LOCK | XBF_MAPPED); | ||
| 319 | if (!bp) { | 313 | if (!bp) { |
| 320 | error = ENOMEM; | 314 | error = ENOMEM; |
| 321 | goto error0; | 315 | goto error0; |
| @@ -405,7 +399,7 @@ xfs_growfs_data_private( | |||
| 405 | 399 | ||
| 406 | /* update secondary superblocks. */ | 400 | /* update secondary superblocks. */ |
| 407 | for (agno = 1; agno < nagcount; agno++) { | 401 | for (agno = 1; agno < nagcount; agno++) { |
| 408 | error = xfs_read_buf(mp, mp->m_ddev_targp, | 402 | error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, |
| 409 | XFS_AGB_TO_DADDR(mp, agno, XFS_SB_BLOCK(mp)), | 403 | XFS_AGB_TO_DADDR(mp, agno, XFS_SB_BLOCK(mp)), |
| 410 | XFS_FSS_TO_BB(mp, 1), 0, &bp); | 404 | XFS_FSS_TO_BB(mp, 1), 0, &bp); |
| 411 | if (error) { | 405 | if (error) { |
| @@ -693,3 +687,63 @@ xfs_fs_goingdown( | |||
| 693 | 687 | ||
| 694 | return 0; | 688 | return 0; |
| 695 | } | 689 | } |
| 690 | |||
| 691 | /* | ||
| 692 | * Force a shutdown of the filesystem instantly while keeping the filesystem | ||
| 693 | * consistent. We don't do an unmount here; just shutdown the shop, make sure | ||
| 694 | * that absolutely nothing persistent happens to this filesystem after this | ||
| 695 | * point. | ||
| 696 | */ | ||
| 697 | void | ||
| 698 | xfs_do_force_shutdown( | ||
| 699 | xfs_mount_t *mp, | ||
| 700 | int flags, | ||
| 701 | char *fname, | ||
| 702 | int lnnum) | ||
| 703 | { | ||
| 704 | int logerror; | ||
| 705 | |||
| 706 | logerror = flags & SHUTDOWN_LOG_IO_ERROR; | ||
| 707 | |||
| 708 | if (!(flags & SHUTDOWN_FORCE_UMOUNT)) { | ||
| 709 | xfs_notice(mp, | ||
| 710 | "%s(0x%x) called from line %d of file %s. Return address = 0x%p", | ||
| 711 | __func__, flags, lnnum, fname, __return_address); | ||
| 712 | } | ||
| 713 | /* | ||
| 714 | * No need to duplicate efforts. | ||
| 715 | */ | ||
| 716 | if (XFS_FORCED_SHUTDOWN(mp) && !logerror) | ||
| 717 | return; | ||
| 718 | |||
| 719 | /* | ||
| 720 | * This sets the XFS_MOUNT_FS_SHUTDOWN flag, makes sure that we don't | ||
| 721 | * queue up anybody new on the log reservations, and wakes up | ||
| 722 | * everybody who's sleeping on log reservations to tell them | ||
| 723 | * the bad news. | ||
| 724 | */ | ||
| 725 | if (xfs_log_force_umount(mp, logerror)) | ||
| 726 | return; | ||
| 727 | |||
| 728 | if (flags & SHUTDOWN_CORRUPT_INCORE) { | ||
| 729 | xfs_alert_tag(mp, XFS_PTAG_SHUTDOWN_CORRUPT, | ||
| 730 | "Corruption of in-memory data detected. Shutting down filesystem"); | ||
| 731 | if (XFS_ERRLEVEL_HIGH <= xfs_error_level) | ||
| 732 | xfs_stack_trace(); | ||
| 733 | } else if (!(flags & SHUTDOWN_FORCE_UMOUNT)) { | ||
| 734 | if (logerror) { | ||
| 735 | xfs_alert_tag(mp, XFS_PTAG_SHUTDOWN_LOGERROR, | ||
| 736 | "Log I/O Error Detected. Shutting down filesystem"); | ||
| 737 | } else if (flags & SHUTDOWN_DEVICE_REQ) { | ||
| 738 | xfs_alert_tag(mp, XFS_PTAG_SHUTDOWN_IOERROR, | ||
| 739 | "All device paths lost. Shutting down filesystem"); | ||
| 740 | } else if (!(flags & SHUTDOWN_REMOTE_REQ)) { | ||
| 741 | xfs_alert_tag(mp, XFS_PTAG_SHUTDOWN_IOERROR, | ||
| 742 | "I/O Error Detected. Shutting down filesystem"); | ||
| 743 | } | ||
| 744 | } | ||
| 745 | if (!(flags & SHUTDOWN_FORCE_UMOUNT)) { | ||
| 746 | xfs_alert(mp, | ||
| 747 | "Please umount the filesystem and rectify the problem(s)"); | ||
| 748 | } | ||
| 749 | } | ||
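Callers normally reach xfs_do_force_shutdown() through the xfs_force_shutdown() wrapper macro, which fills in the file name and line number printed above. A hedged sketch:

	/* shut the filesystem down after detecting in-memory corruption */
	xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);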
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index dad1a31aa4fc..177a21a7ac49 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c | |||
| @@ -200,8 +200,7 @@ xfs_ialloc_inode_init( | |||
| 200 | */ | 200 | */ |
| 201 | d = XFS_AGB_TO_DADDR(mp, agno, agbno + (j * blks_per_cluster)); | 201 | d = XFS_AGB_TO_DADDR(mp, agno, agbno + (j * blks_per_cluster)); |
| 202 | fbuf = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, | 202 | fbuf = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, |
| 203 | mp->m_bsize * blks_per_cluster, | 203 | mp->m_bsize * blks_per_cluster, 0); |
| 204 | XBF_LOCK); | ||
| 205 | if (!fbuf) | 204 | if (!fbuf) |
| 206 | return ENOMEM; | 205 | return ENOMEM; |
| 207 | /* | 206 | /* |
| @@ -610,6 +609,13 @@ xfs_ialloc_get_rec( | |||
| 610 | /* | 609 | /* |
| 611 | * Visible inode allocation functions. | 610 | * Visible inode allocation functions. |
| 612 | */ | 611 | */ |
| 612 | /* | ||
| 613 | * Find a free (set) bit in the inode bitmask. | ||
| 614 | */ | ||
| 615 | static inline int xfs_ialloc_find_free(xfs_inofree_t *fp) | ||
| 616 | { | ||
| 617 | return xfs_lowbit64(*fp); | ||
| 618 | } | ||
| 613 | 619 | ||
| 614 | /* | 620 | /* |
| 615 | * Allocate an inode on disk. | 621 | * Allocate an inode on disk. |
diff --git a/fs/xfs/xfs_ialloc.h b/fs/xfs/xfs_ialloc.h index 666a037398d6..65ac57c8063c 100644 --- a/fs/xfs/xfs_ialloc.h +++ b/fs/xfs/xfs_ialloc.h | |||
| @@ -47,15 +47,6 @@ xfs_make_iptr(struct xfs_mount *mp, struct xfs_buf *b, int o) | |||
| 47 | } | 47 | } |
| 48 | 48 | ||
| 49 | /* | 49 | /* |
| 50 | * Find a free (set) bit in the inode bitmask. | ||
| 51 | */ | ||
| 52 | static inline int xfs_ialloc_find_free(xfs_inofree_t *fp) | ||
| 53 | { | ||
| 54 | return xfs_lowbit64(*fp); | ||
| 55 | } | ||
| 56 | |||
| 57 | |||
| 58 | /* | ||
| 59 | * Allocate an inode on disk. | 50 | * Allocate an inode on disk. |
| 60 | * Mode is used to tell whether the new inode will need space, and whether | 51 | * Mode is used to tell whether the new inode will need space, and whether |
| 61 | * it is a directory. | 52 | * it is a directory. |
diff --git a/fs/xfs/xfs_ialloc_btree.c b/fs/xfs/xfs_ialloc_btree.c index c6a75815aea0..2b8b7a37aa18 100644 --- a/fs/xfs/xfs_ialloc_btree.c +++ b/fs/xfs/xfs_ialloc_btree.c | |||
| @@ -20,7 +20,6 @@ | |||
| 20 | #include "xfs_types.h" | 20 | #include "xfs_types.h" |
| 21 | #include "xfs_bit.h" | 21 | #include "xfs_bit.h" |
| 22 | #include "xfs_log.h" | 22 | #include "xfs_log.h" |
| 23 | #include "xfs_inum.h" | ||
| 24 | #include "xfs_trans.h" | 23 | #include "xfs_trans.h" |
| 25 | #include "xfs_sb.h" | 24 | #include "xfs_sb.h" |
| 26 | #include "xfs_ag.h" | 25 | #include "xfs_ag.h" |
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index bcc6c249b2c7..1bb4365e8c25 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c | |||
| @@ -19,7 +19,6 @@ | |||
| 19 | #include "xfs_fs.h" | 19 | #include "xfs_fs.h" |
| 20 | #include "xfs_types.h" | 20 | #include "xfs_types.h" |
| 21 | #include "xfs_acl.h" | 21 | #include "xfs_acl.h" |
| 22 | #include "xfs_bit.h" | ||
| 23 | #include "xfs_log.h" | 22 | #include "xfs_log.h" |
| 24 | #include "xfs_inum.h" | 23 | #include "xfs_inum.h" |
| 25 | #include "xfs_trans.h" | 24 | #include "xfs_trans.h" |
| @@ -123,23 +122,7 @@ xfs_inode_free( | |||
| 123 | xfs_idestroy_fork(ip, XFS_ATTR_FORK); | 122 | xfs_idestroy_fork(ip, XFS_ATTR_FORK); |
| 124 | 123 | ||
| 125 | if (ip->i_itemp) { | 124 | if (ip->i_itemp) { |
| 126 | /* | 125 | ASSERT(!(ip->i_itemp->ili_item.li_flags & XFS_LI_IN_AIL)); |
| 127 | * Only if we are shutting down the fs will we see an | ||
| 128 | * inode still in the AIL. If it is there, we should remove | ||
| 129 | * it to prevent a use-after-free from occurring. | ||
| 130 | */ | ||
| 131 | xfs_log_item_t *lip = &ip->i_itemp->ili_item; | ||
| 132 | struct xfs_ail *ailp = lip->li_ailp; | ||
| 133 | |||
| 134 | ASSERT(((lip->li_flags & XFS_LI_IN_AIL) == 0) || | ||
| 135 | XFS_FORCED_SHUTDOWN(ip->i_mount)); | ||
| 136 | if (lip->li_flags & XFS_LI_IN_AIL) { | ||
| 137 | spin_lock(&ailp->xa_lock); | ||
| 138 | if (lip->li_flags & XFS_LI_IN_AIL) | ||
| 139 | xfs_trans_ail_delete(ailp, lip); | ||
| 140 | else | ||
| 141 | spin_unlock(&ailp->xa_lock); | ||
| 142 | } | ||
| 143 | xfs_inode_item_destroy(ip); | 126 | xfs_inode_item_destroy(ip); |
| 144 | ip->i_itemp = NULL; | 127 | ip->i_itemp = NULL; |
| 145 | } | 128 | } |
| @@ -334,9 +317,10 @@ xfs_iget_cache_miss( | |||
| 334 | /* | 317 | /* |
| 335 | * Preload the radix tree so we can insert safely under the | 318 | * Preload the radix tree so we can insert safely under the |
| 336 | * write spinlock. Note that we cannot sleep inside the preload | 319 | * write spinlock. Note that we cannot sleep inside the preload |
| 337 | * region. | 320 | * region. Since we can be called from transaction context, don't |
| 321 | * recurse into the file system. | ||
| 338 | */ | 322 | */ |
| 339 | if (radix_tree_preload(GFP_KERNEL)) { | 323 | if (radix_tree_preload(GFP_NOFS)) { |
| 340 | error = EAGAIN; | 324 | error = EAGAIN; |
| 341 | goto out_destroy; | 325 | goto out_destroy; |
| 342 | } | 326 | } |
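
The GFP_KERNEL to GFP_NOFS change matters because a GFP_KERNEL allocation here could recurse into filesystem reclaim from transaction context. The preload itself exists so the later insert under the write spinlock never needs to allocate. A hedged userspace analogue of that pattern, with invented names and a pthread spinlock standing in for the radix tree lock:

#include <pthread.h>
#include <stdlib.h>

struct node { int key; struct node *next; };

static pthread_spinlock_t tree_lock;
static struct node *head;

static int insert(int key)
{
        struct node *n = malloc(sizeof(*n));    /* "preload": may sleep/fail */
        if (!n)
                return -1;                      /* cf. EAGAIN in the kernel */

        pthread_spin_lock(&tree_lock);          /* no allocation from here on */
        n->key = key;
        n->next = head;
        head = n;
        pthread_spin_unlock(&tree_lock);
        return 0;
}

int main(void)
{
        pthread_spin_init(&tree_lock, PTHREAD_PROCESS_PRIVATE);
        return insert(42);
}
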
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index bc46c0a133d3..a59eea09930a 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c | |||
| @@ -20,7 +20,6 @@ | |||
| 20 | #include "xfs.h" | 20 | #include "xfs.h" |
| 21 | #include "xfs_fs.h" | 21 | #include "xfs_fs.h" |
| 22 | #include "xfs_types.h" | 22 | #include "xfs_types.h" |
| 23 | #include "xfs_bit.h" | ||
| 24 | #include "xfs_log.h" | 23 | #include "xfs_log.h" |
| 25 | #include "xfs_inum.h" | 24 | #include "xfs_inum.h" |
| 26 | #include "xfs_trans.h" | 25 | #include "xfs_trans.h" |
| @@ -61,6 +60,20 @@ STATIC int xfs_iformat_local(xfs_inode_t *, xfs_dinode_t *, int, int); | |||
| 61 | STATIC int xfs_iformat_extents(xfs_inode_t *, xfs_dinode_t *, int); | 60 | STATIC int xfs_iformat_extents(xfs_inode_t *, xfs_dinode_t *, int); |
| 62 | STATIC int xfs_iformat_btree(xfs_inode_t *, xfs_dinode_t *, int); | 61 | STATIC int xfs_iformat_btree(xfs_inode_t *, xfs_dinode_t *, int); |
| 63 | 62 | ||
| 63 | /* | ||
| 64 | * helper function to extract extent size hint from inode | ||
| 65 | */ | ||
| 66 | xfs_extlen_t | ||
| 67 | xfs_get_extsz_hint( | ||
| 68 | struct xfs_inode *ip) | ||
| 69 | { | ||
| 70 | if ((ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE) && ip->i_d.di_extsize) | ||
| 71 | return ip->i_d.di_extsize; | ||
| 72 | if (XFS_IS_REALTIME_INODE(ip)) | ||
| 73 | return ip->i_mount->m_sb.sb_rextsize; | ||
| 74 | return 0; | ||
| 75 | } | ||
| 76 | |||
| 64 | #ifdef DEBUG | 77 | #ifdef DEBUG |
| 65 | /* | 78 | /* |
| 66 | * Make sure that the extents in the given memory buffer | 79 | * Make sure that the extents in the given memory buffer |
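
xfs_get_extsz_hint, now hoisted into xfs_inode.c, encodes a simple precedence: a per-inode extent size hint wins, realtime inodes then fall back to the mount-wide realtime extent size, and everything else gets zero. A self-contained sketch of that precedence, using toy types and flag values rather than the on-disk definitions:

#include <stdio.h>
#include <stdint.h>

#define DIFLAG_EXTSIZE  0x01            /* stand-in flag values */
#define DIFLAG_REALTIME 0x02

struct toy_inode {
        uint16_t di_flags;
        uint32_t di_extsize;            /* per-inode hint, in fs blocks */
        uint32_t rextsize;              /* mount-wide realtime extent size */
};

static uint32_t get_extsz_hint(const struct toy_inode *ip)
{
        if ((ip->di_flags & DIFLAG_EXTSIZE) && ip->di_extsize)
                return ip->di_extsize;
        if (ip->di_flags & DIFLAG_REALTIME)
                return ip->rextsize;
        return 0;
}

int main(void)
{
        struct toy_inode ip = { DIFLAG_REALTIME, 0, 16 };
        printf("hint: %u blocks\n", get_extsz_hint(&ip));   /* 16 */
        return 0;
}
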
| @@ -137,6 +150,7 @@ xfs_imap_to_bp( | |||
| 137 | int ni; | 150 | int ni; |
| 138 | xfs_buf_t *bp; | 151 | xfs_buf_t *bp; |
| 139 | 152 | ||
| 153 | buf_flags |= XBF_UNMAPPED; | ||
| 140 | error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap->im_blkno, | 154 | error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap->im_blkno, |
| 141 | (int)imap->im_len, buf_flags, &bp); | 155 | (int)imap->im_len, buf_flags, &bp); |
| 142 | if (error) { | 156 | if (error) { |
| @@ -226,7 +240,7 @@ xfs_inotobp( | |||
| 226 | if (error) | 240 | if (error) |
| 227 | return error; | 241 | return error; |
| 228 | 242 | ||
| 229 | error = xfs_imap_to_bp(mp, tp, &imap, &bp, XBF_LOCK, imap_flags); | 243 | error = xfs_imap_to_bp(mp, tp, &imap, &bp, 0, imap_flags); |
| 230 | if (error) | 244 | if (error) |
| 231 | return error; | 245 | return error; |
| 232 | 246 | ||
| @@ -782,8 +796,7 @@ xfs_iread( | |||
| 782 | /* | 796 | /* |
| 783 | * Get pointers to the on-disk inode and the buffer containing it. | 797 | * Get pointers to the on-disk inode and the buffer containing it. |
| 784 | */ | 798 | */ |
| 785 | error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &bp, | 799 | error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &bp, 0, iget_flags); |
| 786 | XBF_LOCK, iget_flags); | ||
| 787 | if (error) | 800 | if (error) |
| 788 | return error; | 801 | return error; |
| 789 | dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset); | 802 | dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset); |
| @@ -1342,7 +1355,7 @@ xfs_iunlink( | |||
| 1342 | * Here we put the head pointer into our next pointer, | 1355 | * Here we put the head pointer into our next pointer, |
| 1343 | * and then we fall through to point the head at us. | 1356 | * and then we fall through to point the head at us. |
| 1344 | */ | 1357 | */ |
| 1345 | error = xfs_itobp(mp, tp, ip, &dip, &ibp, XBF_LOCK); | 1358 | error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0); |
| 1346 | if (error) | 1359 | if (error) |
| 1347 | return error; | 1360 | return error; |
| 1348 | 1361 | ||
| @@ -1423,7 +1436,7 @@ xfs_iunlink_remove( | |||
| 1423 | * of dealing with the buffer when there is no need to | 1436 | * of dealing with the buffer when there is no need to |
| 1424 | * change it. | 1437 | * change it. |
| 1425 | */ | 1438 | */ |
| 1426 | error = xfs_itobp(mp, tp, ip, &dip, &ibp, XBF_LOCK); | 1439 | error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0); |
| 1427 | if (error) { | 1440 | if (error) { |
| 1428 | xfs_warn(mp, "%s: xfs_itobp() returned error %d.", | 1441 | xfs_warn(mp, "%s: xfs_itobp() returned error %d.", |
| 1429 | __func__, error); | 1442 | __func__, error); |
| @@ -1484,7 +1497,7 @@ xfs_iunlink_remove( | |||
| 1484 | * Now last_ibp points to the buffer previous to us on | 1497 | * Now last_ibp points to the buffer previous to us on |
| 1485 | * the unlinked list. Pull us from the list. | 1498 | * the unlinked list. Pull us from the list. |
| 1486 | */ | 1499 | */ |
| 1487 | error = xfs_itobp(mp, tp, ip, &dip, &ibp, XBF_LOCK); | 1500 | error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0); |
| 1488 | if (error) { | 1501 | if (error) { |
| 1489 | xfs_warn(mp, "%s: xfs_itobp(2) returned error %d.", | 1502 | xfs_warn(mp, "%s: xfs_itobp(2) returned error %d.", |
| 1490 | __func__, error); | 1503 | __func__, error); |
| @@ -1566,8 +1579,7 @@ xfs_ifree_cluster( | |||
| 1566 | * to mark all the active inodes on the buffer stale. | 1579 | * to mark all the active inodes on the buffer stale. |
| 1567 | */ | 1580 | */ |
| 1568 | bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno, | 1581 | bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno, |
| 1569 | mp->m_bsize * blks_per_cluster, | 1582 | mp->m_bsize * blks_per_cluster, 0); |
| 1570 | XBF_LOCK); | ||
| 1571 | 1583 | ||
| 1572 | if (!bp) | 1584 | if (!bp) |
| 1573 | return ENOMEM; | 1585 | return ENOMEM; |
| @@ -1737,7 +1749,7 @@ xfs_ifree( | |||
| 1737 | 1749 | ||
| 1738 | xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); | 1750 | xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); |
| 1739 | 1751 | ||
| 1740 | error = xfs_itobp(ip->i_mount, tp, ip, &dip, &ibp, XBF_LOCK); | 1752 | error = xfs_itobp(ip->i_mount, tp, ip, &dip, &ibp, 0); |
| 1741 | if (error) | 1753 | if (error) |
| 1742 | return error; | 1754 | return error; |
| 1743 | 1755 | ||
| @@ -2347,11 +2359,11 @@ cluster_corrupt_out: | |||
| 2347 | */ | 2359 | */ |
| 2348 | rcu_read_unlock(); | 2360 | rcu_read_unlock(); |
| 2349 | /* | 2361 | /* |
| 2350 | * Clean up the buffer. If it was B_DELWRI, just release it -- | 2362 | * Clean up the buffer. If it was delwri, just release it -- |
| 2351 | * brelse can handle it with no problems. If not, shut down the | 2363 | * brelse can handle it with no problems. If not, shut down the |
| 2352 | * filesystem before releasing the buffer. | 2364 | * filesystem before releasing the buffer. |
| 2353 | */ | 2365 | */ |
| 2354 | bufwasdelwri = XFS_BUF_ISDELAYWRITE(bp); | 2366 | bufwasdelwri = (bp->b_flags & _XBF_DELWRI_Q); |
| 2355 | if (bufwasdelwri) | 2367 | if (bufwasdelwri) |
| 2356 | xfs_buf_relse(bp); | 2368 | xfs_buf_relse(bp); |
| 2357 | 2369 | ||
| @@ -2377,30 +2389,29 @@ cluster_corrupt_out: | |||
| 2377 | /* | 2389 | /* |
| 2378 | * Unlocks the flush lock | 2390 | * Unlocks the flush lock |
| 2379 | */ | 2391 | */ |
| 2380 | xfs_iflush_abort(iq); | 2392 | xfs_iflush_abort(iq, false); |
| 2381 | kmem_free(ilist); | 2393 | kmem_free(ilist); |
| 2382 | xfs_perag_put(pag); | 2394 | xfs_perag_put(pag); |
| 2383 | return XFS_ERROR(EFSCORRUPTED); | 2395 | return XFS_ERROR(EFSCORRUPTED); |
| 2384 | } | 2396 | } |
| 2385 | 2397 | ||
| 2386 | /* | 2398 | /* |
| 2387 | * xfs_iflush() will write a modified inode's changes out to the | 2399 | * Flush dirty inode metadata into the backing buffer. |
| 2388 | * inode's on disk home. The caller must have the inode lock held | 2400 | * |
| 2389 | * in at least shared mode and the inode flush completion must be | 2401 | * The caller must have the inode lock and the inode flush lock held. The |
| 2390 | * active as well. The inode lock will still be held upon return from | 2402 | * inode lock will still be held upon return to the caller, and the inode |
| 2391 | * the call and the caller is free to unlock it. | 2403 | * flush lock will be released after the inode has reached the disk. |
| 2392 | * The inode flush will be completed when the inode reaches the disk. | 2404 | * |
| 2393 | * The flags indicate how the inode's buffer should be written out. | 2405 | * The caller must write out the buffer returned in *bpp and release it. |
| 2394 | */ | 2406 | */ |
| 2395 | int | 2407 | int |
| 2396 | xfs_iflush( | 2408 | xfs_iflush( |
| 2397 | xfs_inode_t *ip, | 2409 | struct xfs_inode *ip, |
| 2398 | uint flags) | 2410 | struct xfs_buf **bpp) |
| 2399 | { | 2411 | { |
| 2400 | xfs_inode_log_item_t *iip; | 2412 | struct xfs_mount *mp = ip->i_mount; |
| 2401 | xfs_buf_t *bp; | 2413 | struct xfs_buf *bp; |
| 2402 | xfs_dinode_t *dip; | 2414 | struct xfs_dinode *dip; |
| 2403 | xfs_mount_t *mp; | ||
| 2404 | int error; | 2415 | int error; |
| 2405 | 2416 | ||
| 2406 | XFS_STATS_INC(xs_iflush_count); | 2417 | XFS_STATS_INC(xs_iflush_count); |
| @@ -2410,25 +2421,8 @@ xfs_iflush( | |||
| 2410 | ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE || | 2421 | ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE || |
| 2411 | ip->i_d.di_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK)); | 2422 | ip->i_d.di_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK)); |
| 2412 | 2423 | ||
| 2413 | iip = ip->i_itemp; | 2424 | *bpp = NULL; |
| 2414 | mp = ip->i_mount; | ||
| 2415 | 2425 | ||
| 2416 | /* | ||
| 2417 | * We can't flush the inode until it is unpinned, so wait for it if we | ||
| 2418 | * are allowed to block. We know no one new can pin it, because we are | ||
| 2419 | * holding the inode lock shared and you need to hold it exclusively to | ||
| 2420 | * pin the inode. | ||
| 2421 | * | ||
| 2422 | * If we are not allowed to block, force the log out asynchronously so | ||
| 2423 | * that when we come back the inode will be unpinned. If other inodes | ||
| 2424 | * in the same cluster are dirty, they will probably write the inode | ||
| 2425 | * out for us if they occur after the log force completes. | ||
| 2426 | */ | ||
| 2427 | if (!(flags & SYNC_WAIT) && xfs_ipincount(ip)) { | ||
| 2428 | xfs_iunpin(ip); | ||
| 2429 | xfs_ifunlock(ip); | ||
| 2430 | return EAGAIN; | ||
| 2431 | } | ||
| 2432 | xfs_iunpin_wait(ip); | 2426 | xfs_iunpin_wait(ip); |
| 2433 | 2427 | ||
| 2434 | /* | 2428 | /* |
| @@ -2447,20 +2441,20 @@ xfs_iflush( | |||
| 2447 | /* | 2441 | /* |
| 2448 | * This may have been unpinned because the filesystem is shutting | 2442 | * This may have been unpinned because the filesystem is shutting |
| 2449 | * down forcibly. If that's the case we must not write this inode | 2443 | * down forcibly. If that's the case we must not write this inode |
| 2450 | * to disk, because the log record didn't make it to disk! | 2444 | * to disk, because the log record didn't make it to disk. |
| 2445 | * | ||
| 2446 | * We also have to remove the log item from the AIL in this case, | ||
| 2447 | * as we wait for an empty AIL as part of the unmount process. | ||
| 2451 | */ | 2448 | */ |
| 2452 | if (XFS_FORCED_SHUTDOWN(mp)) { | 2449 | if (XFS_FORCED_SHUTDOWN(mp)) { |
| 2453 | if (iip) | 2450 | error = XFS_ERROR(EIO); |
| 2454 | iip->ili_fields = 0; | 2451 | goto abort_out; |
| 2455 | xfs_ifunlock(ip); | ||
| 2456 | return XFS_ERROR(EIO); | ||
| 2457 | } | 2452 | } |
| 2458 | 2453 | ||
| 2459 | /* | 2454 | /* |
| 2460 | * Get the buffer containing the on-disk inode. | 2455 | * Get the buffer containing the on-disk inode. |
| 2461 | */ | 2456 | */ |
| 2462 | error = xfs_itobp(mp, NULL, ip, &dip, &bp, | 2457 | error = xfs_itobp(mp, NULL, ip, &dip, &bp, XBF_TRYLOCK); |
| 2463 | (flags & SYNC_TRYLOCK) ? XBF_TRYLOCK : XBF_LOCK); | ||
| 2464 | if (error || !bp) { | 2458 | if (error || !bp) { |
| 2465 | xfs_ifunlock(ip); | 2459 | xfs_ifunlock(ip); |
| 2466 | return error; | 2460 | return error; |
| @@ -2488,23 +2482,20 @@ xfs_iflush( | |||
| 2488 | if (error) | 2482 | if (error) |
| 2489 | goto cluster_corrupt_out; | 2483 | goto cluster_corrupt_out; |
| 2490 | 2484 | ||
| 2491 | if (flags & SYNC_WAIT) | 2485 | *bpp = bp; |
| 2492 | error = xfs_bwrite(bp); | 2486 | return 0; |
| 2493 | else | ||
| 2494 | xfs_buf_delwri_queue(bp); | ||
| 2495 | |||
| 2496 | xfs_buf_relse(bp); | ||
| 2497 | return error; | ||
| 2498 | 2487 | ||
| 2499 | corrupt_out: | 2488 | corrupt_out: |
| 2500 | xfs_buf_relse(bp); | 2489 | xfs_buf_relse(bp); |
| 2501 | xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); | 2490 | xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); |
| 2502 | cluster_corrupt_out: | 2491 | cluster_corrupt_out: |
| 2492 | error = XFS_ERROR(EFSCORRUPTED); | ||
| 2493 | abort_out: | ||
| 2503 | /* | 2494 | /* |
| 2504 | * Unlocks the flush lock | 2495 | * Unlocks the flush lock |
| 2505 | */ | 2496 | */ |
| 2506 | xfs_iflush_abort(ip); | 2497 | xfs_iflush_abort(ip, false); |
| 2507 | return XFS_ERROR(EFSCORRUPTED); | 2498 | return error; |
| 2508 | } | 2499 | } |
| 2509 | 2500 | ||
| 2510 | 2501 | ||
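
The rewritten xfs_iflush no longer submits I/O itself: on success it hands the locked buffer back in *bpp, and the caller queues it for write and releases it. A rough sketch of that ownership hand-off, with invented names and malloc/free standing in for buffer get/release:

#include <stdio.h>
#include <stdlib.h>

struct buf { int dirty; };

static int flush(struct buf **out)
{
        struct buf *bp = malloc(sizeof(*bp));
        if (!bp)
                return -1;
        bp->dirty = 1;
        *out = bp;              /* caller now owns bp */
        return 0;
}

int main(void)
{
        struct buf *bp = NULL;

        if (flush(&bp) == 0) {
                printf("writing buffer (dirty=%d)\n", bp->dirty);
                free(bp);       /* caller writes out and releases */
        }
        return 0;
}
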
| @@ -2706,27 +2697,6 @@ corrupt_out: | |||
| 2706 | return XFS_ERROR(EFSCORRUPTED); | 2697 | return XFS_ERROR(EFSCORRUPTED); |
| 2707 | } | 2698 | } |
| 2708 | 2699 | ||
| 2709 | void | ||
| 2710 | xfs_promote_inode( | ||
| 2711 | struct xfs_inode *ip) | ||
| 2712 | { | ||
| 2713 | struct xfs_buf *bp; | ||
| 2714 | |||
| 2715 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); | ||
| 2716 | |||
| 2717 | bp = xfs_incore(ip->i_mount->m_ddev_targp, ip->i_imap.im_blkno, | ||
| 2718 | ip->i_imap.im_len, XBF_TRYLOCK); | ||
| 2719 | if (!bp) | ||
| 2720 | return; | ||
| 2721 | |||
| 2722 | if (XFS_BUF_ISDELAYWRITE(bp)) { | ||
| 2723 | xfs_buf_delwri_promote(bp); | ||
| 2724 | wake_up_process(ip->i_mount->m_ddev_targp->bt_task); | ||
| 2725 | } | ||
| 2726 | |||
| 2727 | xfs_buf_relse(bp); | ||
| 2728 | } | ||
| 2729 | |||
| 2730 | /* | 2700 | /* |
| 2731 | * Return a pointer to the extent record at file index idx. | 2701 | * Return a pointer to the extent record at file index idx. |
| 2732 | */ | 2702 | */ |
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 7fee3387e1c8..1efff36a75b6 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h | |||
| @@ -529,11 +529,12 @@ int xfs_iunlink(struct xfs_trans *, xfs_inode_t *); | |||
| 529 | 529 | ||
| 530 | void xfs_iext_realloc(xfs_inode_t *, int, int); | 530 | void xfs_iext_realloc(xfs_inode_t *, int, int); |
| 531 | void xfs_iunpin_wait(xfs_inode_t *); | 531 | void xfs_iunpin_wait(xfs_inode_t *); |
| 532 | int xfs_iflush(xfs_inode_t *, uint); | 532 | int xfs_iflush(struct xfs_inode *, struct xfs_buf **); |
| 533 | void xfs_promote_inode(struct xfs_inode *); | ||
| 534 | void xfs_lock_inodes(xfs_inode_t **, int, uint); | 533 | void xfs_lock_inodes(xfs_inode_t **, int, uint); |
| 535 | void xfs_lock_two_inodes(xfs_inode_t *, xfs_inode_t *, uint); | 534 | void xfs_lock_two_inodes(xfs_inode_t *, xfs_inode_t *, uint); |
| 536 | 535 | ||
| 536 | xfs_extlen_t xfs_get_extsz_hint(struct xfs_inode *ip); | ||
| 537 | |||
| 537 | #define IHOLD(ip) \ | 538 | #define IHOLD(ip) \ |
| 538 | do { \ | 539 | do { \ |
| 539 | ASSERT(atomic_read(&VFS_I(ip)->i_count) > 0) ; \ | 540 | ASSERT(atomic_read(&VFS_I(ip)->i_count) > 0) ; \ |
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 05d924efceaf..6cdbf90c6f7b 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c | |||
| @@ -18,9 +18,7 @@ | |||
| 18 | #include "xfs.h" | 18 | #include "xfs.h" |
| 19 | #include "xfs_fs.h" | 19 | #include "xfs_fs.h" |
| 20 | #include "xfs_types.h" | 20 | #include "xfs_types.h" |
| 21 | #include "xfs_bit.h" | ||
| 22 | #include "xfs_log.h" | 21 | #include "xfs_log.h" |
| 23 | #include "xfs_inum.h" | ||
| 24 | #include "xfs_trans.h" | 22 | #include "xfs_trans.h" |
| 25 | #include "xfs_sb.h" | 23 | #include "xfs_sb.h" |
| 26 | #include "xfs_ag.h" | 24 | #include "xfs_ag.h" |
| @@ -480,25 +478,16 @@ xfs_inode_item_unpin( | |||
| 480 | wake_up_bit(&ip->i_flags, __XFS_IPINNED_BIT); | 478 | wake_up_bit(&ip->i_flags, __XFS_IPINNED_BIT); |
| 481 | } | 479 | } |
| 482 | 480 | ||
| 483 | /* | ||
| 484 | * This is called to attempt to lock the inode associated with this | ||
| 485 | * inode log item, in preparation for the push routine which does the actual | ||
| 486 | * iflush. Don't sleep on the inode lock or the flush lock. | ||
| 487 | * | ||
| 488 | * If the flush lock is already held, indicating that the inode has | ||
| 489 | * been or is in the process of being flushed, then (ideally) we'd like to | ||
| 490 | * see if the inode's buffer is still incore, and if so give it a nudge. | ||
| 491 | * We delay doing so until the pushbuf routine, though, to avoid holding | ||
| 492 | * the AIL lock across a call to the blackhole which is the buffer cache. | ||
| 493 | * Also we don't want to sleep in any device strategy routines, which can happen | ||
| 494 | * if we do the subsequent bawrite in here. | ||
| 495 | */ | ||
| 496 | STATIC uint | 481 | STATIC uint |
| 497 | xfs_inode_item_trylock( | 482 | xfs_inode_item_push( |
| 498 | struct xfs_log_item *lip) | 483 | struct xfs_log_item *lip, |
| 484 | struct list_head *buffer_list) | ||
| 499 | { | 485 | { |
| 500 | struct xfs_inode_log_item *iip = INODE_ITEM(lip); | 486 | struct xfs_inode_log_item *iip = INODE_ITEM(lip); |
| 501 | struct xfs_inode *ip = iip->ili_inode; | 487 | struct xfs_inode *ip = iip->ili_inode; |
| 488 | struct xfs_buf *bp = NULL; | ||
| 489 | uint rval = XFS_ITEM_SUCCESS; | ||
| 490 | int error; | ||
| 502 | 491 | ||
| 503 | if (xfs_ipincount(ip) > 0) | 492 | if (xfs_ipincount(ip) > 0) |
| 504 | return XFS_ITEM_PINNED; | 493 | return XFS_ITEM_PINNED; |
| @@ -506,30 +495,50 @@ xfs_inode_item_trylock( | |||
| 506 | if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) | 495 | if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) |
| 507 | return XFS_ITEM_LOCKED; | 496 | return XFS_ITEM_LOCKED; |
| 508 | 497 | ||
| 498 | /* | ||
| 499 | * Re-check the pincount now that we stabilized the value by | ||
| 500 | * taking the ilock. | ||
| 501 | */ | ||
| 502 | if (xfs_ipincount(ip) > 0) { | ||
| 503 | rval = XFS_ITEM_PINNED; | ||
| 504 | goto out_unlock; | ||
| 505 | } | ||
| 506 | |||
| 507 | /* | ||
| 508 | * Someone else is already flushing the inode. Nothing we can do | ||
| 509 | * here but wait for the flush to finish and remove the item from | ||
| 510 | * the AIL. | ||
| 511 | */ | ||
| 509 | if (!xfs_iflock_nowait(ip)) { | 512 | if (!xfs_iflock_nowait(ip)) { |
| 510 | /* | 513 | rval = XFS_ITEM_FLUSHING; |
| 511 | * inode has already been flushed to the backing buffer, | 514 | goto out_unlock; |
| 512 | * leave it locked in shared mode, pushbuf routine will | ||
| 513 | * unlock it. | ||
| 514 | */ | ||
| 515 | return XFS_ITEM_PUSHBUF; | ||
| 516 | } | 515 | } |
| 517 | 516 | ||
| 518 | /* Stale items should force out the iclog */ | 517 | /* |
| 518 | * Stale inode items should force out the iclog. | ||
| 519 | */ | ||
| 519 | if (ip->i_flags & XFS_ISTALE) { | 520 | if (ip->i_flags & XFS_ISTALE) { |
| 520 | xfs_ifunlock(ip); | 521 | xfs_ifunlock(ip); |
| 521 | xfs_iunlock(ip, XFS_ILOCK_SHARED); | 522 | xfs_iunlock(ip, XFS_ILOCK_SHARED); |
| 522 | return XFS_ITEM_PINNED; | 523 | return XFS_ITEM_PINNED; |
| 523 | } | 524 | } |
| 524 | 525 | ||
| 525 | #ifdef DEBUG | 526 | ASSERT(iip->ili_fields != 0 || XFS_FORCED_SHUTDOWN(ip->i_mount)); |
| 526 | if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) { | 527 | ASSERT(iip->ili_logged == 0 || XFS_FORCED_SHUTDOWN(ip->i_mount)); |
| 527 | ASSERT(iip->ili_fields != 0); | 528 | |
| 528 | ASSERT(iip->ili_logged == 0); | 529 | spin_unlock(&lip->li_ailp->xa_lock); |
| 529 | ASSERT(lip->li_flags & XFS_LI_IN_AIL); | 530 | |
| 531 | error = xfs_iflush(ip, &bp); | ||
| 532 | if (!error) { | ||
| 533 | if (!xfs_buf_delwri_queue(bp, buffer_list)) | ||
| 534 | rval = XFS_ITEM_FLUSHING; | ||
| 535 | xfs_buf_relse(bp); | ||
| 530 | } | 536 | } |
| 531 | #endif | 537 | |
| 532 | return XFS_ITEM_SUCCESS; | 538 | spin_lock(&lip->li_ailp->xa_lock); |
| 539 | out_unlock: | ||
| 540 | xfs_iunlock(ip, XFS_ILOCK_SHARED); | ||
| 541 | return rval; | ||
| 533 | } | 542 | } |
| 534 | 543 | ||
| 535 | /* | 544 | /* |
| @@ -614,86 +623,6 @@ xfs_inode_item_committed( | |||
| 614 | } | 623 | } |
| 615 | 624 | ||
| 616 | /* | 625 | /* |
| 617 | * This gets called by xfs_trans_push_ail(), when IOP_TRYLOCK | ||
| 618 | * failed to get the inode flush lock but did get the inode locked SHARED. | ||
| 619 | * Here we're trying to see if the inode buffer is incore, and if so whether it's | ||
| 620 | * marked delayed write. If that's the case, we'll promote it and that will | ||
| 621 | * allow the caller to write the buffer by triggering the xfsbufd to run. | ||
| 622 | */ | ||
| 623 | STATIC bool | ||
| 624 | xfs_inode_item_pushbuf( | ||
| 625 | struct xfs_log_item *lip) | ||
| 626 | { | ||
| 627 | struct xfs_inode_log_item *iip = INODE_ITEM(lip); | ||
| 628 | struct xfs_inode *ip = iip->ili_inode; | ||
| 629 | struct xfs_buf *bp; | ||
| 630 | bool ret = true; | ||
| 631 | |||
| 632 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED)); | ||
| 633 | |||
| 634 | /* | ||
| 635 | * If a flush is not in progress anymore, chances are that the | ||
| 636 | * inode was taken off the AIL. So, just get out. | ||
| 637 | */ | ||
| 638 | if (!xfs_isiflocked(ip) || | ||
| 639 | !(lip->li_flags & XFS_LI_IN_AIL)) { | ||
| 640 | xfs_iunlock(ip, XFS_ILOCK_SHARED); | ||
| 641 | return true; | ||
| 642 | } | ||
| 643 | |||
| 644 | bp = xfs_incore(ip->i_mount->m_ddev_targp, iip->ili_format.ilf_blkno, | ||
| 645 | iip->ili_format.ilf_len, XBF_TRYLOCK); | ||
| 646 | |||
| 647 | xfs_iunlock(ip, XFS_ILOCK_SHARED); | ||
| 648 | if (!bp) | ||
| 649 | return true; | ||
| 650 | if (XFS_BUF_ISDELAYWRITE(bp)) | ||
| 651 | xfs_buf_delwri_promote(bp); | ||
| 652 | if (xfs_buf_ispinned(bp)) | ||
| 653 | ret = false; | ||
| 654 | xfs_buf_relse(bp); | ||
| 655 | return ret; | ||
| 656 | } | ||
| 657 | |||
| 658 | /* | ||
| 659 | * This is called to asynchronously write the inode associated with this | ||
| 660 | * inode log item out to disk. The inode will already have been locked by | ||
| 661 | * a successful call to xfs_inode_item_trylock(). | ||
| 662 | */ | ||
| 663 | STATIC void | ||
| 664 | xfs_inode_item_push( | ||
| 665 | struct xfs_log_item *lip) | ||
| 666 | { | ||
| 667 | struct xfs_inode_log_item *iip = INODE_ITEM(lip); | ||
| 668 | struct xfs_inode *ip = iip->ili_inode; | ||
| 669 | |||
| 670 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED)); | ||
| 671 | ASSERT(xfs_isiflocked(ip)); | ||
| 672 | |||
| 673 | /* | ||
| 674 | * Since we were able to lock the inode's flush lock and | ||
| 675 | * we found it on the AIL, the inode must be dirty. This | ||
| 676 | * is because the inode is removed from the AIL while still | ||
| 677 | * holding the flush lock in xfs_iflush_done(). Thus, if | ||
| 678 | * we found it in the AIL and were able to obtain the flush | ||
| 679 | * lock without sleeping, then there must not have been | ||
| 680 | * anyone in the process of flushing the inode. | ||
| 681 | */ | ||
| 682 | ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || iip->ili_fields != 0); | ||
| 683 | |||
| 684 | /* | ||
| 685 | * Push the inode to its backing buffer. This will not remove the | ||
| 686 | * inode from the AIL - a further push will be required to trigger a | ||
| 687 | * buffer push. However, this allows all the dirty inodes to be pushed | ||
| 688 | * to the buffer before it is pushed to disk. The buffer IO completion | ||
| 689 | * will pull the inode from the AIL, mark it clean and unlock the flush | ||
| 690 | * lock. | ||
| 691 | */ | ||
| 692 | (void) xfs_iflush(ip, SYNC_TRYLOCK); | ||
| 693 | xfs_iunlock(ip, XFS_ILOCK_SHARED); | ||
| 694 | } | ||
| 695 | |||
| 696 | /* | ||
| 697 | * XXX rcc - this one really has to do something. Probably needs | 626 | * XXX rcc - this one really has to do something. Probably needs |
| 698 | * to stamp in a new field in the incore inode. | 627 | * to stamp in a new field in the incore inode. |
| 699 | */ | 628 | */ |
| @@ -713,11 +642,9 @@ static const struct xfs_item_ops xfs_inode_item_ops = { | |||
| 713 | .iop_format = xfs_inode_item_format, | 642 | .iop_format = xfs_inode_item_format, |
| 714 | .iop_pin = xfs_inode_item_pin, | 643 | .iop_pin = xfs_inode_item_pin, |
| 715 | .iop_unpin = xfs_inode_item_unpin, | 644 | .iop_unpin = xfs_inode_item_unpin, |
| 716 | .iop_trylock = xfs_inode_item_trylock, | ||
| 717 | .iop_unlock = xfs_inode_item_unlock, | 645 | .iop_unlock = xfs_inode_item_unlock, |
| 718 | .iop_committed = xfs_inode_item_committed, | 646 | .iop_committed = xfs_inode_item_committed, |
| 719 | .iop_push = xfs_inode_item_push, | 647 | .iop_push = xfs_inode_item_push, |
| 720 | .iop_pushbuf = xfs_inode_item_pushbuf, | ||
| 721 | .iop_committing = xfs_inode_item_committing | 648 | .iop_committing = xfs_inode_item_committing |
| 722 | }; | 649 | }; |
| 723 | 650 | ||
| @@ -848,7 +775,8 @@ xfs_iflush_done( | |||
| 848 | ASSERT(i <= need_ail); | 775 | ASSERT(i <= need_ail); |
| 849 | } | 776 | } |
| 850 | /* xfs_trans_ail_delete_bulk() drops the AIL lock. */ | 777 | /* xfs_trans_ail_delete_bulk() drops the AIL lock. */ |
| 851 | xfs_trans_ail_delete_bulk(ailp, log_items, i); | 778 | xfs_trans_ail_delete_bulk(ailp, log_items, i, |
| 779 | SHUTDOWN_CORRUPT_INCORE); | ||
| 852 | } | 780 | } |
| 853 | 781 | ||
| 854 | 782 | ||
| @@ -869,16 +797,15 @@ xfs_iflush_done( | |||
| 869 | } | 797 | } |
| 870 | 798 | ||
| 871 | /* | 799 | /* |
| 872 | * This is the inode flushing abort routine. It is called | 800 | * This is the inode flushing abort routine. It is called from xfs_iflush when |
| 873 | * from xfs_iflush when the filesystem is shutting down to clean | 801 | * the filesystem is shutting down to clean up the inode state. It is |
| 874 | * up the inode state. | 802 | * responsible for removing the inode item from the AIL if it has not been |
| 875 | * It is responsible for removing the inode item | 803 | * re-logged, and unlocking the inode's flush lock. |
| 876 | * from the AIL if it has not been re-logged, and unlocking the inode's | ||
| 877 | * flush lock. | ||
| 878 | */ | 804 | */ |
| 879 | void | 805 | void |
| 880 | xfs_iflush_abort( | 806 | xfs_iflush_abort( |
| 881 | xfs_inode_t *ip) | 807 | xfs_inode_t *ip, |
| 808 | bool stale) | ||
| 882 | { | 809 | { |
| 883 | xfs_inode_log_item_t *iip = ip->i_itemp; | 810 | xfs_inode_log_item_t *iip = ip->i_itemp; |
| 884 | 811 | ||
| @@ -888,7 +815,10 @@ xfs_iflush_abort( | |||
| 888 | spin_lock(&ailp->xa_lock); | 815 | spin_lock(&ailp->xa_lock); |
| 889 | if (iip->ili_item.li_flags & XFS_LI_IN_AIL) { | 816 | if (iip->ili_item.li_flags & XFS_LI_IN_AIL) { |
| 890 | /* xfs_trans_ail_delete() drops the AIL lock. */ | 817 | /* xfs_trans_ail_delete() drops the AIL lock. */ |
| 891 | xfs_trans_ail_delete(ailp, (xfs_log_item_t *)iip); | 818 | xfs_trans_ail_delete(ailp, &iip->ili_item, |
| 819 | stale ? | ||
| 820 | SHUTDOWN_LOG_IO_ERROR : | ||
| 821 | SHUTDOWN_CORRUPT_INCORE); | ||
| 892 | } else | 822 | } else |
| 893 | spin_unlock(&ailp->xa_lock); | 823 | spin_unlock(&ailp->xa_lock); |
| 894 | } | 824 | } |
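
Note the asymmetric locking convention the comment calls out: xfs_trans_ail_delete() drops the AIL lock itself, so the caller unlocks only on the branch where the helper was not invoked. A small sketch of that convention, with a pthread mutex standing in for xa_lock:

#include <pthread.h>
#include <stdbool.h>

static pthread_mutex_t ail_lock = PTHREAD_MUTEX_INITIALIZER;

static void ail_delete(int *item)       /* drops ail_lock on return */
{
        *item = 0;                      /* remove from the list */
        pthread_mutex_unlock(&ail_lock);
}

static void abort_flush(int *item, bool in_ail)
{
        pthread_mutex_lock(&ail_lock);
        if (in_ail)
                ail_delete(item);       /* lock released inside */
        else
                pthread_mutex_unlock(&ail_lock);
}

int main(void)
{
        int item = 1;
        abort_flush(&item, true);
        return 0;
}
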
| @@ -915,7 +845,7 @@ xfs_istale_done( | |||
| 915 | struct xfs_buf *bp, | 845 | struct xfs_buf *bp, |
| 916 | struct xfs_log_item *lip) | 846 | struct xfs_log_item *lip) |
| 917 | { | 847 | { |
| 918 | xfs_iflush_abort(INODE_ITEM(lip)->ili_inode); | 848 | xfs_iflush_abort(INODE_ITEM(lip)->ili_inode, true); |
| 919 | } | 849 | } |
| 920 | 850 | ||
| 921 | /* | 851 | /* |
diff --git a/fs/xfs/xfs_inode_item.h b/fs/xfs/xfs_inode_item.h index 41d61c3b7a36..376d4d0b2635 100644 --- a/fs/xfs/xfs_inode_item.h +++ b/fs/xfs/xfs_inode_item.h | |||
| @@ -165,7 +165,7 @@ extern void xfs_inode_item_init(struct xfs_inode *, struct xfs_mount *); | |||
| 165 | extern void xfs_inode_item_destroy(struct xfs_inode *); | 165 | extern void xfs_inode_item_destroy(struct xfs_inode *); |
| 166 | extern void xfs_iflush_done(struct xfs_buf *, struct xfs_log_item *); | 166 | extern void xfs_iflush_done(struct xfs_buf *, struct xfs_log_item *); |
| 167 | extern void xfs_istale_done(struct xfs_buf *, struct xfs_log_item *); | 167 | extern void xfs_istale_done(struct xfs_buf *, struct xfs_log_item *); |
| 168 | extern void xfs_iflush_abort(struct xfs_inode *); | 168 | extern void xfs_iflush_abort(struct xfs_inode *, bool); |
| 169 | extern int xfs_inode_item_format_convert(xfs_log_iovec_t *, | 169 | extern int xfs_inode_item_format_convert(xfs_log_iovec_t *, |
| 170 | xfs_inode_log_format_t *); | 170 | xfs_inode_log_format_t *); |
| 171 | 171 | ||
diff --git a/fs/xfs/xfs_inum.h b/fs/xfs/xfs_inum.h index b253c0ea5bec..90efdaf1706f 100644 --- a/fs/xfs/xfs_inum.h +++ b/fs/xfs/xfs_inum.h | |||
| @@ -26,11 +26,6 @@ | |||
| 26 | * high agno_log-agblklog-inopblog bits - 0 | 26 | * high agno_log-agblklog-inopblog bits - 0 |
| 27 | */ | 27 | */ |
| 28 | 28 | ||
| 29 | typedef __uint32_t xfs_agino_t; /* within allocation grp inode number */ | ||
| 30 | |||
| 31 | #define NULLFSINO ((xfs_ino_t)-1) | ||
| 32 | #define NULLAGINO ((xfs_agino_t)-1) | ||
| 33 | |||
| 34 | struct xfs_mount; | 29 | struct xfs_mount; |
| 35 | 30 | ||
| 36 | #define XFS_INO_MASK(k) (__uint32_t)((1ULL << (k)) - 1) | 31 | #define XFS_INO_MASK(k) (__uint32_t)((1ULL << (k)) - 1) |
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 91f8ff547ab3..3a05a41b5d76 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c | |||
| @@ -17,9 +17,7 @@ | |||
| 17 | */ | 17 | */ |
| 18 | #include "xfs.h" | 18 | #include "xfs.h" |
| 19 | #include "xfs_fs.h" | 19 | #include "xfs_fs.h" |
| 20 | #include "xfs_bit.h" | ||
| 21 | #include "xfs_log.h" | 20 | #include "xfs_log.h" |
| 22 | #include "xfs_inum.h" | ||
| 23 | #include "xfs_trans.h" | 21 | #include "xfs_trans.h" |
| 24 | #include "xfs_sb.h" | 22 | #include "xfs_sb.h" |
| 25 | #include "xfs_ag.h" | 23 | #include "xfs_ag.h" |
diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c index a849a5473aff..c4f2da0d2bf5 100644 --- a/fs/xfs/xfs_ioctl32.c +++ b/fs/xfs/xfs_ioctl32.c | |||
| @@ -22,9 +22,7 @@ | |||
| 22 | #include <asm/uaccess.h> | 22 | #include <asm/uaccess.h> |
| 23 | #include "xfs.h" | 23 | #include "xfs.h" |
| 24 | #include "xfs_fs.h" | 24 | #include "xfs_fs.h" |
| 25 | #include "xfs_bit.h" | ||
| 26 | #include "xfs_log.h" | 25 | #include "xfs_log.h" |
| 27 | #include "xfs_inum.h" | ||
| 28 | #include "xfs_trans.h" | 26 | #include "xfs_trans.h" |
| 29 | #include "xfs_sb.h" | 27 | #include "xfs_sb.h" |
| 30 | #include "xfs_ag.h" | 28 | #include "xfs_ag.h" |
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 71a464503c43..aadfce6681ee 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c | |||
| @@ -17,9 +17,7 @@ | |||
| 17 | */ | 17 | */ |
| 18 | #include "xfs.h" | 18 | #include "xfs.h" |
| 19 | #include "xfs_fs.h" | 19 | #include "xfs_fs.h" |
| 20 | #include "xfs_bit.h" | ||
| 21 | #include "xfs_log.h" | 20 | #include "xfs_log.h" |
| 22 | #include "xfs_inum.h" | ||
| 23 | #include "xfs_trans.h" | 21 | #include "xfs_trans.h" |
| 24 | #include "xfs_sb.h" | 22 | #include "xfs_sb.h" |
| 25 | #include "xfs_ag.h" | 23 | #include "xfs_ag.h" |
| @@ -37,7 +35,6 @@ | |||
| 37 | #include "xfs_rtalloc.h" | 35 | #include "xfs_rtalloc.h" |
| 38 | #include "xfs_error.h" | 36 | #include "xfs_error.h" |
| 39 | #include "xfs_itable.h" | 37 | #include "xfs_itable.h" |
| 40 | #include "xfs_rw.h" | ||
| 41 | #include "xfs_attr.h" | 38 | #include "xfs_attr.h" |
| 42 | #include "xfs_buf_item.h" | 39 | #include "xfs_buf_item.h" |
| 43 | #include "xfs_trans_space.h" | 40 | #include "xfs_trans_space.h" |
| @@ -142,11 +139,7 @@ xfs_iomap_write_direct( | |||
| 142 | int committed; | 139 | int committed; |
| 143 | int error; | 140 | int error; |
| 144 | 141 | ||
| 145 | /* | 142 | error = xfs_qm_dqattach(ip, 0); |
| 146 | * Make sure that the dquots are there. This doesn't hold | ||
| 147 | * the ilock across a disk read. | ||
| 148 | */ | ||
| 149 | error = xfs_qm_dqattach_locked(ip, 0); | ||
| 150 | if (error) | 143 | if (error) |
| 151 | return XFS_ERROR(error); | 144 | return XFS_ERROR(error); |
| 152 | 145 | ||
| @@ -158,7 +151,7 @@ xfs_iomap_write_direct( | |||
| 158 | if ((offset + count) > XFS_ISIZE(ip)) { | 151 | if ((offset + count) > XFS_ISIZE(ip)) { |
| 159 | error = xfs_iomap_eof_align_last_fsb(mp, ip, extsz, &last_fsb); | 152 | error = xfs_iomap_eof_align_last_fsb(mp, ip, extsz, &last_fsb); |
| 160 | if (error) | 153 | if (error) |
| 161 | goto error_out; | 154 | return XFS_ERROR(error); |
| 162 | } else { | 155 | } else { |
| 163 | if (nmaps && (imap->br_startblock == HOLESTARTBLOCK)) | 156 | if (nmaps && (imap->br_startblock == HOLESTARTBLOCK)) |
| 164 | last_fsb = MIN(last_fsb, (xfs_fileoff_t) | 157 | last_fsb = MIN(last_fsb, (xfs_fileoff_t) |
| @@ -190,7 +183,6 @@ xfs_iomap_write_direct( | |||
| 190 | /* | 183 | /* |
| 191 | * Allocate and setup the transaction | 184 | * Allocate and setup the transaction |
| 192 | */ | 185 | */ |
| 193 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | ||
| 194 | tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT); | 186 | tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT); |
| 195 | error = xfs_trans_reserve(tp, resblks, | 187 | error = xfs_trans_reserve(tp, resblks, |
| 196 | XFS_WRITE_LOG_RES(mp), resrtextents, | 188 | XFS_WRITE_LOG_RES(mp), resrtextents, |
| @@ -199,15 +191,16 @@ xfs_iomap_write_direct( | |||
| 199 | /* | 191 | /* |
| 200 | * Check for running out of space, note: need lock to return | 192 | * Check for running out of space, note: need lock to return |
| 201 | */ | 193 | */ |
| 202 | if (error) | 194 | if (error) { |
| 203 | xfs_trans_cancel(tp, 0); | 195 | xfs_trans_cancel(tp, 0); |
| 196 | return XFS_ERROR(error); | ||
| 197 | } | ||
| 198 | |||
| 204 | xfs_ilock(ip, XFS_ILOCK_EXCL); | 199 | xfs_ilock(ip, XFS_ILOCK_EXCL); |
| 205 | if (error) | ||
| 206 | goto error_out; | ||
| 207 | 200 | ||
| 208 | error = xfs_trans_reserve_quota_nblks(tp, ip, qblocks, 0, quota_flag); | 201 | error = xfs_trans_reserve_quota_nblks(tp, ip, qblocks, 0, quota_flag); |
| 209 | if (error) | 202 | if (error) |
| 210 | goto error1; | 203 | goto out_trans_cancel; |
| 211 | 204 | ||
| 212 | xfs_trans_ijoin(tp, ip, 0); | 205 | xfs_trans_ijoin(tp, ip, 0); |
| 213 | 206 | ||
| @@ -224,42 +217,39 @@ xfs_iomap_write_direct( | |||
| 224 | error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb, bmapi_flag, | 217 | error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb, bmapi_flag, |
| 225 | &firstfsb, 0, imap, &nimaps, &free_list); | 218 | &firstfsb, 0, imap, &nimaps, &free_list); |
| 226 | if (error) | 219 | if (error) |
| 227 | goto error0; | 220 | goto out_bmap_cancel; |
| 228 | 221 | ||
| 229 | /* | 222 | /* |
| 230 | * Complete the transaction | 223 | * Complete the transaction |
| 231 | */ | 224 | */ |
| 232 | error = xfs_bmap_finish(&tp, &free_list, &committed); | 225 | error = xfs_bmap_finish(&tp, &free_list, &committed); |
| 233 | if (error) | 226 | if (error) |
| 234 | goto error0; | 227 | goto out_bmap_cancel; |
| 235 | error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); | 228 | error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); |
| 236 | if (error) | 229 | if (error) |
| 237 | goto error_out; | 230 | goto out_unlock; |
| 238 | 231 | ||
| 239 | /* | 232 | /* |
| 240 | * Copy any maps to caller's array and return any error. | 233 | * Copy any maps to caller's array and return any error. |
| 241 | */ | 234 | */ |
| 242 | if (nimaps == 0) { | 235 | if (nimaps == 0) { |
| 243 | error = ENOSPC; | 236 | error = XFS_ERROR(ENOSPC); |
| 244 | goto error_out; | 237 | goto out_unlock; |
| 245 | } | 238 | } |
| 246 | 239 | ||
| 247 | if (!(imap->br_startblock || XFS_IS_REALTIME_INODE(ip))) { | 240 | if (!(imap->br_startblock || XFS_IS_REALTIME_INODE(ip))) |
| 248 | error = xfs_alert_fsblock_zero(ip, imap); | 241 | error = xfs_alert_fsblock_zero(ip, imap); |
| 249 | goto error_out; | ||
| 250 | } | ||
| 251 | 242 | ||
| 252 | return 0; | 243 | out_unlock: |
| 244 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | ||
| 245 | return error; | ||
| 253 | 246 | ||
| 254 | error0: /* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */ | 247 | out_bmap_cancel: |
| 255 | xfs_bmap_cancel(&free_list); | 248 | xfs_bmap_cancel(&free_list); |
| 256 | xfs_trans_unreserve_quota_nblks(tp, ip, qblocks, 0, quota_flag); | 249 | xfs_trans_unreserve_quota_nblks(tp, ip, (long)qblocks, 0, quota_flag); |
| 257 | 250 | out_trans_cancel: | |
| 258 | error1: /* Just cancel transaction */ | ||
| 259 | xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); | 251 | xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); |
| 260 | 252 | goto out_unlock; | |
| 261 | error_out: | ||
| 262 | return XFS_ERROR(error); | ||
| 263 | } | 253 | } |
| 264 | 254 | ||
| 265 | /* | 255 | /* |
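
The error handling above was converted from numbered labels (error0/error1/error_out) to the kernel's descriptive goto-unwind style, where each label undoes exactly the state acquired before the failing step and deeper failures fall through the shallower cleanups. A generic, runnable sketch of the shape, with malloc/free standing in for the transaction and quota reservations:

#include <stdlib.h>

static int do_write(int fail_map)
{
        int error = 0;
        void *trans, *quota;

        trans = malloc(16);             /* allocate transaction */
        if (!trans)
                return -1;

        quota = malloc(16);             /* reserve quota blocks */
        if (!quota) {
                error = -1;
                goto out_trans_cancel;
        }

        if (fail_map) {                 /* simulated mapping failure */
                error = -1;
                goto out_bmap_cancel;
        }

        free(quota);                    /* a real commit consumes both */
        free(trans);
        goto out_unlock;

out_bmap_cancel:
        free(quota);                    /* cancel the bmap free list */
out_trans_cancel:
        free(trans);                    /* cancel the transaction */
out_unlock:
        return error;
}

int main(void)
{
        return do_write(0);
}
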
| @@ -422,6 +412,15 @@ retry: | |||
| 422 | return error; | 412 | return error; |
| 423 | } | 413 | } |
| 424 | 414 | ||
| 415 | /* | ||
| 416 | * Make sure preallocation does not create extents beyond the range we | ||
| 417 | * actually support in this filesystem. | ||
| 418 | */ | ||
| 419 | if (last_fsb > XFS_B_TO_FSB(mp, mp->m_maxioffset)) | ||
| 420 | last_fsb = XFS_B_TO_FSB(mp, mp->m_maxioffset); | ||
| 421 | |||
| 422 | ASSERT(last_fsb > offset_fsb); | ||
| 423 | |||
| 425 | nimaps = XFS_WRITE_IMAPS; | 424 | nimaps = XFS_WRITE_IMAPS; |
| 426 | error = xfs_bmapi_delay(ip, offset_fsb, last_fsb - offset_fsb, | 425 | error = xfs_bmapi_delay(ip, offset_fsb, last_fsb - offset_fsb, |
| 427 | imap, &nimaps, XFS_BMAPI_ENTIRE); | 426 | imap, &nimaps, XFS_BMAPI_ENTIRE); |
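
The hunk above clamps delayed-allocation preallocation so it never extends past the largest file offset the filesystem supports. A sketch of that clamp; the maximum constant is invented for the example:

#include <stdio.h>
#include <stdint.h>

typedef uint64_t xfs_fileoff_t;

#define MAX_SUPPORTED_FSB ((xfs_fileoff_t)1 << 32)      /* illustrative */

static xfs_fileoff_t clamp_prealloc(xfs_fileoff_t offset_fsb,
                                    xfs_fileoff_t last_fsb)
{
        if (last_fsb > MAX_SUPPORTED_FSB)
                last_fsb = MAX_SUPPORTED_FSB;
        /* the kernel asserts last_fsb > offset_fsb at this point */
        return last_fsb > offset_fsb ? last_fsb : offset_fsb + 1;
}

int main(void)
{
        printf("%llu\n", (unsigned long long)
               clamp_prealloc(10, MAX_SUPPORTED_FSB + 100));
        return 0;
}
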
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index 3011b879f850..1a25fd802798 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c | |||
| @@ -18,9 +18,7 @@ | |||
| 18 | #include "xfs.h" | 18 | #include "xfs.h" |
| 19 | #include "xfs_fs.h" | 19 | #include "xfs_fs.h" |
| 20 | #include "xfs_acl.h" | 20 | #include "xfs_acl.h" |
| 21 | #include "xfs_bit.h" | ||
| 22 | #include "xfs_log.h" | 21 | #include "xfs_log.h" |
| 23 | #include "xfs_inum.h" | ||
| 24 | #include "xfs_trans.h" | 22 | #include "xfs_trans.h" |
| 25 | #include "xfs_sb.h" | 23 | #include "xfs_sb.h" |
| 26 | #include "xfs_ag.h" | 24 | #include "xfs_ag.h" |
| @@ -34,7 +32,6 @@ | |||
| 34 | #include "xfs_rtalloc.h" | 32 | #include "xfs_rtalloc.h" |
| 35 | #include "xfs_error.h" | 33 | #include "xfs_error.h" |
| 36 | #include "xfs_itable.h" | 34 | #include "xfs_itable.h" |
| 37 | #include "xfs_rw.h" | ||
| 38 | #include "xfs_attr.h" | 35 | #include "xfs_attr.h" |
| 39 | #include "xfs_buf_item.h" | 36 | #include "xfs_buf_item.h" |
| 40 | #include "xfs_utils.h" | 37 | #include "xfs_utils.h" |
| @@ -700,7 +697,7 @@ xfs_setattr_size( | |||
| 700 | xfs_off_t oldsize, newsize; | 697 | xfs_off_t oldsize, newsize; |
| 701 | struct xfs_trans *tp; | 698 | struct xfs_trans *tp; |
| 702 | int error; | 699 | int error; |
| 703 | uint lock_flags; | 700 | uint lock_flags = 0; |
| 704 | uint commit_flags = 0; | 701 | uint commit_flags = 0; |
| 705 | 702 | ||
| 706 | trace_xfs_setattr(ip); | 703 | trace_xfs_setattr(ip); |
| @@ -720,10 +717,10 @@ xfs_setattr_size( | |||
| 720 | ATTR_MTIME_SET|ATTR_KILL_SUID|ATTR_KILL_SGID| | 717 | ATTR_MTIME_SET|ATTR_KILL_SUID|ATTR_KILL_SGID| |
| 721 | ATTR_KILL_PRIV|ATTR_TIMES_SET)) == 0); | 718 | ATTR_KILL_PRIV|ATTR_TIMES_SET)) == 0); |
| 722 | 719 | ||
| 723 | lock_flags = XFS_ILOCK_EXCL; | 720 | if (!(flags & XFS_ATTR_NOLOCK)) { |
| 724 | if (!(flags & XFS_ATTR_NOLOCK)) | ||
| 725 | lock_flags |= XFS_IOLOCK_EXCL; | 721 | lock_flags |= XFS_IOLOCK_EXCL; |
| 726 | xfs_ilock(ip, lock_flags); | 722 | xfs_ilock(ip, lock_flags); |
| 723 | } | ||
| 727 | 724 | ||
| 728 | oldsize = inode->i_size; | 725 | oldsize = inode->i_size; |
| 729 | newsize = iattr->ia_size; | 726 | newsize = iattr->ia_size; |
| @@ -746,7 +743,7 @@ xfs_setattr_size( | |||
| 746 | /* | 743 | /* |
| 747 | * Make sure that the dquots are attached to the inode. | 744 | * Make sure that the dquots are attached to the inode. |
| 748 | */ | 745 | */ |
| 749 | error = xfs_qm_dqattach_locked(ip, 0); | 746 | error = xfs_qm_dqattach(ip, 0); |
| 750 | if (error) | 747 | if (error) |
| 751 | goto out_unlock; | 748 | goto out_unlock; |
| 752 | 749 | ||
| @@ -768,8 +765,6 @@ xfs_setattr_size( | |||
| 768 | if (error) | 765 | if (error) |
| 769 | goto out_unlock; | 766 | goto out_unlock; |
| 770 | } | 767 | } |
| 771 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | ||
| 772 | lock_flags &= ~XFS_ILOCK_EXCL; | ||
| 773 | 768 | ||
| 774 | /* | 769 | /* |
| 775 | * We are going to log the inode size change in this transaction so | 770 | * We are going to log the inode size change in this transaction so |
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index acc2bf264dab..eff577a9b67f 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c | |||
| @@ -18,7 +18,6 @@ | |||
| 18 | #include "xfs.h" | 18 | #include "xfs.h" |
| 19 | #include "xfs_fs.h" | 19 | #include "xfs_fs.h" |
| 20 | #include "xfs_types.h" | 20 | #include "xfs_types.h" |
| 21 | #include "xfs_bit.h" | ||
| 22 | #include "xfs_log.h" | 21 | #include "xfs_log.h" |
| 23 | #include "xfs_inum.h" | 22 | #include "xfs_inum.h" |
| 24 | #include "xfs_trans.h" | 23 | #include "xfs_trans.h" |
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 6db1fef38bff..6b965bf450e4 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c | |||
| @@ -18,9 +18,7 @@ | |||
| 18 | #include "xfs.h" | 18 | #include "xfs.h" |
| 19 | #include "xfs_fs.h" | 19 | #include "xfs_fs.h" |
| 20 | #include "xfs_types.h" | 20 | #include "xfs_types.h" |
| 21 | #include "xfs_bit.h" | ||
| 22 | #include "xfs_log.h" | 21 | #include "xfs_log.h" |
| 23 | #include "xfs_inum.h" | ||
| 24 | #include "xfs_trans.h" | 22 | #include "xfs_trans.h" |
| 25 | #include "xfs_sb.h" | 23 | #include "xfs_sb.h" |
| 26 | #include "xfs_ag.h" | 24 | #include "xfs_ag.h" |
| @@ -35,7 +33,6 @@ | |||
| 35 | #include "xfs_trans_priv.h" | 33 | #include "xfs_trans_priv.h" |
| 36 | #include "xfs_dinode.h" | 34 | #include "xfs_dinode.h" |
| 37 | #include "xfs_inode.h" | 35 | #include "xfs_inode.h" |
| 38 | #include "xfs_rw.h" | ||
| 39 | #include "xfs_trace.h" | 36 | #include "xfs_trace.h" |
| 40 | 37 | ||
| 41 | kmem_zone_t *xfs_log_ticket_zone; | 38 | kmem_zone_t *xfs_log_ticket_zone; |
| @@ -916,27 +913,42 @@ xfs_log_need_covered(xfs_mount_t *mp) | |||
| 916 | * We may be holding the log iclog lock upon entering this routine. | 913 | * We may be holding the log iclog lock upon entering this routine. |
| 917 | */ | 914 | */ |
| 918 | xfs_lsn_t | 915 | xfs_lsn_t |
| 919 | xlog_assign_tail_lsn( | 916 | xlog_assign_tail_lsn_locked( |
| 920 | struct xfs_mount *mp) | 917 | struct xfs_mount *mp) |
| 921 | { | 918 | { |
| 922 | xfs_lsn_t tail_lsn; | ||
| 923 | struct log *log = mp->m_log; | 919 | struct log *log = mp->m_log; |
| 920 | struct xfs_log_item *lip; | ||
| 921 | xfs_lsn_t tail_lsn; | ||
| 922 | |||
| 923 | assert_spin_locked(&mp->m_ail->xa_lock); | ||
| 924 | 924 | ||
| 925 | /* | 925 | /* |
| 926 | * To make sure we always have a valid LSN for the log tail we keep | 926 | * To make sure we always have a valid LSN for the log tail we keep |
| 927 | * track of the last LSN which was committed in log->l_last_sync_lsn, | 927 | * track of the last LSN which was committed in log->l_last_sync_lsn, |
| 928 | * and use that when the AIL was empty and xfs_ail_min_lsn returns 0. | 928 | * and use that when the AIL was empty. |
| 929 | * | ||
| 930 | * If the AIL has been emptied we also need to wake any process | ||
| 931 | * waiting for this condition. | ||
| 932 | */ | 929 | */ |
| 933 | tail_lsn = xfs_ail_min_lsn(mp->m_ail); | 930 | lip = xfs_ail_min(mp->m_ail); |
| 934 | if (!tail_lsn) | 931 | if (lip) |
| 932 | tail_lsn = lip->li_lsn; | ||
| 933 | else | ||
| 935 | tail_lsn = atomic64_read(&log->l_last_sync_lsn); | 934 | tail_lsn = atomic64_read(&log->l_last_sync_lsn); |
| 936 | atomic64_set(&log->l_tail_lsn, tail_lsn); | 935 | atomic64_set(&log->l_tail_lsn, tail_lsn); |
| 937 | return tail_lsn; | 936 | return tail_lsn; |
| 938 | } | 937 | } |
| 939 | 938 | ||
| 939 | xfs_lsn_t | ||
| 940 | xlog_assign_tail_lsn( | ||
| 941 | struct xfs_mount *mp) | ||
| 942 | { | ||
| 943 | xfs_lsn_t tail_lsn; | ||
| 944 | |||
| 945 | spin_lock(&mp->m_ail->xa_lock); | ||
| 946 | tail_lsn = xlog_assign_tail_lsn_locked(mp); | ||
| 947 | spin_unlock(&mp->m_ail->xa_lock); | ||
| 948 | |||
| 949 | return tail_lsn; | ||
| 950 | } | ||
| 951 | |||
| 940 | /* | 952 | /* |
| 941 | * Return the space in the log between the tail and the head. The head | 953 | * Return the space in the log between the tail and the head. The head |
| 942 | * is passed in the cycle/bytes formal parms. In the special case where | 954 | * is passed in the cycle/bytes formal parms. In the special case where |
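
The hunk above splits the tail-LSN assignment into a _locked variant, which asserts the caller holds the AIL lock and does the work, and a plain variant that is a thin wrapper taking and dropping the lock. A minimal sketch of that split, with a pthread mutex standing in for xa_lock and a constant standing in for the AIL-minimum lookup:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t ail_lock = PTHREAD_MUTEX_INITIALIZER;
static long tail_lsn_store;

static long assign_tail_lsn_locked(void)
{
        /* caller must hold ail_lock (assert_spin_locked in the kernel) */
        tail_lsn_store = 100;           /* stand-in for the AIL-min scan */
        return tail_lsn_store;
}

static long assign_tail_lsn(void)
{
        long lsn;

        pthread_mutex_lock(&ail_lock);
        lsn = assign_tail_lsn_locked();
        pthread_mutex_unlock(&ail_lock);
        return lsn;
}

int main(void)
{
        printf("tail lsn: %ld\n", assign_tail_lsn());
        return 0;
}
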
| @@ -1172,7 +1184,7 @@ xlog_alloc_log(xfs_mount_t *mp, | |||
| 1172 | xlog_get_iclog_buffer_size(mp, log); | 1184 | xlog_get_iclog_buffer_size(mp, log); |
| 1173 | 1185 | ||
| 1174 | error = ENOMEM; | 1186 | error = ENOMEM; |
| 1175 | bp = xfs_buf_alloc(mp->m_logdev_targp, 0, log->l_iclog_size, 0); | 1187 | bp = xfs_buf_alloc(mp->m_logdev_targp, 0, BTOBB(log->l_iclog_size), 0); |
| 1176 | if (!bp) | 1188 | if (!bp) |
| 1177 | goto out_free_log; | 1189 | goto out_free_log; |
| 1178 | bp->b_iodone = xlog_iodone; | 1190 | bp->b_iodone = xlog_iodone; |
| @@ -1182,9 +1194,6 @@ xlog_alloc_log(xfs_mount_t *mp, | |||
| 1182 | spin_lock_init(&log->l_icloglock); | 1194 | spin_lock_init(&log->l_icloglock); |
| 1183 | init_waitqueue_head(&log->l_flush_wait); | 1195 | init_waitqueue_head(&log->l_flush_wait); |
| 1184 | 1196 | ||
| 1185 | /* log record size must be multiple of BBSIZE; see xlog_rec_header_t */ | ||
| 1186 | ASSERT((XFS_BUF_SIZE(bp) & BBMASK) == 0); | ||
| 1187 | |||
| 1188 | iclogp = &log->l_iclog; | 1197 | iclogp = &log->l_iclog; |
| 1189 | /* | 1198 | /* |
| 1190 | * The amount of memory to allocate for the iclog structure is | 1199 | * The amount of memory to allocate for the iclog structure is |
| @@ -1204,7 +1213,7 @@ xlog_alloc_log(xfs_mount_t *mp, | |||
| 1204 | prev_iclog = iclog; | 1213 | prev_iclog = iclog; |
| 1205 | 1214 | ||
| 1206 | bp = xfs_buf_get_uncached(mp->m_logdev_targp, | 1215 | bp = xfs_buf_get_uncached(mp->m_logdev_targp, |
| 1207 | log->l_iclog_size, 0); | 1216 | BTOBB(log->l_iclog_size), 0); |
| 1208 | if (!bp) | 1217 | if (!bp) |
| 1209 | goto out_free_iclog; | 1218 | goto out_free_iclog; |
| 1210 | 1219 | ||
| @@ -1224,7 +1233,7 @@ xlog_alloc_log(xfs_mount_t *mp, | |||
| 1224 | head->h_fmt = cpu_to_be32(XLOG_FMT); | 1233 | head->h_fmt = cpu_to_be32(XLOG_FMT); |
| 1225 | memcpy(&head->h_fs_uuid, &mp->m_sb.sb_uuid, sizeof(uuid_t)); | 1234 | memcpy(&head->h_fs_uuid, &mp->m_sb.sb_uuid, sizeof(uuid_t)); |
| 1226 | 1235 | ||
| 1227 | iclog->ic_size = XFS_BUF_SIZE(bp) - log->l_iclog_hsize; | 1236 | iclog->ic_size = BBTOB(bp->b_length) - log->l_iclog_hsize; |
| 1228 | iclog->ic_state = XLOG_STATE_ACTIVE; | 1237 | iclog->ic_state = XLOG_STATE_ACTIVE; |
| 1229 | iclog->ic_log = log; | 1238 | iclog->ic_log = log; |
| 1230 | atomic_set(&iclog->ic_refcnt, 0); | 1239 | atomic_set(&iclog->ic_refcnt, 0); |
| @@ -1475,7 +1484,7 @@ xlog_sync(xlog_t *log, | |||
| 1475 | } else { | 1484 | } else { |
| 1476 | iclog->ic_bwritecnt = 1; | 1485 | iclog->ic_bwritecnt = 1; |
| 1477 | } | 1486 | } |
| 1478 | XFS_BUF_SET_COUNT(bp, count); | 1487 | bp->b_io_length = BTOBB(count); |
| 1479 | bp->b_fspriv = iclog; | 1488 | bp->b_fspriv = iclog; |
| 1480 | XFS_BUF_ZEROFLAGS(bp); | 1489 | XFS_BUF_ZEROFLAGS(bp); |
| 1481 | XFS_BUF_ASYNC(bp); | 1490 | XFS_BUF_ASYNC(bp); |
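
The byte-count calls above now go through basic-block conversions: a basic block is 512 bytes (BBSHIFT == 9), BTOBB rounds a byte count up to whole blocks, and BBTOB converts back. A small sketch of those macros:

#include <stdio.h>

#define BBSHIFT         9
#define BBSIZE          (1 << BBSHIFT)
#define BTOBB(bytes)    (((bytes) + BBSIZE - 1) >> BBSHIFT)
#define BBTOB(bbs)      ((bbs) << BBSHIFT)

int main(void)
{
        printf("BTOBB(4096) = %d blocks\n", BTOBB(4096));   /* 8 */
        printf("BBTOB(8)    = %d bytes\n", BBTOB(8));       /* 4096 */
        printf("BTOBB(513)  = %d blocks\n", BTOBB(513));    /* 2 */
        return 0;
}
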
| @@ -1573,7 +1582,7 @@ xlog_dealloc_log(xlog_t *log) | |||
| 1573 | * always need to ensure that the extra buffer does not point to memory | 1582 | * always need to ensure that the extra buffer does not point to memory |
| 1574 | * owned by another log buffer before we free it. | 1583 | * owned by another log buffer before we free it. |
| 1575 | */ | 1584 | */ |
| 1576 | xfs_buf_set_empty(log->l_xbuf, log->l_iclog_size); | 1585 | xfs_buf_set_empty(log->l_xbuf, BTOBB(log->l_iclog_size)); |
| 1577 | xfs_buf_free(log->l_xbuf); | 1586 | xfs_buf_free(log->l_xbuf); |
| 1578 | 1587 | ||
| 1579 | iclog = log->l_iclog; | 1588 | iclog = log->l_iclog; |
| @@ -2932,6 +2941,7 @@ xfs_log_force( | |||
| 2932 | { | 2941 | { |
| 2933 | int error; | 2942 | int error; |
| 2934 | 2943 | ||
| 2944 | trace_xfs_log_force(mp, 0); | ||
| 2935 | error = _xfs_log_force(mp, flags, NULL); | 2945 | error = _xfs_log_force(mp, flags, NULL); |
| 2936 | if (error) | 2946 | if (error) |
| 2937 | xfs_warn(mp, "%s: error %d returned.", __func__, error); | 2947 | xfs_warn(mp, "%s: error %d returned.", __func__, error); |
| @@ -3080,6 +3090,7 @@ xfs_log_force_lsn( | |||
| 3080 | { | 3090 | { |
| 3081 | int error; | 3091 | int error; |
| 3082 | 3092 | ||
| 3093 | trace_xfs_log_force(mp, lsn); | ||
| 3083 | error = _xfs_log_force_lsn(mp, lsn, flags, NULL); | 3094 | error = _xfs_log_force_lsn(mp, lsn, flags, NULL); |
| 3084 | if (error) | 3095 | if (error) |
| 3085 | xfs_warn(mp, "%s: error %d returned.", __func__, error); | 3096 | xfs_warn(mp, "%s: error %d returned.", __func__, error); |
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h index 2c622bedb302..748d312850e2 100644 --- a/fs/xfs/xfs_log.h +++ b/fs/xfs/xfs_log.h | |||
| @@ -152,6 +152,7 @@ int xfs_log_mount(struct xfs_mount *mp, | |||
| 152 | int num_bblocks); | 152 | int num_bblocks); |
| 153 | int xfs_log_mount_finish(struct xfs_mount *mp); | 153 | int xfs_log_mount_finish(struct xfs_mount *mp); |
| 154 | xfs_lsn_t xlog_assign_tail_lsn(struct xfs_mount *mp); | 154 | xfs_lsn_t xlog_assign_tail_lsn(struct xfs_mount *mp); |
| 155 | xfs_lsn_t xlog_assign_tail_lsn_locked(struct xfs_mount *mp); | ||
| 155 | void xfs_log_space_wake(struct xfs_mount *mp); | 156 | void xfs_log_space_wake(struct xfs_mount *mp); |
| 156 | int xfs_log_notify(struct xfs_mount *mp, | 157 | int xfs_log_notify(struct xfs_mount *mp, |
| 157 | struct xlog_in_core *iclog, | 158 | struct xlog_in_core *iclog, |
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index d4fadbe8ac90..7d6197c58493 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c | |||
| @@ -18,9 +18,7 @@ | |||
| 18 | #include "xfs.h" | 18 | #include "xfs.h" |
| 19 | #include "xfs_fs.h" | 19 | #include "xfs_fs.h" |
| 20 | #include "xfs_types.h" | 20 | #include "xfs_types.h" |
| 21 | #include "xfs_bit.h" | ||
| 22 | #include "xfs_log.h" | 21 | #include "xfs_log.h" |
| 23 | #include "xfs_inum.h" | ||
| 24 | #include "xfs_trans.h" | 22 | #include "xfs_trans.h" |
| 25 | #include "xfs_trans_priv.h" | 23 | #include "xfs_trans_priv.h" |
| 26 | #include "xfs_log_priv.h" | 24 | #include "xfs_log_priv.h" |
| @@ -29,61 +27,10 @@ | |||
| 29 | #include "xfs_mount.h" | 27 | #include "xfs_mount.h" |
| 30 | #include "xfs_error.h" | 28 | #include "xfs_error.h" |
| 31 | #include "xfs_alloc.h" | 29 | #include "xfs_alloc.h" |
| 30 | #include "xfs_extent_busy.h" | ||
| 32 | #include "xfs_discard.h" | 31 | #include "xfs_discard.h" |
| 33 | 32 | ||
| 34 | /* | 33 | /* |
| 35 | * Perform initial CIL structure initialisation. | ||
| 36 | */ | ||
| 37 | int | ||
| 38 | xlog_cil_init( | ||
| 39 | struct log *log) | ||
| 40 | { | ||
| 41 | struct xfs_cil *cil; | ||
| 42 | struct xfs_cil_ctx *ctx; | ||
| 43 | |||
| 44 | cil = kmem_zalloc(sizeof(*cil), KM_SLEEP|KM_MAYFAIL); | ||
| 45 | if (!cil) | ||
| 46 | return ENOMEM; | ||
| 47 | |||
| 48 | ctx = kmem_zalloc(sizeof(*ctx), KM_SLEEP|KM_MAYFAIL); | ||
| 49 | if (!ctx) { | ||
| 50 | kmem_free(cil); | ||
| 51 | return ENOMEM; | ||
| 52 | } | ||
| 53 | |||
| 54 | INIT_LIST_HEAD(&cil->xc_cil); | ||
| 55 | INIT_LIST_HEAD(&cil->xc_committing); | ||
| 56 | spin_lock_init(&cil->xc_cil_lock); | ||
| 57 | init_rwsem(&cil->xc_ctx_lock); | ||
| 58 | init_waitqueue_head(&cil->xc_commit_wait); | ||
| 59 | |||
| 60 | INIT_LIST_HEAD(&ctx->committing); | ||
| 61 | INIT_LIST_HEAD(&ctx->busy_extents); | ||
| 62 | ctx->sequence = 1; | ||
| 63 | ctx->cil = cil; | ||
| 64 | cil->xc_ctx = ctx; | ||
| 65 | cil->xc_current_sequence = ctx->sequence; | ||
| 66 | |||
| 67 | cil->xc_log = log; | ||
| 68 | log->l_cilp = cil; | ||
| 69 | return 0; | ||
| 70 | } | ||
| 71 | |||
| 72 | void | ||
| 73 | xlog_cil_destroy( | ||
| 74 | struct log *log) | ||
| 75 | { | ||
| 76 | if (log->l_cilp->xc_ctx) { | ||
| 77 | if (log->l_cilp->xc_ctx->ticket) | ||
| 78 | xfs_log_ticket_put(log->l_cilp->xc_ctx->ticket); | ||
| 79 | kmem_free(log->l_cilp->xc_ctx); | ||
| 80 | } | ||
| 81 | |||
| 82 | ASSERT(list_empty(&log->l_cilp->xc_cil)); | ||
| 83 | kmem_free(log->l_cilp); | ||
| 84 | } | ||
| 85 | |||
| 86 | /* | ||
| 87 | * Allocate a new ticket. Failing to get a new ticket makes it really hard to | 34 | * Allocate a new ticket. Failing to get a new ticket makes it really hard to |
| 88 | * recover, so we don't allow failure here. Also, we allocate in a context that | 35 | * recover, so we don't allow failure here. Also, we allocate in a context that |
| 89 | * we don't want to be issuing transactions from, so we need to tell the | 36 | * we don't want to be issuing transactions from, so we need to tell the |
| @@ -390,8 +337,8 @@ xlog_cil_committed( | |||
| 390 | xfs_trans_committed_bulk(ctx->cil->xc_log->l_ailp, ctx->lv_chain, | 337 | xfs_trans_committed_bulk(ctx->cil->xc_log->l_ailp, ctx->lv_chain, |
| 391 | ctx->start_lsn, abort); | 338 | ctx->start_lsn, abort); |
| 392 | 339 | ||
| 393 | xfs_alloc_busy_sort(&ctx->busy_extents); | 340 | xfs_extent_busy_sort(&ctx->busy_extents); |
| 394 | xfs_alloc_busy_clear(mp, &ctx->busy_extents, | 341 | xfs_extent_busy_clear(mp, &ctx->busy_extents, |
| 395 | (mp->m_flags & XFS_MOUNT_DISCARD) && !abort); | 342 | (mp->m_flags & XFS_MOUNT_DISCARD) && !abort); |
| 396 | 343 | ||
| 397 | spin_lock(&ctx->cil->xc_cil_lock); | 344 | spin_lock(&ctx->cil->xc_cil_lock); |
| @@ -404,7 +351,7 @@ xlog_cil_committed( | |||
| 404 | ASSERT(mp->m_flags & XFS_MOUNT_DISCARD); | 351 | ASSERT(mp->m_flags & XFS_MOUNT_DISCARD); |
| 405 | 352 | ||
| 406 | xfs_discard_extents(mp, &ctx->busy_extents); | 353 | xfs_discard_extents(mp, &ctx->busy_extents); |
| 407 | xfs_alloc_busy_clear(mp, &ctx->busy_extents, false); | 354 | xfs_extent_busy_clear(mp, &ctx->busy_extents, false); |
| 408 | } | 355 | } |
| 409 | 356 | ||
| 410 | kmem_free(ctx); | 357 | kmem_free(ctx); |
| @@ -426,8 +373,7 @@ xlog_cil_committed( | |||
| 426 | */ | 373 | */ |
| 427 | STATIC int | 374 | STATIC int |
| 428 | xlog_cil_push( | 375 | xlog_cil_push( |
| 429 | struct log *log, | 376 | struct log *log) |
| 430 | xfs_lsn_t push_seq) | ||
| 431 | { | 377 | { |
| 432 | struct xfs_cil *cil = log->l_cilp; | 378 | struct xfs_cil *cil = log->l_cilp; |
| 433 | struct xfs_log_vec *lv; | 379 | struct xfs_log_vec *lv; |
| @@ -443,39 +389,36 @@ xlog_cil_push( | |||
| 443 | struct xfs_log_iovec lhdr; | 389 | struct xfs_log_iovec lhdr; |
| 444 | struct xfs_log_vec lvhdr = { NULL }; | 390 | struct xfs_log_vec lvhdr = { NULL }; |
| 445 | xfs_lsn_t commit_lsn; | 391 | xfs_lsn_t commit_lsn; |
| 392 | xfs_lsn_t push_seq; | ||
| 446 | 393 | ||
| 447 | if (!cil) | 394 | if (!cil) |
| 448 | return 0; | 395 | return 0; |
| 449 | 396 | ||
| 450 | ASSERT(!push_seq || push_seq <= cil->xc_ctx->sequence); | ||
| 451 | |||
| 452 | new_ctx = kmem_zalloc(sizeof(*new_ctx), KM_SLEEP|KM_NOFS); | 397 | new_ctx = kmem_zalloc(sizeof(*new_ctx), KM_SLEEP|KM_NOFS); |
| 453 | new_ctx->ticket = xlog_cil_ticket_alloc(log); | 398 | new_ctx->ticket = xlog_cil_ticket_alloc(log); |
| 454 | 399 | ||
| 455 | /* | 400 | down_write(&cil->xc_ctx_lock); |
| 456 | * Lock out transaction commit, but don't block for background pushes | ||
| 457 | * unless we are well over the CIL space limit. See the definition of | ||
| 458 | * XLOG_CIL_HARD_SPACE_LIMIT() for the full explanation of the logic | ||
| 459 | * used here. | ||
| 460 | */ | ||
| 461 | if (!down_write_trylock(&cil->xc_ctx_lock)) { | ||
| 462 | if (!push_seq && | ||
| 463 | cil->xc_ctx->space_used < XLOG_CIL_HARD_SPACE_LIMIT(log)) | ||
| 464 | goto out_free_ticket; | ||
| 465 | down_write(&cil->xc_ctx_lock); | ||
| 466 | } | ||
| 467 | ctx = cil->xc_ctx; | 401 | ctx = cil->xc_ctx; |
| 468 | 402 | ||
| 469 | /* check if we've anything to push */ | 403 | spin_lock(&cil->xc_cil_lock); |
| 470 | if (list_empty(&cil->xc_cil)) | 404 | push_seq = cil->xc_push_seq; |
| 471 | goto out_skip; | 405 | ASSERT(push_seq <= ctx->sequence); |
| 472 | 406 | ||
| 473 | /* check for spurious background flush */ | 407 | /* |
| 474 | if (!push_seq && cil->xc_ctx->space_used < XLOG_CIL_SPACE_LIMIT(log)) | 408 | * Check if we've anything to push. If there is nothing, then we don't |
| 409 | * move on to a new sequence number and so we have to be able to push | ||
| 410 | * this sequence again later. | ||
| 411 | */ | ||
| 412 | if (list_empty(&cil->xc_cil)) { | ||
| 413 | cil->xc_push_seq = 0; | ||
| 414 | spin_unlock(&cil->xc_cil_lock); | ||
| 475 | goto out_skip; | 415 | goto out_skip; |
| 416 | } | ||
| 417 | spin_unlock(&cil->xc_cil_lock); | ||
| 418 | |||
| 476 | 419 | ||
| 477 | /* check for a previously pushed sequence */ | 420 | /* check for a previously pushed sequence */ |
| 478 | if (push_seq && push_seq < cil->xc_ctx->sequence) | 421 | if (push_seq < cil->xc_ctx->sequence) |
| 479 | goto out_skip; | 422 | goto out_skip; |
| 480 | 423 | ||
| 481 | /* | 424 | /* |
| @@ -629,7 +572,6 @@ restart: | |||
| 629 | 572 | ||
| 630 | out_skip: | 573 | out_skip: |
| 631 | up_write(&cil->xc_ctx_lock); | 574 | up_write(&cil->xc_ctx_lock); |
| 632 | out_free_ticket: | ||
| 633 | xfs_log_ticket_put(new_ctx->ticket); | 575 | xfs_log_ticket_put(new_ctx->ticket); |
| 634 | kmem_free(new_ctx); | 576 | kmem_free(new_ctx); |
| 635 | return 0; | 577 | return 0; |
| @@ -641,6 +583,82 @@ out_abort: | |||
| 641 | return XFS_ERROR(EIO); | 583 | return XFS_ERROR(EIO); |
| 642 | } | 584 | } |
| 643 | 585 | ||
| 586 | static void | ||
| 587 | xlog_cil_push_work( | ||
| 588 | struct work_struct *work) | ||
| 589 | { | ||
| 590 | struct xfs_cil *cil = container_of(work, struct xfs_cil, | ||
| 591 | xc_push_work); | ||
| 592 | xlog_cil_push(cil->xc_log); | ||
| 593 | } | ||
| 594 | |||
| 595 | /* | ||
| 596 | * We need to push the CIL every so often so we don't cache more than we can fit in | ||
| 597 | * the log. The limit really is that a checkpoint can't be more than half the | ||
| 598 | * log (the current checkpoint is not allowed to overwrite the previous | ||
| 599 | * checkpoint), but commit latency and memory usage limit this to a smaller | ||
| 600 | * size. | ||
| 601 | */ | ||
| 602 | static void | ||
| 603 | xlog_cil_push_background( | ||
| 604 | struct log *log) | ||
| 605 | { | ||
| 606 | struct xfs_cil *cil = log->l_cilp; | ||
| 607 | |||
| 608 | /* | ||
| 609 | * The cil won't be empty because we are called while holding the | ||
| 610 | * context lock so whatever we added to the CIL will still be there | ||
| 611 | */ | ||
| 612 | ASSERT(!list_empty(&cil->xc_cil)); | ||
| 613 | |||
| 614 | /* | ||
| 615 | * don't do a background push if we haven't used up all the | ||
| 616 | * space available yet. | ||
| 617 | */ | ||
| 618 | if (cil->xc_ctx->space_used < XLOG_CIL_SPACE_LIMIT(log)) | ||
| 619 | return; | ||
| 620 | |||
| 621 | spin_lock(&cil->xc_cil_lock); | ||
| 622 | if (cil->xc_push_seq < cil->xc_current_sequence) { | ||
| 623 | cil->xc_push_seq = cil->xc_current_sequence; | ||
| 624 | queue_work(log->l_mp->m_cil_workqueue, &cil->xc_push_work); | ||
| 625 | } | ||
| 626 | spin_unlock(&cil->xc_cil_lock); | ||
| 627 | |||
| 628 | } | ||
| 629 | |||
| 630 | static void | ||
| 631 | xlog_cil_push_foreground( | ||
| 632 | struct log *log, | ||
| 633 | xfs_lsn_t push_seq) | ||
| 634 | { | ||
| 635 | struct xfs_cil *cil = log->l_cilp; | ||
| 636 | |||
| 637 | if (!cil) | ||
| 638 | return; | ||
| 639 | |||
| 640 | ASSERT(push_seq && push_seq <= cil->xc_current_sequence); | ||
| 641 | |||
| 642 | /* start on any pending background push to minimise wait time on it */ | ||
| 643 | flush_work(&cil->xc_push_work); | ||
| 644 | |||
| 645 | /* | ||
| 646 | * If the CIL is empty or we've already pushed the sequence then | ||
| 647 | * there's no work we need to do. | ||
| 648 | */ | ||
| 649 | spin_lock(&cil->xc_cil_lock); | ||
| 650 | if (list_empty(&cil->xc_cil) || push_seq <= cil->xc_push_seq) { | ||
| 651 | spin_unlock(&cil->xc_cil_lock); | ||
| 652 | return; | ||
| 653 | } | ||
| 654 | |||
| 655 | cil->xc_push_seq = push_seq; | ||
| 656 | spin_unlock(&cil->xc_cil_lock); | ||
| 657 | |||
| 658 | /* do the push now */ | ||
| 659 | xlog_cil_push(log); | ||
| 660 | } | ||
| 661 | |||
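
The new xlog_cil_push_work() handler above receives only the embedded work_struct and recovers its owning xfs_cil via container_of(). A minimal standalone sketch of that pattern — the types below are illustrative stand-ins, not the XFS ones:

#include <stddef.h>
#include <stdio.h>

/* Stand-in for the kernel's container_of(): walk back from a member
 * pointer to the structure that embeds it. */
#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct work_struct { int pending; };	/* simplified stand-in */

struct demo_cil {
	int sequence;
	struct work_struct xc_push_work;	/* embedded, as in struct xfs_cil */
};

/* The workqueue hands the handler only the work_struct; container_of
 * recovers the CIL it lives in, just as xlog_cil_push_work() does. */
static void push_work_handler(struct work_struct *work)
{
	struct demo_cil *cil = container_of(work, struct demo_cil, xc_push_work);

	printf("pushing CIL sequence %d\n", cil->sequence);
}

int main(void)
{
	struct demo_cil cil = { .sequence = 42, .xc_push_work = { 0 } };

	push_work_handler(&cil.xc_push_work);	/* prints: pushing CIL sequence 42 */
	return 0;
}
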
| 644 | /* | 662 | /* |
| 645 | * Commit a transaction with the given vector to the Committed Item List. | 663 | * Commit a transaction with the given vector to the Committed Item List. |
| 646 | * | 664 | * |
| @@ -667,7 +685,6 @@ xfs_log_commit_cil( | |||
| 667 | { | 685 | { |
| 668 | struct log *log = mp->m_log; | 686 | struct log *log = mp->m_log; |
| 669 | int log_flags = 0; | 687 | int log_flags = 0; |
| 670 | int push = 0; | ||
| 671 | struct xfs_log_vec *log_vector; | 688 | struct xfs_log_vec *log_vector; |
| 672 | 689 | ||
| 673 | if (flags & XFS_TRANS_RELEASE_LOG_RES) | 690 | if (flags & XFS_TRANS_RELEASE_LOG_RES) |
| @@ -719,21 +736,9 @@ xfs_log_commit_cil( | |||
| 719 | */ | 736 | */ |
| 720 | xfs_trans_free_items(tp, *commit_lsn, 0); | 737 | xfs_trans_free_items(tp, *commit_lsn, 0); |
| 721 | 738 | ||
| 722 | /* check for background commit before unlock */ | 739 | xlog_cil_push_background(log); |
| 723 | if (log->l_cilp->xc_ctx->space_used > XLOG_CIL_SPACE_LIMIT(log)) | ||
| 724 | push = 1; | ||
| 725 | 740 | ||
| 726 | up_read(&log->l_cilp->xc_ctx_lock); | 741 | up_read(&log->l_cilp->xc_ctx_lock); |
| 727 | |||
| 728 | /* | ||
| 729 | * We need to push CIL every so often so we don't cache more than we | ||
| 730 | * can fit in the log. The limit really is that a checkpoint can't be | ||
| 731 | * more than half the log (the current checkpoint is not allowed to | ||
| 732 | * overwrite the previous checkpoint), but commit latency and memory | ||
| 733 | * usage limit this to a smaller size in most cases. | ||
| 734 | */ | ||
| 735 | if (push) | ||
| 736 | xlog_cil_push(log, 0); | ||
| 737 | return 0; | 742 | return 0; |
| 738 | } | 743 | } |
| 739 | 744 | ||
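
xfs_log_commit_cil() above now simply calls xlog_cil_push_background(), which compares space_used against XLOG_CIL_SPACE_LIMIT() before queueing the push work. As the moved comment notes, the hard bound is that a checkpoint cannot exceed half the log; the background threshold is much smaller. A rough standalone sketch, assuming the limit is about an eighth of the log (the exact fraction lives in xfs_log_priv.h and is an assumption here):

#include <stdio.h>

/* Assumed definition for illustration; the real macro is in
 * xfs_log_priv.h.  The hard ceiling is half the log, since a new
 * checkpoint must never overwrite the previous one. */
#define DEMO_CIL_SPACE_LIMIT(logsize)	((logsize) >> 3)

int main(void)
{
	unsigned long logsize = 32UL * 1024 * 1024;	/* 32 MiB log */

	printf("background CIL push above %lu bytes used (log is %lu bytes)\n",
	       DEMO_CIL_SPACE_LIMIT(logsize), logsize);
	return 0;
}
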
| @@ -746,9 +751,6 @@ xfs_log_commit_cil( | |||
| 746 | * | 751 | * |
| 747 | * We return the current commit lsn to allow the callers to determine if an | 752 | * We return the current commit lsn to allow the callers to determine if an |
| 748 | * iclog flush is necessary following this call. | 753 | * iclog flush is necessary following this call. |
| 749 | * | ||
| 750 | * XXX: Initially, just push the CIL unconditionally and return whatever | ||
| 751 | * commit lsn is there. It'll be empty, so this is broken for now. | ||
| 752 | */ | 754 | */ |
| 753 | xfs_lsn_t | 755 | xfs_lsn_t |
| 754 | xlog_cil_force_lsn( | 756 | xlog_cil_force_lsn( |
| @@ -766,8 +768,7 @@ xlog_cil_force_lsn( | |||
| 766 | * xlog_cil_push() handles racing pushes for the same sequence, | 768 | * xlog_cil_push() handles racing pushes for the same sequence, |
| 767 | * so no need to deal with it here. | 769 | * so no need to deal with it here. |
| 768 | */ | 770 | */ |
| 769 | if (sequence == cil->xc_current_sequence) | 771 | xlog_cil_push_foreground(log, sequence); |
| 770 | xlog_cil_push(log, sequence); | ||
| 771 | 772 | ||
| 772 | /* | 773 | /* |
| 773 | * See if we can find a previous sequence still committing. | 774 | * See if we can find a previous sequence still committing. |
| @@ -826,3 +827,57 @@ xfs_log_item_in_current_chkpt( | |||
| 826 | return false; | 827 | return false; |
| 827 | return true; | 828 | return true; |
| 828 | } | 829 | } |
| 830 | |||
| 831 | /* | ||
| 832 | * Perform initial CIL structure initialisation. | ||
| 833 | */ | ||
| 834 | int | ||
| 835 | xlog_cil_init( | ||
| 836 | struct log *log) | ||
| 837 | { | ||
| 838 | struct xfs_cil *cil; | ||
| 839 | struct xfs_cil_ctx *ctx; | ||
| 840 | |||
| 841 | cil = kmem_zalloc(sizeof(*cil), KM_SLEEP|KM_MAYFAIL); | ||
| 842 | if (!cil) | ||
| 843 | return ENOMEM; | ||
| 844 | |||
| 845 | ctx = kmem_zalloc(sizeof(*ctx), KM_SLEEP|KM_MAYFAIL); | ||
| 846 | if (!ctx) { | ||
| 847 | kmem_free(cil); | ||
| 848 | return ENOMEM; | ||
| 849 | } | ||
| 850 | |||
| 851 | INIT_WORK(&cil->xc_push_work, xlog_cil_push_work); | ||
| 852 | INIT_LIST_HEAD(&cil->xc_cil); | ||
| 853 | INIT_LIST_HEAD(&cil->xc_committing); | ||
| 854 | spin_lock_init(&cil->xc_cil_lock); | ||
| 855 | init_rwsem(&cil->xc_ctx_lock); | ||
| 856 | init_waitqueue_head(&cil->xc_commit_wait); | ||
| 857 | |||
| 858 | INIT_LIST_HEAD(&ctx->committing); | ||
| 859 | INIT_LIST_HEAD(&ctx->busy_extents); | ||
| 860 | ctx->sequence = 1; | ||
| 861 | ctx->cil = cil; | ||
| 862 | cil->xc_ctx = ctx; | ||
| 863 | cil->xc_current_sequence = ctx->sequence; | ||
| 864 | |||
| 865 | cil->xc_log = log; | ||
| 866 | log->l_cilp = cil; | ||
| 867 | return 0; | ||
| 868 | } | ||
| 869 | |||
| 870 | void | ||
| 871 | xlog_cil_destroy( | ||
| 872 | struct log *log) | ||
| 873 | { | ||
| 874 | if (log->l_cilp->xc_ctx) { | ||
| 875 | if (log->l_cilp->xc_ctx->ticket) | ||
| 876 | xfs_log_ticket_put(log->l_cilp->xc_ctx->ticket); | ||
| 877 | kmem_free(log->l_cilp->xc_ctx); | ||
| 878 | } | ||
| 879 | |||
| 880 | ASSERT(list_empty(&log->l_cilp->xc_cil)); | ||
| 881 | kmem_free(log->l_cilp); | ||
| 882 | } | ||
| 883 | |||
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index 2152900b79d4..735ff1ee53da 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h | |||
| @@ -417,6 +417,8 @@ struct xfs_cil { | |||
| 417 | struct list_head xc_committing; | 417 | struct list_head xc_committing; |
| 418 | wait_queue_head_t xc_commit_wait; | 418 | wait_queue_head_t xc_commit_wait; |
| 419 | xfs_lsn_t xc_current_sequence; | 419 | xfs_lsn_t xc_current_sequence; |
| 420 | struct work_struct xc_push_work; | ||
| 421 | xfs_lsn_t xc_push_seq; | ||
| 420 | }; | 422 | }; |
| 421 | 423 | ||
| 422 | /* | 424 | /* |
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 8ecad5bad66c..ca386909131a 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c | |||
| @@ -40,7 +40,6 @@ | |||
| 40 | #include "xfs_extfree_item.h" | 40 | #include "xfs_extfree_item.h" |
| 41 | #include "xfs_trans_priv.h" | 41 | #include "xfs_trans_priv.h" |
| 42 | #include "xfs_quota.h" | 42 | #include "xfs_quota.h" |
| 43 | #include "xfs_rw.h" | ||
| 44 | #include "xfs_utils.h" | 43 | #include "xfs_utils.h" |
| 45 | #include "xfs_trace.h" | 44 | #include "xfs_trace.h" |
| 46 | 45 | ||
| @@ -120,7 +119,7 @@ xlog_get_bp( | |||
| 120 | nbblks += log->l_sectBBsize; | 119 | nbblks += log->l_sectBBsize; |
| 121 | nbblks = round_up(nbblks, log->l_sectBBsize); | 120 | nbblks = round_up(nbblks, log->l_sectBBsize); |
| 122 | 121 | ||
| 123 | bp = xfs_buf_get_uncached(log->l_mp->m_logdev_targp, BBTOB(nbblks), 0); | 122 | bp = xfs_buf_get_uncached(log->l_mp->m_logdev_targp, nbblks, 0); |
| 124 | if (bp) | 123 | if (bp) |
| 125 | xfs_buf_unlock(bp); | 124 | xfs_buf_unlock(bp); |
| 126 | return bp; | 125 | return bp; |
| @@ -146,7 +145,7 @@ xlog_align( | |||
| 146 | { | 145 | { |
| 147 | xfs_daddr_t offset = blk_no & ((xfs_daddr_t)log->l_sectBBsize - 1); | 146 | xfs_daddr_t offset = blk_no & ((xfs_daddr_t)log->l_sectBBsize - 1); |
| 148 | 147 | ||
| 149 | ASSERT(BBTOB(offset + nbblks) <= XFS_BUF_SIZE(bp)); | 148 | ASSERT(offset + nbblks <= bp->b_length); |
| 150 | return bp->b_addr + BBTOB(offset); | 149 | return bp->b_addr + BBTOB(offset); |
| 151 | } | 150 | } |
| 152 | 151 | ||
| @@ -174,11 +173,12 @@ xlog_bread_noalign( | |||
| 174 | nbblks = round_up(nbblks, log->l_sectBBsize); | 173 | nbblks = round_up(nbblks, log->l_sectBBsize); |
| 175 | 174 | ||
| 176 | ASSERT(nbblks > 0); | 175 | ASSERT(nbblks > 0); |
| 177 | ASSERT(BBTOB(nbblks) <= XFS_BUF_SIZE(bp)); | 176 | ASSERT(nbblks <= bp->b_length); |
| 178 | 177 | ||
| 179 | XFS_BUF_SET_ADDR(bp, log->l_logBBstart + blk_no); | 178 | XFS_BUF_SET_ADDR(bp, log->l_logBBstart + blk_no); |
| 180 | XFS_BUF_READ(bp); | 179 | XFS_BUF_READ(bp); |
| 181 | XFS_BUF_SET_COUNT(bp, BBTOB(nbblks)); | 180 | bp->b_io_length = nbblks; |
| 181 | bp->b_error = 0; | ||
| 182 | 182 | ||
| 183 | xfsbdstrat(log->l_mp, bp); | 183 | xfsbdstrat(log->l_mp, bp); |
| 184 | error = xfs_buf_iowait(bp); | 184 | error = xfs_buf_iowait(bp); |
| @@ -218,7 +218,7 @@ xlog_bread_offset( | |||
| 218 | xfs_caddr_t offset) | 218 | xfs_caddr_t offset) |
| 219 | { | 219 | { |
| 220 | xfs_caddr_t orig_offset = bp->b_addr; | 220 | xfs_caddr_t orig_offset = bp->b_addr; |
| 221 | int orig_len = bp->b_buffer_length; | 221 | int orig_len = BBTOB(bp->b_length); |
| 222 | int error, error2; | 222 | int error, error2; |
| 223 | 223 | ||
| 224 | error = xfs_buf_associate_memory(bp, offset, BBTOB(nbblks)); | 224 | error = xfs_buf_associate_memory(bp, offset, BBTOB(nbblks)); |
| @@ -259,13 +259,14 @@ xlog_bwrite( | |||
| 259 | nbblks = round_up(nbblks, log->l_sectBBsize); | 259 | nbblks = round_up(nbblks, log->l_sectBBsize); |
| 260 | 260 | ||
| 261 | ASSERT(nbblks > 0); | 261 | ASSERT(nbblks > 0); |
| 262 | ASSERT(BBTOB(nbblks) <= XFS_BUF_SIZE(bp)); | 262 | ASSERT(nbblks <= bp->b_length); |
| 263 | 263 | ||
| 264 | XFS_BUF_SET_ADDR(bp, log->l_logBBstart + blk_no); | 264 | XFS_BUF_SET_ADDR(bp, log->l_logBBstart + blk_no); |
| 265 | XFS_BUF_ZEROFLAGS(bp); | 265 | XFS_BUF_ZEROFLAGS(bp); |
| 266 | xfs_buf_hold(bp); | 266 | xfs_buf_hold(bp); |
| 267 | xfs_buf_lock(bp); | 267 | xfs_buf_lock(bp); |
| 268 | XFS_BUF_SET_COUNT(bp, BBTOB(nbblks)); | 268 | bp->b_io_length = nbblks; |
| 269 | bp->b_error = 0; | ||
| 269 | 270 | ||
| 270 | error = xfs_bwrite(bp); | 271 | error = xfs_bwrite(bp); |
| 271 | if (error) | 272 | if (error) |
| @@ -440,6 +441,8 @@ xlog_find_verify_cycle( | |||
| 440 | * a log sector, or we're out of luck. | 441 | * a log sector, or we're out of luck. |
| 441 | */ | 442 | */ |
| 442 | bufblks = 1 << ffs(nbblks); | 443 | bufblks = 1 << ffs(nbblks); |
| 444 | while (bufblks > log->l_logBBsize) | ||
| 445 | bufblks >>= 1; | ||
| 443 | while (!(bp = xlog_get_bp(log, bufblks))) { | 446 | while (!(bp = xlog_get_bp(log, bufblks))) { |
| 444 | bufblks >>= 1; | 447 | bufblks >>= 1; |
| 445 | if (bufblks < log->l_sectBBsize) | 448 | if (bufblks < log->l_sectBBsize) |
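
The two added lines above guard against `1 << ffs(nbblks)` rounding the buffer size up past the log itself, which would make xlog_get_bp() attempt a needlessly large allocation. A standalone sketch of the sizing logic, with an illustrative log size standing in for log->l_logBBsize:

#include <stdio.h>
#include <strings.h>	/* ffs() */

#define DEMO_LOG_SIZE_BB 64	/* stand-in for log->l_logBBsize */

static int pick_bufblks(int nbblks)
{
	int bufblks = 1 << ffs(nbblks);	/* as in xlog_find_verify_cycle() */

	/* the fix: never size the buffer past the log itself */
	while (bufblks > DEMO_LOG_SIZE_BB)
		bufblks >>= 1;
	return bufblks;
}

int main(void)
{
	printf("%d\n", pick_bufblks(128));	/* 256 before the cap, 64 after */
	printf("%d\n", pick_bufblks(20));	/* 8: already within the log */
	return 0;
}
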
| @@ -1225,6 +1228,8 @@ xlog_write_log_records( | |||
| 1225 | * log sector, or we're out of luck. | 1228 | * log sector, or we're out of luck. |
| 1226 | */ | 1229 | */ |
| 1227 | bufblks = 1 << ffs(blocks); | 1230 | bufblks = 1 << ffs(blocks); |
| 1231 | while (bufblks > log->l_logBBsize) | ||
| 1232 | bufblks >>= 1; | ||
| 1228 | while (!(bp = xlog_get_bp(log, bufblks))) { | 1233 | while (!(bp = xlog_get_bp(log, bufblks))) { |
| 1229 | bufblks >>= 1; | 1234 | bufblks >>= 1; |
| 1230 | if (bufblks < sectbb) | 1235 | if (bufblks < sectbb) |
| @@ -1772,7 +1777,7 @@ xlog_recover_do_inode_buffer( | |||
| 1772 | 1777 | ||
| 1773 | trace_xfs_log_recover_buf_inode_buf(mp->m_log, buf_f); | 1778 | trace_xfs_log_recover_buf_inode_buf(mp->m_log, buf_f); |
| 1774 | 1779 | ||
| 1775 | inodes_per_buf = XFS_BUF_COUNT(bp) >> mp->m_sb.sb_inodelog; | 1780 | inodes_per_buf = BBTOB(bp->b_io_length) >> mp->m_sb.sb_inodelog; |
| 1776 | for (i = 0; i < inodes_per_buf; i++) { | 1781 | for (i = 0; i < inodes_per_buf; i++) { |
| 1777 | next_unlinked_offset = (i * mp->m_sb.sb_inodesize) + | 1782 | next_unlinked_offset = (i * mp->m_sb.sb_inodesize) + |
| 1778 | offsetof(xfs_dinode_t, di_next_unlinked); | 1783 | offsetof(xfs_dinode_t, di_next_unlinked); |
| @@ -1814,7 +1819,8 @@ xlog_recover_do_inode_buffer( | |||
| 1814 | 1819 | ||
| 1815 | ASSERT(item->ri_buf[item_index].i_addr != NULL); | 1820 | ASSERT(item->ri_buf[item_index].i_addr != NULL); |
| 1816 | ASSERT((item->ri_buf[item_index].i_len % XFS_BLF_CHUNK) == 0); | 1821 | ASSERT((item->ri_buf[item_index].i_len % XFS_BLF_CHUNK) == 0); |
| 1817 | ASSERT((reg_buf_offset + reg_buf_bytes) <= XFS_BUF_COUNT(bp)); | 1822 | ASSERT((reg_buf_offset + reg_buf_bytes) <= |
| 1823 | BBTOB(bp->b_io_length)); | ||
| 1818 | 1824 | ||
| 1819 | /* | 1825 | /* |
| 1820 | * The current logged region contains a copy of the | 1826 | * The current logged region contains a copy of the |
| @@ -1873,8 +1879,8 @@ xlog_recover_do_reg_buffer( | |||
| 1873 | ASSERT(nbits > 0); | 1879 | ASSERT(nbits > 0); |
| 1874 | ASSERT(item->ri_buf[i].i_addr != NULL); | 1880 | ASSERT(item->ri_buf[i].i_addr != NULL); |
| 1875 | ASSERT(item->ri_buf[i].i_len % XFS_BLF_CHUNK == 0); | 1881 | ASSERT(item->ri_buf[i].i_len % XFS_BLF_CHUNK == 0); |
| 1876 | ASSERT(XFS_BUF_COUNT(bp) >= | 1882 | ASSERT(BBTOB(bp->b_io_length) >= |
| 1877 | ((uint)bit << XFS_BLF_SHIFT)+(nbits<<XFS_BLF_SHIFT)); | 1883 | ((uint)bit << XFS_BLF_SHIFT) + (nbits << XFS_BLF_SHIFT)); |
| 1878 | 1884 | ||
| 1879 | /* | 1885 | /* |
| 1880 | * Do a sanity check if this is a dquot buffer. Just checking | 1886 | * Do a sanity check if this is a dquot buffer. Just checking |
| @@ -2103,6 +2109,7 @@ xlog_recover_do_dquot_buffer( | |||
| 2103 | STATIC int | 2109 | STATIC int |
| 2104 | xlog_recover_buffer_pass2( | 2110 | xlog_recover_buffer_pass2( |
| 2105 | xlog_t *log, | 2111 | xlog_t *log, |
| 2112 | struct list_head *buffer_list, | ||
| 2106 | xlog_recover_item_t *item) | 2113 | xlog_recover_item_t *item) |
| 2107 | { | 2114 | { |
| 2108 | xfs_buf_log_format_t *buf_f = item->ri_buf[0].i_addr; | 2115 | xfs_buf_log_format_t *buf_f = item->ri_buf[0].i_addr; |
| @@ -2123,9 +2130,9 @@ xlog_recover_buffer_pass2( | |||
| 2123 | 2130 | ||
| 2124 | trace_xfs_log_recover_buf_recover(log, buf_f); | 2131 | trace_xfs_log_recover_buf_recover(log, buf_f); |
| 2125 | 2132 | ||
| 2126 | buf_flags = XBF_LOCK; | 2133 | buf_flags = 0; |
| 2127 | if (!(buf_f->blf_flags & XFS_BLF_INODE_BUF)) | 2134 | if (buf_f->blf_flags & XFS_BLF_INODE_BUF) |
| 2128 | buf_flags |= XBF_MAPPED; | 2135 | buf_flags |= XBF_UNMAPPED; |
| 2129 | 2136 | ||
| 2130 | bp = xfs_buf_read(mp->m_ddev_targp, buf_f->blf_blkno, buf_f->blf_len, | 2137 | bp = xfs_buf_read(mp->m_ddev_targp, buf_f->blf_blkno, buf_f->blf_len, |
| 2131 | buf_flags); | 2138 | buf_flags); |
| @@ -2166,14 +2173,14 @@ xlog_recover_buffer_pass2( | |||
| 2166 | */ | 2173 | */ |
| 2167 | if (XFS_DINODE_MAGIC == | 2174 | if (XFS_DINODE_MAGIC == |
| 2168 | be16_to_cpu(*((__be16 *)xfs_buf_offset(bp, 0))) && | 2175 | be16_to_cpu(*((__be16 *)xfs_buf_offset(bp, 0))) && |
| 2169 | (XFS_BUF_COUNT(bp) != MAX(log->l_mp->m_sb.sb_blocksize, | 2176 | (BBTOB(bp->b_io_length) != MAX(log->l_mp->m_sb.sb_blocksize, |
| 2170 | (__uint32_t)XFS_INODE_CLUSTER_SIZE(log->l_mp)))) { | 2177 | (__uint32_t)XFS_INODE_CLUSTER_SIZE(log->l_mp)))) { |
| 2171 | xfs_buf_stale(bp); | 2178 | xfs_buf_stale(bp); |
| 2172 | error = xfs_bwrite(bp); | 2179 | error = xfs_bwrite(bp); |
| 2173 | } else { | 2180 | } else { |
| 2174 | ASSERT(bp->b_target->bt_mount == mp); | 2181 | ASSERT(bp->b_target->bt_mount == mp); |
| 2175 | bp->b_iodone = xlog_recover_iodone; | 2182 | bp->b_iodone = xlog_recover_iodone; |
| 2176 | xfs_buf_delwri_queue(bp); | 2183 | xfs_buf_delwri_queue(bp, buffer_list); |
| 2177 | } | 2184 | } |
| 2178 | 2185 | ||
| 2179 | xfs_buf_relse(bp); | 2186 | xfs_buf_relse(bp); |
| @@ -2183,6 +2190,7 @@ xlog_recover_buffer_pass2( | |||
| 2183 | STATIC int | 2190 | STATIC int |
| 2184 | xlog_recover_inode_pass2( | 2191 | xlog_recover_inode_pass2( |
| 2185 | xlog_t *log, | 2192 | xlog_t *log, |
| 2193 | struct list_head *buffer_list, | ||
| 2186 | xlog_recover_item_t *item) | 2194 | xlog_recover_item_t *item) |
| 2187 | { | 2195 | { |
| 2188 | xfs_inode_log_format_t *in_f; | 2196 | xfs_inode_log_format_t *in_f; |
| @@ -2220,8 +2228,7 @@ xlog_recover_inode_pass2( | |||
| 2220 | } | 2228 | } |
| 2221 | trace_xfs_log_recover_inode_recover(log, in_f); | 2229 | trace_xfs_log_recover_inode_recover(log, in_f); |
| 2222 | 2230 | ||
| 2223 | bp = xfs_buf_read(mp->m_ddev_targp, in_f->ilf_blkno, in_f->ilf_len, | 2231 | bp = xfs_buf_read(mp->m_ddev_targp, in_f->ilf_blkno, in_f->ilf_len, 0); |
| 2224 | XBF_LOCK); | ||
| 2225 | if (!bp) { | 2232 | if (!bp) { |
| 2226 | error = ENOMEM; | 2233 | error = ENOMEM; |
| 2227 | goto error; | 2234 | goto error; |
| @@ -2436,7 +2443,7 @@ xlog_recover_inode_pass2( | |||
| 2436 | write_inode_buffer: | 2443 | write_inode_buffer: |
| 2437 | ASSERT(bp->b_target->bt_mount == mp); | 2444 | ASSERT(bp->b_target->bt_mount == mp); |
| 2438 | bp->b_iodone = xlog_recover_iodone; | 2445 | bp->b_iodone = xlog_recover_iodone; |
| 2439 | xfs_buf_delwri_queue(bp); | 2446 | xfs_buf_delwri_queue(bp, buffer_list); |
| 2440 | xfs_buf_relse(bp); | 2447 | xfs_buf_relse(bp); |
| 2441 | error: | 2448 | error: |
| 2442 | if (need_free) | 2449 | if (need_free) |
| @@ -2477,6 +2484,7 @@ xlog_recover_quotaoff_pass1( | |||
| 2477 | STATIC int | 2484 | STATIC int |
| 2478 | xlog_recover_dquot_pass2( | 2485 | xlog_recover_dquot_pass2( |
| 2479 | xlog_t *log, | 2486 | xlog_t *log, |
| 2487 | struct list_head *buffer_list, | ||
| 2480 | xlog_recover_item_t *item) | 2488 | xlog_recover_item_t *item) |
| 2481 | { | 2489 | { |
| 2482 | xfs_mount_t *mp = log->l_mp; | 2490 | xfs_mount_t *mp = log->l_mp; |
| @@ -2530,14 +2538,11 @@ xlog_recover_dquot_pass2( | |||
| 2530 | return XFS_ERROR(EIO); | 2538 | return XFS_ERROR(EIO); |
| 2531 | ASSERT(dq_f->qlf_len == 1); | 2539 | ASSERT(dq_f->qlf_len == 1); |
| 2532 | 2540 | ||
| 2533 | error = xfs_read_buf(mp, mp->m_ddev_targp, | 2541 | error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, dq_f->qlf_blkno, |
| 2534 | dq_f->qlf_blkno, | 2542 | XFS_FSB_TO_BB(mp, dq_f->qlf_len), 0, &bp); |
| 2535 | XFS_FSB_TO_BB(mp, dq_f->qlf_len), | 2543 | if (error) |
| 2536 | 0, &bp); | ||
| 2537 | if (error) { | ||
| 2538 | xfs_buf_ioerror_alert(bp, "xlog_recover_do..(read#3)"); | ||
| 2539 | return error; | 2544 | return error; |
| 2540 | } | 2545 | |
| 2541 | ASSERT(bp); | 2546 | ASSERT(bp); |
| 2542 | ddq = (xfs_disk_dquot_t *)xfs_buf_offset(bp, dq_f->qlf_boffset); | 2547 | ddq = (xfs_disk_dquot_t *)xfs_buf_offset(bp, dq_f->qlf_boffset); |
| 2543 | 2548 | ||
| @@ -2558,7 +2563,7 @@ xlog_recover_dquot_pass2( | |||
| 2558 | ASSERT(dq_f->qlf_size == 2); | 2563 | ASSERT(dq_f->qlf_size == 2); |
| 2559 | ASSERT(bp->b_target->bt_mount == mp); | 2564 | ASSERT(bp->b_target->bt_mount == mp); |
| 2560 | bp->b_iodone = xlog_recover_iodone; | 2565 | bp->b_iodone = xlog_recover_iodone; |
| 2561 | xfs_buf_delwri_queue(bp); | 2566 | xfs_buf_delwri_queue(bp, buffer_list); |
| 2562 | xfs_buf_relse(bp); | 2567 | xfs_buf_relse(bp); |
| 2563 | 2568 | ||
| 2564 | return (0); | 2569 | return (0); |
| @@ -2642,7 +2647,8 @@ xlog_recover_efd_pass2( | |||
| 2642 | * xfs_trans_ail_delete() drops the | 2647 | * xfs_trans_ail_delete() drops the |
| 2643 | * AIL lock. | 2648 | * AIL lock. |
| 2644 | */ | 2649 | */ |
| 2645 | xfs_trans_ail_delete(ailp, lip); | 2650 | xfs_trans_ail_delete(ailp, lip, |
| 2651 | SHUTDOWN_CORRUPT_INCORE); | ||
| 2646 | xfs_efi_item_free(efip); | 2652 | xfs_efi_item_free(efip); |
| 2647 | spin_lock(&ailp->xa_lock); | 2653 | spin_lock(&ailp->xa_lock); |
| 2648 | break; | 2654 | break; |
| @@ -2712,21 +2718,22 @@ STATIC int | |||
| 2712 | xlog_recover_commit_pass2( | 2718 | xlog_recover_commit_pass2( |
| 2713 | struct log *log, | 2719 | struct log *log, |
| 2714 | struct xlog_recover *trans, | 2720 | struct xlog_recover *trans, |
| 2721 | struct list_head *buffer_list, | ||
| 2715 | xlog_recover_item_t *item) | 2722 | xlog_recover_item_t *item) |
| 2716 | { | 2723 | { |
| 2717 | trace_xfs_log_recover_item_recover(log, trans, item, XLOG_RECOVER_PASS2); | 2724 | trace_xfs_log_recover_item_recover(log, trans, item, XLOG_RECOVER_PASS2); |
| 2718 | 2725 | ||
| 2719 | switch (ITEM_TYPE(item)) { | 2726 | switch (ITEM_TYPE(item)) { |
| 2720 | case XFS_LI_BUF: | 2727 | case XFS_LI_BUF: |
| 2721 | return xlog_recover_buffer_pass2(log, item); | 2728 | return xlog_recover_buffer_pass2(log, buffer_list, item); |
| 2722 | case XFS_LI_INODE: | 2729 | case XFS_LI_INODE: |
| 2723 | return xlog_recover_inode_pass2(log, item); | 2730 | return xlog_recover_inode_pass2(log, buffer_list, item); |
| 2724 | case XFS_LI_EFI: | 2731 | case XFS_LI_EFI: |
| 2725 | return xlog_recover_efi_pass2(log, item, trans->r_lsn); | 2732 | return xlog_recover_efi_pass2(log, item, trans->r_lsn); |
| 2726 | case XFS_LI_EFD: | 2733 | case XFS_LI_EFD: |
| 2727 | return xlog_recover_efd_pass2(log, item); | 2734 | return xlog_recover_efd_pass2(log, item); |
| 2728 | case XFS_LI_DQUOT: | 2735 | case XFS_LI_DQUOT: |
| 2729 | return xlog_recover_dquot_pass2(log, item); | 2736 | return xlog_recover_dquot_pass2(log, buffer_list, item); |
| 2730 | case XFS_LI_QUOTAOFF: | 2737 | case XFS_LI_QUOTAOFF: |
| 2731 | /* nothing to do in pass2 */ | 2738 | /* nothing to do in pass2 */ |
| 2732 | return 0; | 2739 | return 0; |
| @@ -2750,8 +2757,9 @@ xlog_recover_commit_trans( | |||
| 2750 | struct xlog_recover *trans, | 2757 | struct xlog_recover *trans, |
| 2751 | int pass) | 2758 | int pass) |
| 2752 | { | 2759 | { |
| 2753 | int error = 0; | 2760 | int error = 0, error2; |
| 2754 | xlog_recover_item_t *item; | 2761 | xlog_recover_item_t *item; |
| 2762 | LIST_HEAD (buffer_list); | ||
| 2755 | 2763 | ||
| 2756 | hlist_del(&trans->r_list); | 2764 | hlist_del(&trans->r_list); |
| 2757 | 2765 | ||
| @@ -2760,16 +2768,27 @@ xlog_recover_commit_trans( | |||
| 2760 | return error; | 2768 | return error; |
| 2761 | 2769 | ||
| 2762 | list_for_each_entry(item, &trans->r_itemq, ri_list) { | 2770 | list_for_each_entry(item, &trans->r_itemq, ri_list) { |
| 2763 | if (pass == XLOG_RECOVER_PASS1) | 2771 | switch (pass) { |
| 2772 | case XLOG_RECOVER_PASS1: | ||
| 2764 | error = xlog_recover_commit_pass1(log, trans, item); | 2773 | error = xlog_recover_commit_pass1(log, trans, item); |
| 2765 | else | 2774 | break; |
| 2766 | error = xlog_recover_commit_pass2(log, trans, item); | 2775 | case XLOG_RECOVER_PASS2: |
| 2776 | error = xlog_recover_commit_pass2(log, trans, | ||
| 2777 | &buffer_list, item); | ||
| 2778 | break; | ||
| 2779 | default: | ||
| 2780 | ASSERT(0); | ||
| 2781 | } | ||
| 2782 | |||
| 2767 | if (error) | 2783 | if (error) |
| 2768 | return error; | 2784 | goto out; |
| 2769 | } | 2785 | } |
| 2770 | 2786 | ||
| 2771 | xlog_recover_free_trans(trans); | 2787 | xlog_recover_free_trans(trans); |
| 2772 | return 0; | 2788 | |
| 2789 | out: | ||
| 2790 | error2 = xfs_buf_delwri_submit(&buffer_list); | ||
| 2791 | return error ? error : error2; | ||
| 2773 | } | 2792 | } |
| 2774 | 2793 | ||
| 2775 | STATIC int | 2794 | STATIC int |
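
The recovery path above now threads a local buffer_list through every pass-2 handler and submits it once per transaction via xfs_buf_delwri_submit(), instead of relying on the old global delayed-write queue. A userspace analog of that queue-then-batch-submit shape (stand-in names and structures, not the XFS API):

#include <stdio.h>

#define MAX_BATCH 8

struct buf { int blkno; };

struct buffer_list {
	struct buf *bufs[MAX_BATCH];
	int count;
};

/* xfs_buf_delwri_queue(bp, buffer_list) analog: just remember the buffer */
static void delwri_queue(struct buffer_list *list, struct buf *bp)
{
	list->bufs[list->count++] = bp;
}

/* xfs_buf_delwri_submit(buffer_list) analog: one batched submission */
static int delwri_submit(struct buffer_list *list)
{
	int i;

	for (i = 0; i < list->count; i++)
		printf("writing block %d\n", list->bufs[i]->blkno);
	list->count = 0;
	return 0;	/* the real helper returns the first I/O error */
}

int main(void)
{
	struct buffer_list list = { .count = 0 };
	struct buf a = { 10 }, b = { 20 };

	delwri_queue(&list, &a);	/* per-item handlers queue... */
	delwri_queue(&list, &b);
	return delwri_submit(&list);	/* ...the commit submits once */
}
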
| @@ -3079,7 +3098,7 @@ xlog_recover_process_one_iunlink( | |||
| 3079 | /* | 3098 | /* |
| 3080 | * Get the on disk inode to find the next inode in the bucket. | 3099 | * Get the on disk inode to find the next inode in the bucket. |
| 3081 | */ | 3100 | */ |
| 3082 | error = xfs_itobp(mp, NULL, ip, &dip, &ibp, XBF_LOCK); | 3101 | error = xfs_itobp(mp, NULL, ip, &dip, &ibp, 0); |
| 3083 | if (error) | 3102 | if (error) |
| 3084 | goto fail_iput; | 3103 | goto fail_iput; |
| 3085 | 3104 | ||
| @@ -3639,11 +3658,8 @@ xlog_do_recover( | |||
| 3639 | * First replay the images in the log. | 3658 | * First replay the images in the log. |
| 3640 | */ | 3659 | */ |
| 3641 | error = xlog_do_log_recovery(log, head_blk, tail_blk); | 3660 | error = xlog_do_log_recovery(log, head_blk, tail_blk); |
| 3642 | if (error) { | 3661 | if (error) |
| 3643 | return error; | 3662 | return error; |
| 3644 | } | ||
| 3645 | |||
| 3646 | xfs_flush_buftarg(log->l_mp->m_ddev_targp, 1); | ||
| 3647 | 3663 | ||
| 3648 | /* | 3664 | /* |
| 3649 | * If IO errors happened during recovery, bail out. | 3665 | * If IO errors happened during recovery, bail out. |
| @@ -3670,7 +3686,6 @@ xlog_do_recover( | |||
| 3670 | bp = xfs_getsb(log->l_mp, 0); | 3686 | bp = xfs_getsb(log->l_mp, 0); |
| 3671 | XFS_BUF_UNDONE(bp); | 3687 | XFS_BUF_UNDONE(bp); |
| 3672 | ASSERT(!(XFS_BUF_ISWRITE(bp))); | 3688 | ASSERT(!(XFS_BUF_ISWRITE(bp))); |
| 3673 | ASSERT(!(XFS_BUF_ISDELAYWRITE(bp))); | ||
| 3674 | XFS_BUF_READ(bp); | 3689 | XFS_BUF_READ(bp); |
| 3675 | XFS_BUF_UNASYNC(bp); | 3690 | XFS_BUF_UNASYNC(bp); |
| 3676 | xfsbdstrat(log->l_mp, bp); | 3691 | xfsbdstrat(log->l_mp, bp); |
diff --git a/fs/xfs/xfs_message.c b/fs/xfs/xfs_message.c index bd672def95ac..331cd9f83a7f 100644 --- a/fs/xfs/xfs_message.c +++ b/fs/xfs/xfs_message.c | |||
| @@ -19,7 +19,6 @@ | |||
| 19 | #include "xfs_fs.h" | 19 | #include "xfs_fs.h" |
| 20 | #include "xfs_types.h" | 20 | #include "xfs_types.h" |
| 21 | #include "xfs_log.h" | 21 | #include "xfs_log.h" |
| 22 | #include "xfs_inum.h" | ||
| 23 | #include "xfs_trans.h" | 22 | #include "xfs_trans.h" |
| 24 | #include "xfs_sb.h" | 23 | #include "xfs_sb.h" |
| 25 | #include "xfs_ag.h" | 24 | #include "xfs_ag.h" |
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 1ffead4b2296..536021fb3d4e 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c | |||
| @@ -22,6 +22,7 @@ | |||
| 22 | #include "xfs_log.h" | 22 | #include "xfs_log.h" |
| 23 | #include "xfs_inum.h" | 23 | #include "xfs_inum.h" |
| 24 | #include "xfs_trans.h" | 24 | #include "xfs_trans.h" |
| 25 | #include "xfs_trans_priv.h" | ||
| 25 | #include "xfs_sb.h" | 26 | #include "xfs_sb.h" |
| 26 | #include "xfs_ag.h" | 27 | #include "xfs_ag.h" |
| 27 | #include "xfs_dir2.h" | 28 | #include "xfs_dir2.h" |
| @@ -37,7 +38,6 @@ | |||
| 37 | #include "xfs_rtalloc.h" | 38 | #include "xfs_rtalloc.h" |
| 38 | #include "xfs_bmap.h" | 39 | #include "xfs_bmap.h" |
| 39 | #include "xfs_error.h" | 40 | #include "xfs_error.h" |
| 40 | #include "xfs_rw.h" | ||
| 41 | #include "xfs_quota.h" | 41 | #include "xfs_quota.h" |
| 42 | #include "xfs_fsops.h" | 42 | #include "xfs_fsops.h" |
| 43 | #include "xfs_utils.h" | 43 | #include "xfs_utils.h" |
| @@ -683,8 +683,8 @@ xfs_readsb(xfs_mount_t *mp, int flags) | |||
| 683 | sector_size = xfs_getsize_buftarg(mp->m_ddev_targp); | 683 | sector_size = xfs_getsize_buftarg(mp->m_ddev_targp); |
| 684 | 684 | ||
| 685 | reread: | 685 | reread: |
| 686 | bp = xfs_buf_read_uncached(mp, mp->m_ddev_targp, | 686 | bp = xfs_buf_read_uncached(mp->m_ddev_targp, XFS_SB_DADDR, |
| 687 | XFS_SB_DADDR, sector_size, 0); | 687 | BTOBB(sector_size), 0); |
| 688 | if (!bp) { | 688 | if (!bp) { |
| 689 | if (loud) | 689 | if (loud) |
| 690 | xfs_warn(mp, "SB buffer read failed"); | 690 | xfs_warn(mp, "SB buffer read failed"); |
| @@ -1032,9 +1032,9 @@ xfs_check_sizes(xfs_mount_t *mp) | |||
| 1032 | xfs_warn(mp, "filesystem size mismatch detected"); | 1032 | xfs_warn(mp, "filesystem size mismatch detected"); |
| 1033 | return XFS_ERROR(EFBIG); | 1033 | return XFS_ERROR(EFBIG); |
| 1034 | } | 1034 | } |
| 1035 | bp = xfs_buf_read_uncached(mp, mp->m_ddev_targp, | 1035 | bp = xfs_buf_read_uncached(mp->m_ddev_targp, |
| 1036 | d - XFS_FSS_TO_BB(mp, 1), | 1036 | d - XFS_FSS_TO_BB(mp, 1), |
| 1037 | BBTOB(XFS_FSS_TO_BB(mp, 1)), 0); | 1037 | XFS_FSS_TO_BB(mp, 1), 0); |
| 1038 | if (!bp) { | 1038 | if (!bp) { |
| 1039 | xfs_warn(mp, "last sector read failed"); | 1039 | xfs_warn(mp, "last sector read failed"); |
| 1040 | return EIO; | 1040 | return EIO; |
| @@ -1047,9 +1047,9 @@ xfs_check_sizes(xfs_mount_t *mp) | |||
| 1047 | xfs_warn(mp, "log size mismatch detected"); | 1047 | xfs_warn(mp, "log size mismatch detected"); |
| 1048 | return XFS_ERROR(EFBIG); | 1048 | return XFS_ERROR(EFBIG); |
| 1049 | } | 1049 | } |
| 1050 | bp = xfs_buf_read_uncached(mp, mp->m_logdev_targp, | 1050 | bp = xfs_buf_read_uncached(mp->m_logdev_targp, |
| 1051 | d - XFS_FSB_TO_BB(mp, 1), | 1051 | d - XFS_FSB_TO_BB(mp, 1), |
| 1052 | XFS_FSB_TO_B(mp, 1), 0); | 1052 | XFS_FSB_TO_BB(mp, 1), 0); |
| 1053 | if (!bp) { | 1053 | if (!bp) { |
| 1054 | xfs_warn(mp, "log device read failed"); | 1054 | xfs_warn(mp, "log device read failed"); |
| 1055 | return EIO; | 1055 | return EIO; |
| @@ -1288,7 +1288,7 @@ xfs_mountfs( | |||
| 1288 | XFS_FSB_TO_BB(mp, sbp->sb_logblocks)); | 1288 | XFS_FSB_TO_BB(mp, sbp->sb_logblocks)); |
| 1289 | if (error) { | 1289 | if (error) { |
| 1290 | xfs_warn(mp, "log mount failed"); | 1290 | xfs_warn(mp, "log mount failed"); |
| 1291 | goto out_free_perag; | 1291 | goto out_fail_wait; |
| 1292 | } | 1292 | } |
| 1293 | 1293 | ||
| 1294 | /* | 1294 | /* |
| @@ -1315,7 +1315,7 @@ xfs_mountfs( | |||
| 1315 | !mp->m_sb.sb_inprogress) { | 1315 | !mp->m_sb.sb_inprogress) { |
| 1316 | error = xfs_initialize_perag_data(mp, sbp->sb_agcount); | 1316 | error = xfs_initialize_perag_data(mp, sbp->sb_agcount); |
| 1317 | if (error) | 1317 | if (error) |
| 1318 | goto out_free_perag; | 1318 | goto out_fail_wait; |
| 1319 | } | 1319 | } |
| 1320 | 1320 | ||
| 1321 | /* | 1321 | /* |
| @@ -1439,6 +1439,10 @@ xfs_mountfs( | |||
| 1439 | IRELE(rip); | 1439 | IRELE(rip); |
| 1440 | out_log_dealloc: | 1440 | out_log_dealloc: |
| 1441 | xfs_log_unmount(mp); | 1441 | xfs_log_unmount(mp); |
| 1442 | out_fail_wait: | ||
| 1443 | if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) | ||
| 1444 | xfs_wait_buftarg(mp->m_logdev_targp); | ||
| 1445 | xfs_wait_buftarg(mp->m_ddev_targp); | ||
| 1442 | out_free_perag: | 1446 | out_free_perag: |
| 1443 | xfs_free_perag(mp); | 1447 | xfs_free_perag(mp); |
| 1444 | out_remove_uuid: | 1448 | out_remove_uuid: |
| @@ -1475,15 +1479,15 @@ xfs_unmountfs( | |||
| 1475 | xfs_log_force(mp, XFS_LOG_SYNC); | 1479 | xfs_log_force(mp, XFS_LOG_SYNC); |
| 1476 | 1480 | ||
| 1477 | /* | 1481 | /* |
| 1478 | * Do a delwri reclaim pass first so that as many dirty inodes are | 1482 | * Flush all pending changes from the AIL. |
| 1479 | * queued up for IO as possible. Then flush the buffers before making | 1483 | */ |
| 1480 | * a synchronous pass to ensure all the remaining inodes are reclaimed. | 1484 | xfs_ail_push_all_sync(mp->m_ail); |
| 1481 | * This makes the reclaim process as quick as possible by avoiding | 1485 | |
| 1482 | * synchronous writeout and blocking on inodes already in the delwri | 1486 | /* |
| 1483 | * state as much as possible. | 1487 | * And reclaim all inodes. At this point there should be no dirty |
| 1488 | * inode, and none should be pinned or locked, but use synchronous | ||
| 1489 | * reclaim just to be sure. | ||
| 1484 | */ | 1490 | */ |
| 1485 | xfs_reclaim_inodes(mp, 0); | ||
| 1486 | xfs_flush_buftarg(mp->m_ddev_targp, 1); | ||
| 1487 | xfs_reclaim_inodes(mp, SYNC_WAIT); | 1491 | xfs_reclaim_inodes(mp, SYNC_WAIT); |
| 1488 | 1492 | ||
| 1489 | xfs_qm_unmount(mp); | 1493 | xfs_qm_unmount(mp); |
| @@ -1519,15 +1523,12 @@ xfs_unmountfs( | |||
| 1519 | if (error) | 1523 | if (error) |
| 1520 | xfs_warn(mp, "Unable to update superblock counters. " | 1524 | xfs_warn(mp, "Unable to update superblock counters. " |
| 1521 | "Freespace may not be correct on next mount."); | 1525 | "Freespace may not be correct on next mount."); |
| 1522 | xfs_unmountfs_writesb(mp); | ||
| 1523 | 1526 | ||
| 1524 | /* | 1527 | /* |
| 1525 | * Make sure all buffers have been flushed and completed before | 1528 | * At this point we might have modified the superblock again and thus |
| 1526 | * unmounting the log. | 1529 | * added an item to the AIL, thus flush it again. |
| 1527 | */ | 1530 | */ |
| 1528 | error = xfs_flush_buftarg(mp->m_ddev_targp, 1); | 1531 | xfs_ail_push_all_sync(mp->m_ail); |
| 1529 | if (error) | ||
| 1530 | xfs_warn(mp, "%d busy buffers during unmount.", error); | ||
| 1531 | xfs_wait_buftarg(mp->m_ddev_targp); | 1532 | xfs_wait_buftarg(mp->m_ddev_targp); |
| 1532 | 1533 | ||
| 1533 | xfs_log_unmount_write(mp); | 1534 | xfs_log_unmount_write(mp); |
| @@ -1588,36 +1589,6 @@ xfs_log_sbcount(xfs_mount_t *mp) | |||
| 1588 | return error; | 1589 | return error; |
| 1589 | } | 1590 | } |
| 1590 | 1591 | ||
| 1591 | int | ||
| 1592 | xfs_unmountfs_writesb(xfs_mount_t *mp) | ||
| 1593 | { | ||
| 1594 | xfs_buf_t *sbp; | ||
| 1595 | int error = 0; | ||
| 1596 | |||
| 1597 | /* | ||
| 1598 | * skip superblock write if fs is read-only, or | ||
| 1599 | * if we are doing a forced umount. | ||
| 1600 | */ | ||
| 1601 | if (!((mp->m_flags & XFS_MOUNT_RDONLY) || | ||
| 1602 | XFS_FORCED_SHUTDOWN(mp))) { | ||
| 1603 | |||
| 1604 | sbp = xfs_getsb(mp, 0); | ||
| 1605 | |||
| 1606 | XFS_BUF_UNDONE(sbp); | ||
| 1607 | XFS_BUF_UNREAD(sbp); | ||
| 1608 | xfs_buf_delwri_dequeue(sbp); | ||
| 1609 | XFS_BUF_WRITE(sbp); | ||
| 1610 | XFS_BUF_UNASYNC(sbp); | ||
| 1611 | ASSERT(sbp->b_target == mp->m_ddev_targp); | ||
| 1612 | xfsbdstrat(mp, sbp); | ||
| 1613 | error = xfs_buf_iowait(sbp); | ||
| 1614 | if (error) | ||
| 1615 | xfs_buf_ioerror_alert(sbp, __func__); | ||
| 1616 | xfs_buf_relse(sbp); | ||
| 1617 | } | ||
| 1618 | return error; | ||
| 1619 | } | ||
| 1620 | |||
| 1621 | /* | 1592 | /* |
| 1622 | * xfs_mod_sb() can be used to copy arbitrary changes to the | 1593 | * xfs_mod_sb() can be used to copy arbitrary changes to the |
| 1623 | * in-core superblock into the superblock buffer to be logged. | 1594 | * in-core superblock into the superblock buffer to be logged. |
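
Several hunks above switch the xfs_buf_read_uncached() callers from byte counts to basic-block counts, hence the new BTOBB() and XFS_FSS_TO_BB() arguments. A small standalone reminder of the conversion, mirroring the kernel's 512-byte basic block (BBSHIFT == 9):

#include <stdio.h>

#define BBSHIFT		9
#define BBTOB(bbs)	((bbs) << BBSHIFT)
#define BTOBB(bytes)	(((bytes) + (1 << BBSHIFT) - 1) >> BBSHIFT)

int main(void)
{
	int sector_size = 4096;	/* bytes, e.g. from xfs_getsize_buftarg() */

	/* xfs_readsb() now passes BTOBB(sector_size) instead of bytes */
	printf("%d bytes = %d basic blocks\n", sector_size, BTOBB(sector_size));
	printf("%d basic blocks = %d bytes\n", 8, BBTOB(8));
	return 0;
}
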
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 9eba73887829..8b89c5ac72d9 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h | |||
| @@ -214,6 +214,7 @@ typedef struct xfs_mount { | |||
| 214 | 214 | ||
| 215 | struct workqueue_struct *m_data_workqueue; | 215 | struct workqueue_struct *m_data_workqueue; |
| 216 | struct workqueue_struct *m_unwritten_workqueue; | 216 | struct workqueue_struct *m_unwritten_workqueue; |
| 217 | struct workqueue_struct *m_cil_workqueue; | ||
| 217 | } xfs_mount_t; | 218 | } xfs_mount_t; |
| 218 | 219 | ||
| 219 | /* | 220 | /* |
| @@ -378,7 +379,6 @@ extern __uint64_t xfs_default_resblks(xfs_mount_t *mp); | |||
| 378 | extern int xfs_mountfs(xfs_mount_t *mp); | 379 | extern int xfs_mountfs(xfs_mount_t *mp); |
| 379 | 380 | ||
| 380 | extern void xfs_unmountfs(xfs_mount_t *); | 381 | extern void xfs_unmountfs(xfs_mount_t *); |
| 381 | extern int xfs_unmountfs_writesb(xfs_mount_t *); | ||
| 382 | extern int xfs_mod_incore_sb(xfs_mount_t *, xfs_sb_field_t, int64_t, int); | 382 | extern int xfs_mod_incore_sb(xfs_mount_t *, xfs_sb_field_t, int64_t, int); |
| 383 | extern int xfs_mod_incore_sb_batch(xfs_mount_t *, xfs_mod_sb_t *, | 383 | extern int xfs_mod_incore_sb_batch(xfs_mount_t *, xfs_mod_sb_t *, |
| 384 | uint, int); | 384 | uint, int); |
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index 55c6afedc879..249db1987764 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c | |||
| @@ -19,7 +19,6 @@ | |||
| 19 | #include "xfs_fs.h" | 19 | #include "xfs_fs.h" |
| 20 | #include "xfs_bit.h" | 20 | #include "xfs_bit.h" |
| 21 | #include "xfs_log.h" | 21 | #include "xfs_log.h" |
| 22 | #include "xfs_inum.h" | ||
| 23 | #include "xfs_trans.h" | 22 | #include "xfs_trans.h" |
| 24 | #include "xfs_sb.h" | 23 | #include "xfs_sb.h" |
| 25 | #include "xfs_ag.h" | 24 | #include "xfs_ag.h" |
| @@ -65,7 +64,8 @@ STATIC int | |||
| 65 | xfs_qm_dquot_walk( | 64 | xfs_qm_dquot_walk( |
| 66 | struct xfs_mount *mp, | 65 | struct xfs_mount *mp, |
| 67 | int type, | 66 | int type, |
| 68 | int (*execute)(struct xfs_dquot *dqp)) | 67 | int (*execute)(struct xfs_dquot *dqp, void *data), |
| 68 | void *data) | ||
| 69 | { | 69 | { |
| 70 | struct xfs_quotainfo *qi = mp->m_quotainfo; | 70 | struct xfs_quotainfo *qi = mp->m_quotainfo; |
| 71 | struct radix_tree_root *tree = XFS_DQUOT_TREE(qi, type); | 71 | struct radix_tree_root *tree = XFS_DQUOT_TREE(qi, type); |
| @@ -97,7 +97,7 @@ restart: | |||
| 97 | 97 | ||
| 98 | next_index = be32_to_cpu(dqp->q_core.d_id) + 1; | 98 | next_index = be32_to_cpu(dqp->q_core.d_id) + 1; |
| 99 | 99 | ||
| 100 | error = execute(batch[i]); | 100 | error = execute(batch[i], data); |
| 101 | if (error == EAGAIN) { | 101 | if (error == EAGAIN) { |
| 102 | skipped++; | 102 | skipped++; |
| 103 | continue; | 103 | continue; |
| @@ -129,7 +129,8 @@ restart: | |||
| 129 | */ | 129 | */ |
| 130 | STATIC int | 130 | STATIC int |
| 131 | xfs_qm_dqpurge( | 131 | xfs_qm_dqpurge( |
| 132 | struct xfs_dquot *dqp) | 132 | struct xfs_dquot *dqp, |
| 133 | void *data) | ||
| 133 | { | 134 | { |
| 134 | struct xfs_mount *mp = dqp->q_mount; | 135 | struct xfs_mount *mp = dqp->q_mount; |
| 135 | struct xfs_quotainfo *qi = mp->m_quotainfo; | 136 | struct xfs_quotainfo *qi = mp->m_quotainfo; |
| @@ -153,21 +154,7 @@ xfs_qm_dqpurge( | |||
| 153 | 154 | ||
| 154 | dqp->dq_flags |= XFS_DQ_FREEING; | 155 | dqp->dq_flags |= XFS_DQ_FREEING; |
| 155 | 156 | ||
| 156 | /* | 157 | xfs_dqflock(dqp); |
| 157 | * If we're turning off quotas, we have to make sure that, for | ||
| 158 | * example, we don't delete quota disk blocks while dquots are | ||
| 159 | * in the process of getting written to those disk blocks. | ||
| 160 | * This dquot might well be on AIL, and we can't leave it there | ||
| 161 | * if we're turning off quotas. Basically, we need this flush | ||
| 162 | * lock, and are willing to block on it. | ||
| 163 | */ | ||
| 164 | if (!xfs_dqflock_nowait(dqp)) { | ||
| 165 | /* | ||
| 166 | * Block on the flush lock after nudging dquot buffer, | ||
| 167 | * if it is incore. | ||
| 168 | */ | ||
| 169 | xfs_dqflock_pushbuf_wait(dqp); | ||
| 170 | } | ||
| 171 | 158 | ||
| 172 | /* | 159 | /* |
| 173 | * If we are turning this type of quotas off, we don't care | 160 | * If we are turning this type of quotas off, we don't care |
| @@ -175,16 +162,21 @@ xfs_qm_dqpurge( | |||
| 175 | * we're unmounting, we do care, so we flush it and wait. | 162 | * we're unmounting, we do care, so we flush it and wait. |
| 176 | */ | 163 | */ |
| 177 | if (XFS_DQ_IS_DIRTY(dqp)) { | 164 | if (XFS_DQ_IS_DIRTY(dqp)) { |
| 178 | int error; | 165 | struct xfs_buf *bp = NULL; |
| 166 | int error; | ||
| 179 | 167 | ||
| 180 | /* | 168 | /* |
| 181 | * We don't care about getting disk errors here. We need | 169 | * We don't care about getting disk errors here. We need |
| 182 | * to purge this dquot anyway, so we go ahead regardless. | 170 | * to purge this dquot anyway, so we go ahead regardless. |
| 183 | */ | 171 | */ |
| 184 | error = xfs_qm_dqflush(dqp, SYNC_WAIT); | 172 | error = xfs_qm_dqflush(dqp, &bp); |
| 185 | if (error) | 173 | if (error) { |
| 186 | xfs_warn(mp, "%s: dquot %p flush failed", | 174 | xfs_warn(mp, "%s: dquot %p flush failed", |
| 187 | __func__, dqp); | 175 | __func__, dqp); |
| 176 | } else { | ||
| 177 | error = xfs_bwrite(bp); | ||
| 178 | xfs_buf_relse(bp); | ||
| 179 | } | ||
| 188 | xfs_dqflock(dqp); | 180 | xfs_dqflock(dqp); |
| 189 | } | 181 | } |
| 190 | 182 | ||
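
xfs_qm_dqflush() above no longer issues I/O itself: it formats the dquot into its backing buffer and hands that buffer back, leaving the strategy to the caller — a synchronous xfs_bwrite() here in dqpurge, delwri queueing in quotacheck and reclaim below. A userspace analog of that hand-the-buffer-back contract (all names are stand-ins):

#include <stdio.h>

struct buf { int payload; };

/* dqflush analog: format the change into the buffer and return it,
 * still held, without starting any I/O. */
static int dqflush(int dirty_value, struct buf **bpp)
{
	static struct buf b;

	b.payload = dirty_value;
	*bpp = &b;
	return 0;
}

int main(void)
{
	struct buf *bp = NULL;

	if (dqflush(42, &bp) == 0) {
		printf("sync write of %d\n", bp->payload);	/* xfs_bwrite() analog */
		/* xfs_buf_relse(bp) would drop the buffer here */
	}
	return 0;
}
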
| @@ -226,11 +218,11 @@ xfs_qm_dqpurge_all( | |||
| 226 | uint flags) | 218 | uint flags) |
| 227 | { | 219 | { |
| 228 | if (flags & XFS_QMOPT_UQUOTA) | 220 | if (flags & XFS_QMOPT_UQUOTA) |
| 229 | xfs_qm_dquot_walk(mp, XFS_DQ_USER, xfs_qm_dqpurge); | 221 | xfs_qm_dquot_walk(mp, XFS_DQ_USER, xfs_qm_dqpurge, NULL); |
| 230 | if (flags & XFS_QMOPT_GQUOTA) | 222 | if (flags & XFS_QMOPT_GQUOTA) |
| 231 | xfs_qm_dquot_walk(mp, XFS_DQ_GROUP, xfs_qm_dqpurge); | 223 | xfs_qm_dquot_walk(mp, XFS_DQ_GROUP, xfs_qm_dqpurge, NULL); |
| 232 | if (flags & XFS_QMOPT_PQUOTA) | 224 | if (flags & XFS_QMOPT_PQUOTA) |
| 233 | xfs_qm_dquot_walk(mp, XFS_DQ_PROJ, xfs_qm_dqpurge); | 225 | xfs_qm_dquot_walk(mp, XFS_DQ_PROJ, xfs_qm_dqpurge, NULL); |
| 234 | } | 226 | } |
| 235 | 227 | ||
| 236 | /* | 228 | /* |
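
xfs_qm_dquot_walk() now takes a `void *data` cookie and passes it to each callback, which is how quotacheck later hands its local buffer list to xfs_qm_flush_one(). A standalone sketch of the callback-with-context idiom (stand-in types):

#include <stdio.h>

struct dquot { int id; };

static int walk(struct dquot *dqs, int n,
		int (*execute)(struct dquot *dqp, void *data), void *data)
{
	int i, error;

	for (i = 0; i < n; i++) {
		error = execute(&dqs[i], data);	/* context travels along */
		if (error)
			return error;
	}
	return 0;
}

static int count_one(struct dquot *dqp, void *data)
{
	int *count = data;	/* caller-supplied context */

	(*count)++;
	return 0;
}

int main(void)
{
	struct dquot dqs[3] = { {1}, {2}, {3} };
	int count = 0;

	walk(dqs, 3, count_one, &count);
	printf("visited %d dquots\n", count);	/* visited 3 dquots */
	return 0;
}
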
| @@ -483,6 +475,23 @@ done: | |||
| 483 | xfs_dqunlock(udq); | 475 | xfs_dqunlock(udq); |
| 484 | } | 476 | } |
| 485 | 477 | ||
| 478 | static bool | ||
| 479 | xfs_qm_need_dqattach( | ||
| 480 | struct xfs_inode *ip) | ||
| 481 | { | ||
| 482 | struct xfs_mount *mp = ip->i_mount; | ||
| 483 | |||
| 484 | if (!XFS_IS_QUOTA_RUNNING(mp)) | ||
| 485 | return false; | ||
| 486 | if (!XFS_IS_QUOTA_ON(mp)) | ||
| 487 | return false; | ||
| 488 | if (!XFS_NOT_DQATTACHED(mp, ip)) | ||
| 489 | return false; | ||
| 490 | if (ip->i_ino == mp->m_sb.sb_uquotino || | ||
| 491 | ip->i_ino == mp->m_sb.sb_gquotino) | ||
| 492 | return false; | ||
| 493 | return true; | ||
| 494 | } | ||
| 486 | 495 | ||
| 487 | /* | 496 | /* |
| 488 | * Given a locked inode, attach dquot(s) to it, taking U/G/P-QUOTAON | 497 | * Given a locked inode, attach dquot(s) to it, taking U/G/P-QUOTAON |
| @@ -500,11 +509,7 @@ xfs_qm_dqattach_locked( | |||
| 500 | uint nquotas = 0; | 509 | uint nquotas = 0; |
| 501 | int error = 0; | 510 | int error = 0; |
| 502 | 511 | ||
| 503 | if (!XFS_IS_QUOTA_RUNNING(mp) || | 512 | if (!xfs_qm_need_dqattach(ip)) |
| 504 | !XFS_IS_QUOTA_ON(mp) || | ||
| 505 | !XFS_NOT_DQATTACHED(mp, ip) || | ||
| 506 | ip->i_ino == mp->m_sb.sb_uquotino || | ||
| 507 | ip->i_ino == mp->m_sb.sb_gquotino) | ||
| 508 | return 0; | 513 | return 0; |
| 509 | 514 | ||
| 510 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); | 515 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); |
| @@ -575,6 +580,9 @@ xfs_qm_dqattach( | |||
| 575 | { | 580 | { |
| 576 | int error; | 581 | int error; |
| 577 | 582 | ||
| 583 | if (!xfs_qm_need_dqattach(ip)) | ||
| 584 | return 0; | ||
| 585 | |||
| 578 | xfs_ilock(ip, XFS_ILOCK_EXCL); | 586 | xfs_ilock(ip, XFS_ILOCK_EXCL); |
| 579 | error = xfs_qm_dqattach_locked(ip, flags); | 587 | error = xfs_qm_dqattach_locked(ip, flags); |
| 580 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | 588 | xfs_iunlock(ip, XFS_ILOCK_EXCL); |
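
The new xfs_qm_need_dqattach() lets xfs_qm_dqattach() test the cheap nothing-to-do conditions before taking XFS_ILOCK_EXCL, so the common case never takes the inode lock at all. A standalone sketch of that check-before-lock shape (stand-in names; in the real code the re-check happens inside xfs_qm_dqattach_locked()):

#include <pthread.h>
#include <stdio.h>

struct demo_inode {
	pthread_mutex_t lock;
	int needs_attach;
};

static void dqattach(struct demo_inode *ip)
{
	if (!ip->needs_attach)		/* unlocked early check */
		return;			/* fast path: no lock taken */

	pthread_mutex_lock(&ip->lock);	/* xfs_ilock() analog */
	if (ip->needs_attach) {		/* re-check under the lock */
		printf("attaching dquots\n");
		ip->needs_attach = 0;
	}
	pthread_mutex_unlock(&ip->lock);	/* xfs_iunlock() analog */
}

int main(void)
{
	struct demo_inode ip = { PTHREAD_MUTEX_INITIALIZER, 1 };

	dqattach(&ip);	/* attaches */
	dqattach(&ip);	/* returns before locking */
	return 0;
}
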
| @@ -855,15 +863,16 @@ xfs_qm_reset_dqcounts( | |||
| 855 | 863 | ||
| 856 | STATIC int | 864 | STATIC int |
| 857 | xfs_qm_dqiter_bufs( | 865 | xfs_qm_dqiter_bufs( |
| 858 | xfs_mount_t *mp, | 866 | struct xfs_mount *mp, |
| 859 | xfs_dqid_t firstid, | 867 | xfs_dqid_t firstid, |
| 860 | xfs_fsblock_t bno, | 868 | xfs_fsblock_t bno, |
| 861 | xfs_filblks_t blkcnt, | 869 | xfs_filblks_t blkcnt, |
| 862 | uint flags) | 870 | uint flags, |
| 871 | struct list_head *buffer_list) | ||
| 863 | { | 872 | { |
| 864 | xfs_buf_t *bp; | 873 | struct xfs_buf *bp; |
| 865 | int error; | 874 | int error; |
| 866 | int type; | 875 | int type; |
| 867 | 876 | ||
| 868 | ASSERT(blkcnt > 0); | 877 | ASSERT(blkcnt > 0); |
| 869 | type = flags & XFS_QMOPT_UQUOTA ? XFS_DQ_USER : | 878 | type = flags & XFS_QMOPT_UQUOTA ? XFS_DQ_USER : |
| @@ -887,7 +896,7 @@ xfs_qm_dqiter_bufs( | |||
| 887 | break; | 896 | break; |
| 888 | 897 | ||
| 889 | xfs_qm_reset_dqcounts(mp, bp, firstid, type); | 898 | xfs_qm_reset_dqcounts(mp, bp, firstid, type); |
| 890 | xfs_buf_delwri_queue(bp); | 899 | xfs_buf_delwri_queue(bp, buffer_list); |
| 891 | xfs_buf_relse(bp); | 900 | xfs_buf_relse(bp); |
| 892 | /* | 901 | /* |
| 893 | * go to the next block. | 902 | * go to the next block. |
| @@ -895,6 +904,7 @@ xfs_qm_dqiter_bufs( | |||
| 895 | bno++; | 904 | bno++; |
| 896 | firstid += mp->m_quotainfo->qi_dqperchunk; | 905 | firstid += mp->m_quotainfo->qi_dqperchunk; |
| 897 | } | 906 | } |
| 907 | |||
| 898 | return error; | 908 | return error; |
| 899 | } | 909 | } |
| 900 | 910 | ||
| @@ -904,11 +914,12 @@ xfs_qm_dqiter_bufs( | |||
| 904 | */ | 914 | */ |
| 905 | STATIC int | 915 | STATIC int |
| 906 | xfs_qm_dqiterate( | 916 | xfs_qm_dqiterate( |
| 907 | xfs_mount_t *mp, | 917 | struct xfs_mount *mp, |
| 908 | xfs_inode_t *qip, | 918 | struct xfs_inode *qip, |
| 909 | uint flags) | 919 | uint flags, |
| 920 | struct list_head *buffer_list) | ||
| 910 | { | 921 | { |
| 911 | xfs_bmbt_irec_t *map; | 922 | struct xfs_bmbt_irec *map; |
| 912 | int i, nmaps; /* number of map entries */ | 923 | int i, nmaps; /* number of map entries */ |
| 913 | int error; /* return value */ | 924 | int error; /* return value */ |
| 914 | xfs_fileoff_t lblkno; | 925 | xfs_fileoff_t lblkno; |
| @@ -975,21 +986,17 @@ xfs_qm_dqiterate( | |||
| 975 | * Iterate thru all the blks in the extent and | 986 | * Iterate thru all the blks in the extent and |
| 976 | * reset the counters of all the dquots inside them. | 987 | * reset the counters of all the dquots inside them. |
| 977 | */ | 988 | */ |
| 978 | if ((error = xfs_qm_dqiter_bufs(mp, | 989 | error = xfs_qm_dqiter_bufs(mp, firstid, |
| 979 | firstid, | 990 | map[i].br_startblock, |
| 980 | map[i].br_startblock, | 991 | map[i].br_blockcount, |
| 981 | map[i].br_blockcount, | 992 | flags, buffer_list); |
| 982 | flags))) { | 993 | if (error) |
| 983 | break; | 994 | goto out; |
| 984 | } | ||
| 985 | } | 995 | } |
| 986 | |||
| 987 | if (error) | ||
| 988 | break; | ||
| 989 | } while (nmaps > 0); | 996 | } while (nmaps > 0); |
| 990 | 997 | ||
| 998 | out: | ||
| 991 | kmem_free(map); | 999 | kmem_free(map); |
| 992 | |||
| 993 | return error; | 1000 | return error; |
| 994 | } | 1001 | } |
| 995 | 1002 | ||
| @@ -1182,8 +1189,11 @@ error0: | |||
| 1182 | 1189 | ||
| 1183 | STATIC int | 1190 | STATIC int |
| 1184 | xfs_qm_flush_one( | 1191 | xfs_qm_flush_one( |
| 1185 | struct xfs_dquot *dqp) | 1192 | struct xfs_dquot *dqp, |
| 1193 | void *data) | ||
| 1186 | { | 1194 | { |
| 1195 | struct list_head *buffer_list = data; | ||
| 1196 | struct xfs_buf *bp = NULL; | ||
| 1187 | int error = 0; | 1197 | int error = 0; |
| 1188 | 1198 | ||
| 1189 | xfs_dqlock(dqp); | 1199 | xfs_dqlock(dqp); |
| @@ -1192,11 +1202,13 @@ xfs_qm_flush_one( | |||
| 1192 | if (!XFS_DQ_IS_DIRTY(dqp)) | 1202 | if (!XFS_DQ_IS_DIRTY(dqp)) |
| 1193 | goto out_unlock; | 1203 | goto out_unlock; |
| 1194 | 1204 | ||
| 1195 | if (!xfs_dqflock_nowait(dqp)) | 1205 | xfs_dqflock(dqp); |
| 1196 | xfs_dqflock_pushbuf_wait(dqp); | 1206 | error = xfs_qm_dqflush(dqp, &bp); |
| 1197 | 1207 | if (error) | |
| 1198 | error = xfs_qm_dqflush(dqp, 0); | 1208 | goto out_unlock; |
| 1199 | 1209 | ||
| 1210 | xfs_buf_delwri_queue(bp, buffer_list); | ||
| 1211 | xfs_buf_relse(bp); | ||
| 1200 | out_unlock: | 1212 | out_unlock: |
| 1201 | xfs_dqunlock(dqp); | 1213 | xfs_dqunlock(dqp); |
| 1202 | return error; | 1214 | return error; |
| @@ -1215,6 +1227,7 @@ xfs_qm_quotacheck( | |||
| 1215 | size_t structsz; | 1227 | size_t structsz; |
| 1216 | xfs_inode_t *uip, *gip; | 1228 | xfs_inode_t *uip, *gip; |
| 1217 | uint flags; | 1229 | uint flags; |
| 1230 | LIST_HEAD (buffer_list); | ||
| 1218 | 1231 | ||
| 1219 | count = INT_MAX; | 1232 | count = INT_MAX; |
| 1220 | structsz = 1; | 1233 | structsz = 1; |
| @@ -1233,7 +1246,8 @@ xfs_qm_quotacheck( | |||
| 1233 | */ | 1246 | */ |
| 1234 | uip = mp->m_quotainfo->qi_uquotaip; | 1247 | uip = mp->m_quotainfo->qi_uquotaip; |
| 1235 | if (uip) { | 1248 | if (uip) { |
| 1236 | error = xfs_qm_dqiterate(mp, uip, XFS_QMOPT_UQUOTA); | 1249 | error = xfs_qm_dqiterate(mp, uip, XFS_QMOPT_UQUOTA, |
| 1250 | &buffer_list); | ||
| 1237 | if (error) | 1251 | if (error) |
| 1238 | goto error_return; | 1252 | goto error_return; |
| 1239 | flags |= XFS_UQUOTA_CHKD; | 1253 | flags |= XFS_UQUOTA_CHKD; |
| @@ -1242,7 +1256,8 @@ xfs_qm_quotacheck( | |||
| 1242 | gip = mp->m_quotainfo->qi_gquotaip; | 1256 | gip = mp->m_quotainfo->qi_gquotaip; |
| 1243 | if (gip) { | 1257 | if (gip) { |
| 1244 | error = xfs_qm_dqiterate(mp, gip, XFS_IS_GQUOTA_ON(mp) ? | 1258 | error = xfs_qm_dqiterate(mp, gip, XFS_IS_GQUOTA_ON(mp) ? |
| 1245 | XFS_QMOPT_GQUOTA : XFS_QMOPT_PQUOTA); | 1259 | XFS_QMOPT_GQUOTA : XFS_QMOPT_PQUOTA, |
| 1260 | &buffer_list); | ||
| 1246 | if (error) | 1261 | if (error) |
| 1247 | goto error_return; | 1262 | goto error_return; |
| 1248 | flags |= XFS_OQUOTA_CHKD; | 1263 | flags |= XFS_OQUOTA_CHKD; |
| @@ -1265,19 +1280,27 @@ xfs_qm_quotacheck( | |||
| 1265 | * We've made all the changes that we need to make incore. Flush them | 1280 | * We've made all the changes that we need to make incore. Flush them |
| 1266 | * down to disk buffers if everything was updated successfully. | 1281 | * down to disk buffers if everything was updated successfully. |
| 1267 | */ | 1282 | */ |
| 1268 | if (XFS_IS_UQUOTA_ON(mp)) | 1283 | if (XFS_IS_UQUOTA_ON(mp)) { |
| 1269 | error = xfs_qm_dquot_walk(mp, XFS_DQ_USER, xfs_qm_flush_one); | 1284 | error = xfs_qm_dquot_walk(mp, XFS_DQ_USER, xfs_qm_flush_one, |
| 1285 | &buffer_list); | ||
| 1286 | } | ||
| 1270 | if (XFS_IS_GQUOTA_ON(mp)) { | 1287 | if (XFS_IS_GQUOTA_ON(mp)) { |
| 1271 | error2 = xfs_qm_dquot_walk(mp, XFS_DQ_GROUP, xfs_qm_flush_one); | 1288 | error2 = xfs_qm_dquot_walk(mp, XFS_DQ_GROUP, xfs_qm_flush_one, |
| 1289 | &buffer_list); | ||
| 1272 | if (!error) | 1290 | if (!error) |
| 1273 | error = error2; | 1291 | error = error2; |
| 1274 | } | 1292 | } |
| 1275 | if (XFS_IS_PQUOTA_ON(mp)) { | 1293 | if (XFS_IS_PQUOTA_ON(mp)) { |
| 1276 | error2 = xfs_qm_dquot_walk(mp, XFS_DQ_PROJ, xfs_qm_flush_one); | 1294 | error2 = xfs_qm_dquot_walk(mp, XFS_DQ_PROJ, xfs_qm_flush_one, |
| 1295 | &buffer_list); | ||
| 1277 | if (!error) | 1296 | if (!error) |
| 1278 | error = error2; | 1297 | error = error2; |
| 1279 | } | 1298 | } |
| 1280 | 1299 | ||
| 1300 | error2 = xfs_buf_delwri_submit(&buffer_list); | ||
| 1301 | if (!error) | ||
| 1302 | error = error2; | ||
| 1303 | |||
| 1281 | /* | 1304 | /* |
| 1282 | * We can get this error if we couldn't do a dquot allocation inside | 1305 | * We can get this error if we couldn't do a dquot allocation inside |
| 1283 | * xfs_qm_dqusage_adjust (via bulkstat). We don't care about the | 1306 | * xfs_qm_dqusage_adjust (via bulkstat). We don't care about the |
| @@ -1291,15 +1314,6 @@ xfs_qm_quotacheck( | |||
| 1291 | } | 1314 | } |
| 1292 | 1315 | ||
| 1293 | /* | 1316 | /* |
| 1294 | * We didn't log anything, because if we crashed, we'll have to | ||
| 1295 | * start the quotacheck from scratch anyway. However, we must make | ||
| 1296 | * sure that our dquot changes are secure before we put the | ||
| 1297 | * quotacheck'd stamp on the superblock. So, here we do a synchronous | ||
| 1298 | * flush. | ||
| 1299 | */ | ||
| 1300 | xfs_flush_buftarg(mp->m_ddev_targp, 1); | ||
| 1301 | |||
| 1302 | /* | ||
| 1303 | * If one type of quotas is off, then it will lose its | 1317 | * If one type of quotas is off, then it will lose its |
| 1304 | * quotachecked status, since we won't be doing accounting for | 1318 | * quotachecked status, since we won't be doing accounting for |
| 1305 | * that type anymore. | 1319 | * that type anymore. |
| @@ -1308,6 +1322,13 @@ xfs_qm_quotacheck( | |||
| 1308 | mp->m_qflags |= flags; | 1322 | mp->m_qflags |= flags; |
| 1309 | 1323 | ||
| 1310 | error_return: | 1324 | error_return: |
| 1325 | while (!list_empty(&buffer_list)) { | ||
| 1326 | struct xfs_buf *bp = | ||
| 1327 | list_first_entry(&buffer_list, struct xfs_buf, b_list); | ||
| 1328 | list_del_init(&bp->b_list); | ||
| 1329 | xfs_buf_relse(bp); | ||
| 1330 | } | ||
| 1331 | |||
| 1311 | if (error) { | 1332 | if (error) { |
| 1312 | xfs_warn(mp, | 1333 | xfs_warn(mp, |
| 1313 | "Quotacheck: Unsuccessful (Error %d): Disabling quotas.", | 1334 | "Quotacheck: Unsuccessful (Error %d): Disabling quotas.", |
| @@ -1424,6 +1445,7 @@ xfs_qm_dqfree_one( | |||
| 1424 | STATIC void | 1445 | STATIC void |
| 1425 | xfs_qm_dqreclaim_one( | 1446 | xfs_qm_dqreclaim_one( |
| 1426 | struct xfs_dquot *dqp, | 1447 | struct xfs_dquot *dqp, |
| 1448 | struct list_head *buffer_list, | ||
| 1427 | struct list_head *dispose_list) | 1449 | struct list_head *dispose_list) |
| 1428 | { | 1450 | { |
| 1429 | struct xfs_mount *mp = dqp->q_mount; | 1451 | struct xfs_mount *mp = dqp->q_mount; |
| @@ -1456,25 +1478,20 @@ xfs_qm_dqreclaim_one( | |||
| 1456 | if (!xfs_dqflock_nowait(dqp)) | 1478 | if (!xfs_dqflock_nowait(dqp)) |
| 1457 | goto out_busy; | 1479 | goto out_busy; |
| 1458 | 1480 | ||
| 1459 | /* | ||
| 1460 | * We have the flush lock so we know that this is not in the | ||
| 1461 | * process of being flushed. So, if this is dirty, flush it | ||
| 1462 | * DELWRI so that we don't get a freelist infested with | ||
| 1463 | * dirty dquots. | ||
| 1464 | */ | ||
| 1465 | if (XFS_DQ_IS_DIRTY(dqp)) { | 1481 | if (XFS_DQ_IS_DIRTY(dqp)) { |
| 1482 | struct xfs_buf *bp = NULL; | ||
| 1483 | |||
| 1466 | trace_xfs_dqreclaim_dirty(dqp); | 1484 | trace_xfs_dqreclaim_dirty(dqp); |
| 1467 | 1485 | ||
| 1468 | /* | 1486 | error = xfs_qm_dqflush(dqp, &bp); |
| 1469 | * We flush it delayed write, so don't bother releasing the | ||
| 1470 | * freelist lock. | ||
| 1471 | */ | ||
| 1472 | error = xfs_qm_dqflush(dqp, 0); | ||
| 1473 | if (error) { | 1487 | if (error) { |
| 1474 | xfs_warn(mp, "%s: dquot %p flush failed", | 1488 | xfs_warn(mp, "%s: dquot %p flush failed", |
| 1475 | __func__, dqp); | 1489 | __func__, dqp); |
| 1490 | goto out_busy; | ||
| 1476 | } | 1491 | } |
| 1477 | 1492 | ||
| 1493 | xfs_buf_delwri_queue(bp, buffer_list); | ||
| 1494 | xfs_buf_relse(bp); | ||
| 1478 | /* | 1495 | /* |
| 1479 | * Give the dquot another try on the freelist, as the | 1496 | * Give the dquot another try on the freelist, as the |
| 1480 | * flushing will take some time. | 1497 | * flushing will take some time. |
| @@ -1518,8 +1535,10 @@ xfs_qm_shake( | |||
| 1518 | struct xfs_quotainfo *qi = | 1535 | struct xfs_quotainfo *qi = |
| 1519 | container_of(shrink, struct xfs_quotainfo, qi_shrinker); | 1536 | container_of(shrink, struct xfs_quotainfo, qi_shrinker); |
| 1520 | int nr_to_scan = sc->nr_to_scan; | 1537 | int nr_to_scan = sc->nr_to_scan; |
| 1538 | LIST_HEAD (buffer_list); | ||
| 1521 | LIST_HEAD (dispose_list); | 1539 | LIST_HEAD (dispose_list); |
| 1522 | struct xfs_dquot *dqp; | 1540 | struct xfs_dquot *dqp; |
| 1541 | int error; | ||
| 1523 | 1542 | ||
| 1524 | if ((sc->gfp_mask & (__GFP_FS|__GFP_WAIT)) != (__GFP_FS|__GFP_WAIT)) | 1543 | if ((sc->gfp_mask & (__GFP_FS|__GFP_WAIT)) != (__GFP_FS|__GFP_WAIT)) |
| 1525 | return 0; | 1544 | return 0; |
| @@ -1532,15 +1551,20 @@ xfs_qm_shake( | |||
| 1532 | break; | 1551 | break; |
| 1533 | dqp = list_first_entry(&qi->qi_lru_list, struct xfs_dquot, | 1552 | dqp = list_first_entry(&qi->qi_lru_list, struct xfs_dquot, |
| 1534 | q_lru); | 1553 | q_lru); |
| 1535 | xfs_qm_dqreclaim_one(dqp, &dispose_list); | 1554 | xfs_qm_dqreclaim_one(dqp, &buffer_list, &dispose_list); |
| 1536 | } | 1555 | } |
| 1537 | mutex_unlock(&qi->qi_lru_lock); | 1556 | mutex_unlock(&qi->qi_lru_lock); |
| 1538 | 1557 | ||
| 1558 | error = xfs_buf_delwri_submit(&buffer_list); | ||
| 1559 | if (error) | ||
| 1560 | xfs_warn(NULL, "%s: dquot reclaim failed", __func__); | ||
| 1561 | |||
| 1539 | while (!list_empty(&dispose_list)) { | 1562 | while (!list_empty(&dispose_list)) { |
| 1540 | dqp = list_first_entry(&dispose_list, struct xfs_dquot, q_lru); | 1563 | dqp = list_first_entry(&dispose_list, struct xfs_dquot, q_lru); |
| 1541 | list_del_init(&dqp->q_lru); | 1564 | list_del_init(&dqp->q_lru); |
| 1542 | xfs_qm_dqfree_one(dqp); | 1565 | xfs_qm_dqfree_one(dqp); |
| 1543 | } | 1566 | } |
| 1567 | |||
| 1544 | out: | 1568 | out: |
| 1545 | return (qi->qi_lru_count / 100) * sysctl_vfs_cache_pressure; | 1569 | return (qi->qi_lru_count / 100) * sysctl_vfs_cache_pressure; |
| 1546 | } | 1570 | } |
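Both xfs_qm.c hunks above converge on the same ownership model: the caller declares a local delwri list, the flush helpers queue dirty buffers onto it, a single xfs_buf_delwri_submit() writes the whole batch, and the error path walks the list releasing whatever never got submitted. Below is a self-contained userspace sketch of that queue/submit/drain shape; the struct and helper names are invented stand-ins, not the kernel's xfs_buf API.

```c
#include <stdio.h>
#include <stdlib.h>

struct buf {
	struct buf	*next;	/* stands in for the b_list linkage */
	int		id;
};

/* delwri-queue analogue: the caller's private list takes the buffer */
static void queue_buf(struct buf *bp, struct buf **list)
{
	bp->next = *list;
	*list = bp;
}

/* submit analogue: write out and release everything queued, in one batch */
static int submit_all(struct buf **list)
{
	while (*list) {
		struct buf *bp = *list;

		*list = bp->next;
		printf("writing buf %d\n", bp->id);
		free(bp);
	}
	return 0;
}

/* error-path drain: release without writing, as at error_return above */
static void drain_all(struct buf **list)
{
	while (*list) {
		struct buf *bp = *list;

		*list = bp->next;
		free(bp);	/* xfs_buf_relse() analogue */
	}
}

int main(void)
{
	struct buf *list = NULL;

	for (int i = 0; i < 3; i++) {
		struct buf *bp = malloc(sizeof(*bp));

		if (!bp) {
			drain_all(&list);	/* unwind on failure */
			return 1;
		}
		bp->id = i;
		queue_buf(bp, &list);
	}
	return submit_all(&list);
}
```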
diff --git a/fs/xfs/xfs_qm_bhv.c b/fs/xfs/xfs_qm_bhv.c index e6986b5d80d8..6b39115bf145 100644 --- a/fs/xfs/xfs_qm_bhv.c +++ b/fs/xfs/xfs_qm_bhv.c | |||
| @@ -17,9 +17,7 @@ | |||
| 17 | */ | 17 | */ |
| 18 | #include "xfs.h" | 18 | #include "xfs.h" |
| 19 | #include "xfs_fs.h" | 19 | #include "xfs_fs.h" |
| 20 | #include "xfs_bit.h" | ||
| 21 | #include "xfs_log.h" | 20 | #include "xfs_log.h" |
| 22 | #include "xfs_inum.h" | ||
| 23 | #include "xfs_trans.h" | 21 | #include "xfs_trans.h" |
| 24 | #include "xfs_sb.h" | 22 | #include "xfs_sb.h" |
| 25 | #include "xfs_ag.h" | 23 | #include "xfs_ag.h" |
diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c index c4f396e437a8..858a3b186110 100644 --- a/fs/xfs/xfs_qm_syscalls.c +++ b/fs/xfs/xfs_qm_syscalls.c | |||
| @@ -22,7 +22,6 @@ | |||
| 22 | #include "xfs_fs.h" | 22 | #include "xfs_fs.h" |
| 23 | #include "xfs_bit.h" | 23 | #include "xfs_bit.h" |
| 24 | #include "xfs_log.h" | 24 | #include "xfs_log.h" |
| 25 | #include "xfs_inum.h" | ||
| 26 | #include "xfs_trans.h" | 25 | #include "xfs_trans.h" |
| 27 | #include "xfs_sb.h" | 26 | #include "xfs_sb.h" |
| 28 | #include "xfs_ag.h" | 27 | #include "xfs_ag.h" |
diff --git a/fs/xfs/xfs_quotaops.c b/fs/xfs/xfs_quotaops.c index 7e76f537abb7..fed504fc2999 100644 --- a/fs/xfs/xfs_quotaops.c +++ b/fs/xfs/xfs_quotaops.c | |||
| @@ -17,7 +17,6 @@ | |||
| 17 | */ | 17 | */ |
| 18 | #include "xfs.h" | 18 | #include "xfs.h" |
| 19 | #include "xfs_sb.h" | 19 | #include "xfs_sb.h" |
| 20 | #include "xfs_inum.h" | ||
| 21 | #include "xfs_log.h" | 20 | #include "xfs_log.h" |
| 22 | #include "xfs_ag.h" | 21 | #include "xfs_ag.h" |
| 23 | #include "xfs_mount.h" | 22 | #include "xfs_mount.h" |
diff --git a/fs/xfs/xfs_rename.c b/fs/xfs/xfs_rename.c index e44ef7ee8ce8..30ff5f401d28 100644 --- a/fs/xfs/xfs_rename.c +++ b/fs/xfs/xfs_rename.c | |||
| @@ -19,7 +19,6 @@ | |||
| 19 | #include "xfs_fs.h" | 19 | #include "xfs_fs.h" |
| 20 | #include "xfs_types.h" | 20 | #include "xfs_types.h" |
| 21 | #include "xfs_log.h" | 21 | #include "xfs_log.h" |
| 22 | #include "xfs_inum.h" | ||
| 23 | #include "xfs_trans.h" | 22 | #include "xfs_trans.h" |
| 24 | #include "xfs_sb.h" | 23 | #include "xfs_sb.h" |
| 25 | #include "xfs_ag.h" | 24 | #include "xfs_ag.h" |
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index ca4f31534a0a..92d4331cd4f1 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c | |||
| @@ -20,7 +20,6 @@ | |||
| 20 | #include "xfs_types.h" | 20 | #include "xfs_types.h" |
| 21 | #include "xfs_bit.h" | 21 | #include "xfs_bit.h" |
| 22 | #include "xfs_log.h" | 22 | #include "xfs_log.h" |
| 23 | #include "xfs_inum.h" | ||
| 24 | #include "xfs_trans.h" | 23 | #include "xfs_trans.h" |
| 25 | #include "xfs_sb.h" | 24 | #include "xfs_sb.h" |
| 26 | #include "xfs_ag.h" | 25 | #include "xfs_ag.h" |
| @@ -34,7 +33,6 @@ | |||
| 34 | #include "xfs_rtalloc.h" | 33 | #include "xfs_rtalloc.h" |
| 35 | #include "xfs_fsops.h" | 34 | #include "xfs_fsops.h" |
| 36 | #include "xfs_error.h" | 35 | #include "xfs_error.h" |
| 37 | #include "xfs_rw.h" | ||
| 38 | #include "xfs_inode_item.h" | 36 | #include "xfs_inode_item.h" |
| 39 | #include "xfs_trans_space.h" | 37 | #include "xfs_trans_space.h" |
| 40 | #include "xfs_utils.h" | 38 | #include "xfs_utils.h" |
| @@ -1872,9 +1870,9 @@ xfs_growfs_rt( | |||
| 1872 | /* | 1870 | /* |
| 1873 | * Read in the last block of the device, make sure it exists. | 1871 | * Read in the last block of the device, make sure it exists. |
| 1874 | */ | 1872 | */ |
| 1875 | bp = xfs_buf_read_uncached(mp, mp->m_rtdev_targp, | 1873 | bp = xfs_buf_read_uncached(mp->m_rtdev_targp, |
| 1876 | XFS_FSB_TO_BB(mp, nrblocks - 1), | 1874 | XFS_FSB_TO_BB(mp, nrblocks - 1), |
| 1877 | XFS_FSB_TO_B(mp, 1), 0); | 1875 | XFS_FSB_TO_BB(mp, 1), 0); |
| 1878 | if (!bp) | 1876 | if (!bp) |
| 1879 | return EIO; | 1877 | return EIO; |
| 1880 | xfs_buf_relse(bp); | 1878 | xfs_buf_relse(bp); |
| @@ -2219,9 +2217,9 @@ xfs_rtmount_init( | |||
| 2219 | (unsigned long long) mp->m_sb.sb_rblocks); | 2217 | (unsigned long long) mp->m_sb.sb_rblocks); |
| 2220 | return XFS_ERROR(EFBIG); | 2218 | return XFS_ERROR(EFBIG); |
| 2221 | } | 2219 | } |
| 2222 | bp = xfs_buf_read_uncached(mp, mp->m_rtdev_targp, | 2220 | bp = xfs_buf_read_uncached(mp->m_rtdev_targp, |
| 2223 | d - XFS_FSB_TO_BB(mp, 1), | 2221 | d - XFS_FSB_TO_BB(mp, 1), |
| 2224 | XFS_FSB_TO_B(mp, 1), 0); | 2222 | XFS_FSB_TO_BB(mp, 1), 0); |
| 2225 | if (!bp) { | 2223 | if (!bp) { |
| 2226 | xfs_warn(mp, "realtime device size check failed"); | 2224 | xfs_warn(mp, "realtime device size check failed"); |
| 2227 | return EIO; | 2225 | return EIO; |
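Both xfs_rtalloc.c hunks carry a units fix alongside the signature change: the length argument of xfs_buf_read_uncached() is counted in 512-byte basic blocks, so converting one filesystem block with XFS_FSB_TO_B (which yields bytes) asked for a vastly oversized read, and XFS_FSB_TO_BB is the correct macro. A self-contained sketch of the arithmetic, with locally redefined macros mirroring the kernel's BBTOB/BTOBB convention (the 512-byte sector shift is real; the macro definitions here are local to the example):

```c
#include <stdio.h>

#define BBSHIFT		9			/* basic blocks are 512 bytes */
#define BBTOB(bbs)	((bbs) << BBSHIFT)	/* basic blocks -> bytes */
#define BTOBB(bytes)	(((bytes) + (1u << BBSHIFT) - 1) >> BBSHIFT)

int main(void)
{
	unsigned int fsb_bytes = 4096;	/* a typical filesystem block size */

	/* one 4k filesystem block is 8 basic blocks, not 4096 of them */
	printf("%u bytes -> %u basic blocks\n", fsb_bytes, BTOBB(fsb_bytes));
	printf("8 basic blocks -> %u bytes\n", (unsigned)BBTOB(8u));
	return 0;
}
```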
diff --git a/fs/xfs/xfs_rw.c b/fs/xfs/xfs_rw.c deleted file mode 100644 index 597d044a09a1..000000000000 --- a/fs/xfs/xfs_rw.c +++ /dev/null | |||
| @@ -1,156 +0,0 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2000-2006 Silicon Graphics, Inc. | ||
| 3 | * All Rights Reserved. | ||
| 4 | * | ||
| 5 | * This program is free software; you can redistribute it and/or | ||
| 6 | * modify it under the terms of the GNU General Public License as | ||
| 7 | * published by the Free Software Foundation. | ||
| 8 | * | ||
| 9 | * This program is distributed in the hope that it would be useful, | ||
| 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 12 | * GNU General Public License for more details. | ||
| 13 | * | ||
| 14 | * You should have received a copy of the GNU General Public License | ||
| 15 | * along with this program; if not, write the Free Software Foundation, | ||
| 16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
| 17 | */ | ||
| 18 | #include "xfs.h" | ||
| 19 | #include "xfs_fs.h" | ||
| 20 | #include "xfs_types.h" | ||
| 21 | #include "xfs_bit.h" | ||
| 22 | #include "xfs_log.h" | ||
| 23 | #include "xfs_inum.h" | ||
| 24 | #include "xfs_trans.h" | ||
| 25 | #include "xfs_sb.h" | ||
| 26 | #include "xfs_ag.h" | ||
| 27 | #include "xfs_mount.h" | ||
| 28 | #include "xfs_bmap_btree.h" | ||
| 29 | #include "xfs_dinode.h" | ||
| 30 | #include "xfs_inode.h" | ||
| 31 | #include "xfs_error.h" | ||
| 32 | #include "xfs_rw.h" | ||
| 33 | |||
| 34 | /* | ||
| 35 | * Force a shutdown of the filesystem instantly while keeping | ||
| 36 | * the filesystem consistent. We don't do an unmount here; just shutdown | ||
| 37 | * the shop, make sure that absolutely nothing persistent happens to | ||
| 38 | * this filesystem after this point. | ||
| 39 | */ | ||
| 40 | void | ||
| 41 | xfs_do_force_shutdown( | ||
| 42 | xfs_mount_t *mp, | ||
| 43 | int flags, | ||
| 44 | char *fname, | ||
| 45 | int lnnum) | ||
| 46 | { | ||
| 47 | int logerror; | ||
| 48 | |||
| 49 | logerror = flags & SHUTDOWN_LOG_IO_ERROR; | ||
| 50 | |||
| 51 | if (!(flags & SHUTDOWN_FORCE_UMOUNT)) { | ||
| 52 | xfs_notice(mp, | ||
| 53 | "%s(0x%x) called from line %d of file %s. Return address = 0x%p", | ||
| 54 | __func__, flags, lnnum, fname, __return_address); | ||
| 55 | } | ||
| 56 | /* | ||
| 57 | * No need to duplicate efforts. | ||
| 58 | */ | ||
| 59 | if (XFS_FORCED_SHUTDOWN(mp) && !logerror) | ||
| 60 | return; | ||
| 61 | |||
| 62 | /* | ||
| 63 | * This flags XFS_MOUNT_FS_SHUTDOWN, makes sure that we don't | ||
| 64 | * queue up anybody new on the log reservations, and wakes up | ||
| 65 | * everybody who's sleeping on log reservations to tell them | ||
| 66 | * the bad news. | ||
| 67 | */ | ||
| 68 | if (xfs_log_force_umount(mp, logerror)) | ||
| 69 | return; | ||
| 70 | |||
| 71 | if (flags & SHUTDOWN_CORRUPT_INCORE) { | ||
| 72 | xfs_alert_tag(mp, XFS_PTAG_SHUTDOWN_CORRUPT, | ||
| 73 | "Corruption of in-memory data detected. Shutting down filesystem"); | ||
| 74 | if (XFS_ERRLEVEL_HIGH <= xfs_error_level) | ||
| 75 | xfs_stack_trace(); | ||
| 76 | } else if (!(flags & SHUTDOWN_FORCE_UMOUNT)) { | ||
| 77 | if (logerror) { | ||
| 78 | xfs_alert_tag(mp, XFS_PTAG_SHUTDOWN_LOGERROR, | ||
| 79 | "Log I/O Error Detected. Shutting down filesystem"); | ||
| 80 | } else if (flags & SHUTDOWN_DEVICE_REQ) { | ||
| 81 | xfs_alert_tag(mp, XFS_PTAG_SHUTDOWN_IOERROR, | ||
| 82 | "All device paths lost. Shutting down filesystem"); | ||
| 83 | } else if (!(flags & SHUTDOWN_REMOTE_REQ)) { | ||
| 84 | xfs_alert_tag(mp, XFS_PTAG_SHUTDOWN_IOERROR, | ||
| 85 | "I/O Error Detected. Shutting down filesystem"); | ||
| 86 | } | ||
| 87 | } | ||
| 88 | if (!(flags & SHUTDOWN_FORCE_UMOUNT)) { | ||
| 89 | xfs_alert(mp, | ||
| 90 | "Please umount the filesystem and rectify the problem(s)"); | ||
| 91 | } | ||
| 92 | } | ||
| 93 | |||
| 94 | /* | ||
| 95 | * This isn't an absolute requirement, but it is | ||
| 96 | * just a good idea to call xfs_read_buf instead of | ||
| 97 | * directly doing a read_buf call. For one, we shouldn't | ||
| 98 | * be doing this disk read if we are in SHUTDOWN state anyway, | ||
| 99 | * so this stops that from happening. Secondly, this does all | ||
| 100 | * the error checking stuff and the brelse if appropriate for | ||
| 101 | * the caller, so the code can be a little leaner. | ||
| 102 | */ | ||
| 103 | |||
| 104 | int | ||
| 105 | xfs_read_buf( | ||
| 106 | struct xfs_mount *mp, | ||
| 107 | xfs_buftarg_t *target, | ||
| 108 | xfs_daddr_t blkno, | ||
| 109 | int len, | ||
| 110 | uint flags, | ||
| 111 | xfs_buf_t **bpp) | ||
| 112 | { | ||
| 113 | xfs_buf_t *bp; | ||
| 114 | int error; | ||
| 115 | |||
| 116 | if (!flags) | ||
| 117 | flags = XBF_LOCK | XBF_MAPPED; | ||
| 118 | |||
| 119 | bp = xfs_buf_read(target, blkno, len, flags); | ||
| 120 | if (!bp) | ||
| 121 | return XFS_ERROR(EIO); | ||
| 122 | error = bp->b_error; | ||
| 123 | if (!error && !XFS_FORCED_SHUTDOWN(mp)) { | ||
| 124 | *bpp = bp; | ||
| 125 | } else { | ||
| 126 | *bpp = NULL; | ||
| 127 | if (error) { | ||
| 128 | xfs_buf_ioerror_alert(bp, __func__); | ||
| 129 | } else { | ||
| 130 | error = XFS_ERROR(EIO); | ||
| 131 | } | ||
| 132 | if (bp) { | ||
| 133 | XFS_BUF_UNDONE(bp); | ||
| 134 | xfs_buf_stale(bp); | ||
| 135 | /* | ||
| 136 | * brelse clears B_ERROR and b_error | ||
| 137 | */ | ||
| 138 | xfs_buf_relse(bp); | ||
| 139 | } | ||
| 140 | } | ||
| 141 | return (error); | ||
| 142 | } | ||
| 143 | |||
| 144 | /* | ||
| 145 | * helper function to extract extent size hint from inode | ||
| 146 | */ | ||
| 147 | xfs_extlen_t | ||
| 148 | xfs_get_extsz_hint( | ||
| 149 | struct xfs_inode *ip) | ||
| 150 | { | ||
| 151 | if ((ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE) && ip->i_d.di_extsize) | ||
| 152 | return ip->i_d.di_extsize; | ||
| 153 | if (XFS_IS_REALTIME_INODE(ip)) | ||
| 154 | return ip->i_mount->m_sb.sb_rextsize; | ||
| 155 | return 0; | ||
| 156 | } | ||
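With xfs_rw.c deleted, xfs_do_force_shutdown() lives on elsewhere; its core is the flag ladder shown above, which picks one alert per shutdown reason and stays silent for forced unmounts. A condensed, runnable sketch of that classification follows; the bit values are illustrative only and do not match the kernel's actual SHUTDOWN_* encoding:

```c
#include <stdio.h>

/* illustrative bit values; the kernel's SHUTDOWN_* flags differ */
#define SHUTDOWN_LOG_IO_ERROR	0x01
#define SHUTDOWN_FORCE_UMOUNT	0x02
#define SHUTDOWN_CORRUPT_INCORE	0x04
#define SHUTDOWN_DEVICE_REQ	0x08
#define SHUTDOWN_REMOTE_REQ	0x10

/* condensed form of the alert ladder in the deleted function */
static const char *shutdown_reason(int flags)
{
	if (flags & SHUTDOWN_CORRUPT_INCORE)
		return "in-memory corruption detected";
	if (flags & SHUTDOWN_FORCE_UMOUNT)
		return "forced unmount (no alert)";
	if (flags & SHUTDOWN_LOG_IO_ERROR)
		return "log I/O error";
	if (flags & SHUTDOWN_DEVICE_REQ)
		return "all device paths lost";
	if (!(flags & SHUTDOWN_REMOTE_REQ))
		return "I/O error";
	return "remote shutdown request";
}

int main(void)
{
	printf("%s\n", shutdown_reason(SHUTDOWN_LOG_IO_ERROR));
	printf("%s\n", shutdown_reason(SHUTDOWN_FORCE_UMOUNT));
	return 0;
}
```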
diff --git a/fs/xfs/xfs_rw.h b/fs/xfs/xfs_rw.h deleted file mode 100644 index bbdb9ad6a4ba..000000000000 --- a/fs/xfs/xfs_rw.h +++ /dev/null | |||
| @@ -1,47 +0,0 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2000-2006 Silicon Graphics, Inc. | ||
| 3 | * All Rights Reserved. | ||
| 4 | * | ||
| 5 | * This program is free software; you can redistribute it and/or | ||
| 6 | * modify it under the terms of the GNU General Public License as | ||
| 7 | * published by the Free Software Foundation. | ||
| 8 | * | ||
| 9 | * This program is distributed in the hope that it would be useful, | ||
| 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 12 | * GNU General Public License for more details. | ||
| 13 | * | ||
| 14 | * You should have received a copy of the GNU General Public License | ||
| 15 | * along with this program; if not, write the Free Software Foundation, | ||
| 16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
| 17 | */ | ||
| 18 | #ifndef __XFS_RW_H__ | ||
| 19 | #define __XFS_RW_H__ | ||
| 20 | |||
| 21 | struct xfs_buf; | ||
| 22 | struct xfs_inode; | ||
| 23 | struct xfs_mount; | ||
| 24 | |||
| 25 | /* | ||
| 26 | * Convert the given file system block to a disk block. | ||
| 27 | * We have to treat it differently based on whether the | ||
| 28 | * file is a real time file or not, because the bmap code | ||
| 29 | * does. | ||
| 30 | */ | ||
| 31 | static inline xfs_daddr_t | ||
| 32 | xfs_fsb_to_db(struct xfs_inode *ip, xfs_fsblock_t fsb) | ||
| 33 | { | ||
| 34 | return (XFS_IS_REALTIME_INODE(ip) ? \ | ||
| 35 | (xfs_daddr_t)XFS_FSB_TO_BB((ip)->i_mount, (fsb)) : \ | ||
| 36 | XFS_FSB_TO_DADDR((ip)->i_mount, (fsb))); | ||
| 37 | } | ||
| 38 | |||
| 39 | /* | ||
| 40 | * Prototypes for functions in xfs_rw.c. | ||
| 41 | */ | ||
| 42 | extern int xfs_read_buf(struct xfs_mount *mp, xfs_buftarg_t *btp, | ||
| 43 | xfs_daddr_t blkno, int len, uint flags, | ||
| 44 | struct xfs_buf **bpp); | ||
| 45 | extern xfs_extlen_t xfs_get_extsz_hint(struct xfs_inode *ip); | ||
| 46 | |||
| 47 | #endif /* __XFS_RW_H__ */ | ||
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index dab9a5f6dfd6..2fcfd5b0b046 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c | |||
| @@ -17,7 +17,6 @@ | |||
| 17 | */ | 17 | */ |
| 18 | 18 | ||
| 19 | #include "xfs.h" | 19 | #include "xfs.h" |
| 20 | #include "xfs_bit.h" | ||
| 21 | #include "xfs_log.h" | 20 | #include "xfs_log.h" |
| 22 | #include "xfs_inum.h" | 21 | #include "xfs_inum.h" |
| 23 | #include "xfs_trans.h" | 22 | #include "xfs_trans.h" |
| @@ -622,7 +621,7 @@ void | |||
| 622 | xfs_blkdev_issue_flush( | 621 | xfs_blkdev_issue_flush( |
| 623 | xfs_buftarg_t *buftarg) | 622 | xfs_buftarg_t *buftarg) |
| 624 | { | 623 | { |
| 625 | blkdev_issue_flush(buftarg->bt_bdev, GFP_KERNEL, NULL); | 624 | blkdev_issue_flush(buftarg->bt_bdev, GFP_NOFS, NULL); |
| 626 | } | 625 | } |
| 627 | 626 | ||
| 628 | STATIC void | 627 | STATIC void |
| @@ -773,8 +772,14 @@ xfs_init_mount_workqueues( | |||
| 773 | if (!mp->m_unwritten_workqueue) | 772 | if (!mp->m_unwritten_workqueue) |
| 774 | goto out_destroy_data_iodone_queue; | 773 | goto out_destroy_data_iodone_queue; |
| 775 | 774 | ||
| 775 | mp->m_cil_workqueue = alloc_workqueue("xfs-cil/%s", | ||
| 776 | WQ_MEM_RECLAIM, 0, mp->m_fsname); | ||
| 777 | if (!mp->m_cil_workqueue) | ||
| 778 | goto out_destroy_unwritten; | ||
| 776 | return 0; | 779 | return 0; |
| 777 | 780 | ||
| 781 | out_destroy_unwritten: | ||
| 782 | destroy_workqueue(mp->m_unwritten_workqueue); | ||
| 778 | out_destroy_data_iodone_queue: | 783 | out_destroy_data_iodone_queue: |
| 779 | destroy_workqueue(mp->m_data_workqueue); | 784 | destroy_workqueue(mp->m_data_workqueue); |
| 780 | out: | 785 | out: |
| @@ -785,6 +790,7 @@ STATIC void | |||
| 785 | xfs_destroy_mount_workqueues( | 790 | xfs_destroy_mount_workqueues( |
| 786 | struct xfs_mount *mp) | 791 | struct xfs_mount *mp) |
| 787 | { | 792 | { |
| 793 | destroy_workqueue(mp->m_cil_workqueue); | ||
| 788 | destroy_workqueue(mp->m_data_workqueue); | 794 | destroy_workqueue(mp->m_data_workqueue); |
| 789 | destroy_workqueue(mp->m_unwritten_workqueue); | 795 | destroy_workqueue(mp->m_unwritten_workqueue); |
| 790 | } | 796 | } |
| @@ -981,18 +987,9 @@ xfs_fs_put_super( | |||
| 981 | { | 987 | { |
| 982 | struct xfs_mount *mp = XFS_M(sb); | 988 | struct xfs_mount *mp = XFS_M(sb); |
| 983 | 989 | ||
| 984 | xfs_syncd_stop(mp); | ||
| 985 | |||
| 986 | /* | ||
| 987 | * Blow away any referenced inode in the filestreams cache. | ||
| 988 | * This can and will cause log traffic as inodes go inactive | ||
| 989 | * here. | ||
| 990 | */ | ||
| 991 | xfs_filestream_unmount(mp); | 990 | xfs_filestream_unmount(mp); |
| 992 | |||
| 993 | xfs_flush_buftarg(mp->m_ddev_targp, 1); | ||
| 994 | |||
| 995 | xfs_unmountfs(mp); | 991 | xfs_unmountfs(mp); |
| 992 | xfs_syncd_stop(mp); | ||
| 996 | xfs_freesb(mp); | 993 | xfs_freesb(mp); |
| 997 | xfs_icsb_destroy_counters(mp); | 994 | xfs_icsb_destroy_counters(mp); |
| 998 | xfs_destroy_mount_workqueues(mp); | 995 | xfs_destroy_mount_workqueues(mp); |
| @@ -1072,7 +1069,7 @@ xfs_fs_statfs( | |||
| 1072 | 1069 | ||
| 1073 | spin_unlock(&mp->m_sb_lock); | 1070 | spin_unlock(&mp->m_sb_lock); |
| 1074 | 1071 | ||
| 1075 | if ((ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) || | 1072 | if ((ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) && |
| 1076 | ((mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_OQUOTA_ENFD))) == | 1073 | ((mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_OQUOTA_ENFD))) == |
| 1077 | (XFS_PQUOTA_ACCT|XFS_OQUOTA_ENFD)) | 1074 | (XFS_PQUOTA_ACCT|XFS_OQUOTA_ENFD)) |
| 1078 | xfs_qm_statvfs(ip, statp); | 1075 | xfs_qm_statvfs(ip, statp); |
| @@ -1362,31 +1359,32 @@ xfs_fs_fill_super( | |||
| 1362 | sb->s_time_gran = 1; | 1359 | sb->s_time_gran = 1; |
| 1363 | set_posix_acl_flag(sb); | 1360 | set_posix_acl_flag(sb); |
| 1364 | 1361 | ||
| 1365 | error = xfs_mountfs(mp); | 1362 | error = xfs_syncd_init(mp); |
| 1366 | if (error) | 1363 | if (error) |
| 1367 | goto out_filestream_unmount; | 1364 | goto out_filestream_unmount; |
| 1368 | 1365 | ||
| 1369 | error = xfs_syncd_init(mp); | 1366 | error = xfs_mountfs(mp); |
| 1370 | if (error) | 1367 | if (error) |
| 1371 | goto out_unmount; | 1368 | goto out_syncd_stop; |
| 1372 | 1369 | ||
| 1373 | root = igrab(VFS_I(mp->m_rootip)); | 1370 | root = igrab(VFS_I(mp->m_rootip)); |
| 1374 | if (!root) { | 1371 | if (!root) { |
| 1375 | error = ENOENT; | 1372 | error = ENOENT; |
| 1376 | goto out_syncd_stop; | 1373 | goto out_unmount; |
| 1377 | } | 1374 | } |
| 1378 | if (is_bad_inode(root)) { | 1375 | if (is_bad_inode(root)) { |
| 1379 | error = EINVAL; | 1376 | error = EINVAL; |
| 1380 | goto out_syncd_stop; | 1377 | goto out_unmount; |
| 1381 | } | 1378 | } |
| 1382 | sb->s_root = d_make_root(root); | 1379 | sb->s_root = d_make_root(root); |
| 1383 | if (!sb->s_root) { | 1380 | if (!sb->s_root) { |
| 1384 | error = ENOMEM; | 1381 | error = ENOMEM; |
| 1385 | goto out_syncd_stop; | 1382 | goto out_unmount; |
| 1386 | } | 1383 | } |
| 1387 | 1384 | ||
| 1388 | return 0; | 1385 | return 0; |
| 1389 | 1386 | out_syncd_stop: | |
| 1387 | xfs_syncd_stop(mp); | ||
| 1390 | out_filestream_unmount: | 1388 | out_filestream_unmount: |
| 1391 | xfs_filestream_unmount(mp); | 1389 | xfs_filestream_unmount(mp); |
| 1392 | out_free_sb: | 1390 | out_free_sb: |
| @@ -1403,19 +1401,10 @@ out_destroy_workqueues: | |||
| 1403 | out: | 1401 | out: |
| 1404 | return -error; | 1402 | return -error; |
| 1405 | 1403 | ||
| 1406 | out_syncd_stop: | ||
| 1407 | xfs_syncd_stop(mp); | ||
| 1408 | out_unmount: | 1404 | out_unmount: |
| 1409 | /* | ||
| 1410 | * Blow away any referenced inode in the filestreams cache. | ||
| 1411 | * This can and will cause log traffic as inodes go inactive | ||
| 1412 | * here. | ||
| 1413 | */ | ||
| 1414 | xfs_filestream_unmount(mp); | 1405 | xfs_filestream_unmount(mp); |
| 1415 | |||
| 1416 | xfs_flush_buftarg(mp->m_ddev_targp, 1); | ||
| 1417 | |||
| 1418 | xfs_unmountfs(mp); | 1406 | xfs_unmountfs(mp); |
| 1407 | xfs_syncd_stop(mp); | ||
| 1419 | goto out_free_sb; | 1408 | goto out_free_sb; |
| 1420 | } | 1409 | } |
| 1421 | 1410 | ||
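The net effect of the xfs_fs_fill_super changes is a strict nesting: xfs_syncd_init() now runs before xfs_mountfs() so the sync workqueue can service inode reclaim during quotacheck at mount time, and every failure label unwinds in exact reverse order of what succeeded. A minimal sketch of that goto-unwind discipline, with hypothetical step names standing in for the XFS calls:

```c
#include <stdio.h>

static int start_worker(void)	{ puts("worker started"); return 0; }
static void stop_worker(void)	{ puts("worker stopped"); }
static int do_mount(void)	{ puts("mounted"); return 0; }
static void do_unmount(void)	{ puts("unmounted"); }
static int grab_root(void)	{ puts("root lookup"); return -1; } /* force a failure */

static int fill_super(void)
{
	int error;

	error = start_worker();		/* first in, so last out */
	if (error)
		goto out;
	error = do_mount();
	if (error)
		goto out_stop_worker;
	error = grab_root();
	if (error)
		goto out_unmount;
	return 0;

out_unmount:
	do_unmount();			/* tear down in reverse order */
out_stop_worker:
	stop_worker();
out:
	return error;
}

int main(void)
{
	return fill_super() ? 1 : 0;
}
```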
diff --git a/fs/xfs/xfs_sync.c b/fs/xfs/xfs_sync.c index 205ebcb34d9e..c9d3409c5ca3 100644 --- a/fs/xfs/xfs_sync.c +++ b/fs/xfs/xfs_sync.c | |||
| @@ -18,7 +18,6 @@ | |||
| 18 | #include "xfs.h" | 18 | #include "xfs.h" |
| 19 | #include "xfs_fs.h" | 19 | #include "xfs_fs.h" |
| 20 | #include "xfs_types.h" | 20 | #include "xfs_types.h" |
| 21 | #include "xfs_bit.h" | ||
| 22 | #include "xfs_log.h" | 21 | #include "xfs_log.h" |
| 23 | #include "xfs_inum.h" | 22 | #include "xfs_inum.h" |
| 24 | #include "xfs_trans.h" | 23 | #include "xfs_trans.h" |
| @@ -241,45 +240,6 @@ xfs_sync_inode_data( | |||
| 241 | return error; | 240 | return error; |
| 242 | } | 241 | } |
| 243 | 242 | ||
| 244 | STATIC int | ||
| 245 | xfs_sync_inode_attr( | ||
| 246 | struct xfs_inode *ip, | ||
| 247 | struct xfs_perag *pag, | ||
| 248 | int flags) | ||
| 249 | { | ||
| 250 | int error = 0; | ||
| 251 | |||
| 252 | xfs_ilock(ip, XFS_ILOCK_SHARED); | ||
| 253 | if (xfs_inode_clean(ip)) | ||
| 254 | goto out_unlock; | ||
| 255 | if (!xfs_iflock_nowait(ip)) { | ||
| 256 | if (!(flags & SYNC_WAIT)) | ||
| 257 | goto out_unlock; | ||
| 258 | xfs_iflock(ip); | ||
| 259 | } | ||
| 260 | |||
| 261 | if (xfs_inode_clean(ip)) { | ||
| 262 | xfs_ifunlock(ip); | ||
| 263 | goto out_unlock; | ||
| 264 | } | ||
| 265 | |||
| 266 | error = xfs_iflush(ip, flags); | ||
| 267 | |||
| 268 | /* | ||
| 269 | * We don't want to try again on non-blocking flushes that can't run | ||
| 270 | * again immediately. If an inode really must be written, then that's | ||
| 271 | * what the SYNC_WAIT flag is for. | ||
| 272 | */ | ||
| 273 | if (error == EAGAIN) { | ||
| 274 | ASSERT(!(flags & SYNC_WAIT)); | ||
| 275 | error = 0; | ||
| 276 | } | ||
| 277 | |||
| 278 | out_unlock: | ||
| 279 | xfs_iunlock(ip, XFS_ILOCK_SHARED); | ||
| 280 | return error; | ||
| 281 | } | ||
| 282 | |||
| 283 | /* | 243 | /* |
| 284 | * Write out pagecache data for the whole filesystem. | 244 | * Write out pagecache data for the whole filesystem. |
| 285 | */ | 245 | */ |
| @@ -300,19 +260,6 @@ xfs_sync_data( | |||
| 300 | return 0; | 260 | return 0; |
| 301 | } | 261 | } |
| 302 | 262 | ||
| 303 | /* | ||
| 304 | * Write out inode metadata (attributes) for the whole filesystem. | ||
| 305 | */ | ||
| 306 | STATIC int | ||
| 307 | xfs_sync_attr( | ||
| 308 | struct xfs_mount *mp, | ||
| 309 | int flags) | ||
| 310 | { | ||
| 311 | ASSERT((flags & ~SYNC_WAIT) == 0); | ||
| 312 | |||
| 313 | return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags); | ||
| 314 | } | ||
| 315 | |||
| 316 | STATIC int | 263 | STATIC int |
| 317 | xfs_sync_fsdata( | 264 | xfs_sync_fsdata( |
| 318 | struct xfs_mount *mp) | 265 | struct xfs_mount *mp) |
| @@ -350,7 +297,7 @@ xfs_sync_fsdata( | |||
| 350 | * First stage of freeze - no writers will make progress now we are here, | 297 | * First stage of freeze - no writers will make progress now we are here, |
| 351 | * so we flush delwri and delalloc buffers here, then wait for all I/O to | 298 | * so we flush delwri and delalloc buffers here, then wait for all I/O to |
| 352 | * complete. Data is frozen at that point. Metadata is not frozen, | 299 | * complete. Data is frozen at that point. Metadata is not frozen, |
| 353 | * transactions can still occur here so don't bother flushing the buftarg | 300 | * transactions can still occur here so don't bother emptying the AIL |
| 354 | * because it'll just get dirty again. | 301 | * because it'll just get dirty again. |
| 355 | */ | 302 | */ |
| 356 | int | 303 | int |
| @@ -365,47 +312,13 @@ xfs_quiesce_data( | |||
| 365 | /* write superblock and hoover up shutdown errors */ | 312 | /* write superblock and hoover up shutdown errors */ |
| 366 | error = xfs_sync_fsdata(mp); | 313 | error = xfs_sync_fsdata(mp); |
| 367 | 314 | ||
| 368 | /* make sure all delwri buffers are written out */ | ||
| 369 | xfs_flush_buftarg(mp->m_ddev_targp, 1); | ||
| 370 | |||
| 371 | /* mark the log as covered if needed */ | 315 | /* mark the log as covered if needed */ |
| 372 | if (xfs_log_need_covered(mp)) | 316 | if (xfs_log_need_covered(mp)) |
| 373 | error2 = xfs_fs_log_dummy(mp); | 317 | error2 = xfs_fs_log_dummy(mp); |
| 374 | 318 | ||
| 375 | /* flush data-only devices */ | ||
| 376 | if (mp->m_rtdev_targp) | ||
| 377 | xfs_flush_buftarg(mp->m_rtdev_targp, 1); | ||
| 378 | |||
| 379 | return error ? error : error2; | 319 | return error ? error : error2; |
| 380 | } | 320 | } |
| 381 | 321 | ||
| 382 | STATIC void | ||
| 383 | xfs_quiesce_fs( | ||
| 384 | struct xfs_mount *mp) | ||
| 385 | { | ||
| 386 | int count = 0, pincount; | ||
| 387 | |||
| 388 | xfs_reclaim_inodes(mp, 0); | ||
| 389 | xfs_flush_buftarg(mp->m_ddev_targp, 0); | ||
| 390 | |||
| 391 | /* | ||
| 392 | * This loop must run at least twice. The first instance of the loop | ||
| 393 | * will flush most meta data but that will generate more meta data | ||
| 394 | * (typically directory updates). Which then must be flushed and | ||
| 395 | * logged before we can write the unmount record. We also do sync | ||

| 396 | * reclaim of inodes to catch any that the above delwri flush skipped. | ||
| 397 | */ | ||
| 398 | do { | ||
| 399 | xfs_reclaim_inodes(mp, SYNC_WAIT); | ||
| 400 | xfs_sync_attr(mp, SYNC_WAIT); | ||
| 401 | pincount = xfs_flush_buftarg(mp->m_ddev_targp, 1); | ||
| 402 | if (!pincount) { | ||
| 403 | delay(50); | ||
| 404 | count++; | ||
| 405 | } | ||
| 406 | } while (count < 2); | ||
| 407 | } | ||
| 408 | |||
| 409 | /* | 322 | /* |
| 410 | * Second stage of a quiesce. The data is already synced, now we have to take | 323 | * Second stage of a quiesce. The data is already synced, now we have to take |
| 411 | * care of the metadata. New transactions are already blocked, so we need to | 324 | * care of the metadata. New transactions are already blocked, so we need to |
| @@ -421,8 +334,12 @@ xfs_quiesce_attr( | |||
| 421 | while (atomic_read(&mp->m_active_trans) > 0) | 334 | while (atomic_read(&mp->m_active_trans) > 0) |
| 422 | delay(100); | 335 | delay(100); |
| 423 | 336 | ||
| 424 | /* flush inodes and push all remaining buffers out to disk */ | 337 | /* reclaim inodes to do any IO before the freeze completes */ |
| 425 | xfs_quiesce_fs(mp); | 338 | xfs_reclaim_inodes(mp, 0); |
| 339 | xfs_reclaim_inodes(mp, SYNC_WAIT); | ||
| 340 | |||
| 341 | /* flush all pending changes from the AIL */ | ||
| 342 | xfs_ail_push_all_sync(mp->m_ail); | ||
| 426 | 343 | ||
| 427 | /* | 344 | /* |
| 428 | * Just warn here till VFS can correctly support | 345 | * Just warn here till VFS can correctly support |
| @@ -436,7 +353,12 @@ xfs_quiesce_attr( | |||
| 436 | xfs_warn(mp, "xfs_attr_quiesce: failed to log sb changes. " | 353 | xfs_warn(mp, "xfs_attr_quiesce: failed to log sb changes. " |
| 437 | "Frozen image may not be consistent."); | 354 | "Frozen image may not be consistent."); |
| 438 | xfs_log_unmount_write(mp); | 355 | xfs_log_unmount_write(mp); |
| 439 | xfs_unmountfs_writesb(mp); | 356 | |
| 357 | /* | ||
| 358 | * At this point we might have modified the superblock again and thus | ||
| 359 | * added an item to the AIL, thus flush it again. | ||
| 360 | */ | ||
| 361 | xfs_ail_push_all_sync(mp->m_ail); | ||
| 440 | } | 362 | } |
| 441 | 363 | ||
| 442 | static void | 364 | static void |
| @@ -460,16 +382,27 @@ xfs_sync_worker( | |||
| 460 | struct xfs_mount, m_sync_work); | 382 | struct xfs_mount, m_sync_work); |
| 461 | int error; | 383 | int error; |
| 462 | 384 | ||
| 463 | if (!(mp->m_flags & XFS_MOUNT_RDONLY)) { | 385 | /* |
| 464 | /* dgc: errors ignored here */ | 386 | * We shouldn't write/force the log if we are in the mount/unmount |
| 465 | if (mp->m_super->s_frozen == SB_UNFROZEN && | 387 | * process or on a read only filesystem. The workqueue still needs to be |
| 466 | xfs_log_need_covered(mp)) | 388 | * active in both cases, however, because it is used for inode reclaim |
| 467 | error = xfs_fs_log_dummy(mp); | 389 | * during these times. Use the s_umount semaphore to provide exclusion |
| 468 | else | 390 | * with unmount. |
| 469 | xfs_log_force(mp, 0); | 391 | */ |
| 470 | 392 | if (down_read_trylock(&mp->m_super->s_umount)) { | |
| 471 | /* start pushing all the metadata that is currently dirty */ | 393 | if (!(mp->m_flags & XFS_MOUNT_RDONLY)) { |
| 472 | xfs_ail_push_all(mp->m_ail); | 394 | /* dgc: errors ignored here */ |
| 395 | if (mp->m_super->s_frozen == SB_UNFROZEN && | ||
| 396 | xfs_log_need_covered(mp)) | ||
| 397 | error = xfs_fs_log_dummy(mp); | ||
| 398 | else | ||
| 399 | xfs_log_force(mp, 0); | ||
| 400 | |||
| 401 | /* start pushing all the metadata that is currently | ||
| 402 | * dirty */ | ||
| 403 | xfs_ail_push_all(mp->m_ail); | ||
| 404 | } | ||
| 405 | up_read(&mp->m_super->s_umount); | ||
| 473 | } | 406 | } |
| 474 | 407 | ||
| 475 | /* queue us up again */ | 408 | /* queue us up again */ |
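The rewritten xfs_sync_worker takes s_umount with down_read_trylock(), so a concurrent unmount, which holds the semaphore exclusively, simply makes the periodic worker skip its log-force pass rather than race against filesystem teardown. A compilable pthreads sketch of the same trylock-and-skip pattern, with a userspace rwlock standing in for s_umount:

```c
#include <pthread.h>
#include <stdio.h>

static pthread_rwlock_t umount_sem = PTHREAD_RWLOCK_INITIALIZER;

/* periodic worker: do nothing this cycle if unmount holds the lock */
static void sync_worker(void)
{
	if (pthread_rwlock_tryrdlock(&umount_sem) == 0) {
		puts("worker: forcing log / pushing AIL");
		pthread_rwlock_unlock(&umount_sem);
	} else {
		puts("worker: unmount in progress, skipping this pass");
	}
}

int main(void)
{
	sync_worker();				/* uncontended: work proceeds */
	pthread_rwlock_wrlock(&umount_sem);	/* "unmount" takes it exclusively */
	sync_worker();				/* trylock fails: pass is skipped */
	pthread_rwlock_unlock(&umount_sem);
	return 0;
}
```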
| @@ -488,14 +421,6 @@ xfs_syncd_queue_reclaim( | |||
| 488 | struct xfs_mount *mp) | 421 | struct xfs_mount *mp) |
| 489 | { | 422 | { |
| 490 | 423 | ||
| 491 | /* | ||
| 492 | * We can have inodes enter reclaim after we've shut down the syncd | ||
| 493 | * workqueue during unmount, so don't allow reclaim work to be queued | ||
| 494 | * during unmount. | ||
| 495 | */ | ||
| 496 | if (!(mp->m_super->s_flags & MS_ACTIVE)) | ||
| 497 | return; | ||
| 498 | |||
| 499 | rcu_read_lock(); | 424 | rcu_read_lock(); |
| 500 | if (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_RECLAIM_TAG)) { | 425 | if (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_RECLAIM_TAG)) { |
| 501 | queue_delayed_work(xfs_syncd_wq, &mp->m_reclaim_work, | 426 | queue_delayed_work(xfs_syncd_wq, &mp->m_reclaim_work, |
| @@ -564,7 +489,6 @@ xfs_syncd_init( | |||
| 564 | INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker); | 489 | INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker); |
| 565 | 490 | ||
| 566 | xfs_syncd_queue_sync(mp); | 491 | xfs_syncd_queue_sync(mp); |
| 567 | xfs_syncd_queue_reclaim(mp); | ||
| 568 | 492 | ||
| 569 | return 0; | 493 | return 0; |
| 570 | } | 494 | } |
| @@ -702,11 +626,8 @@ xfs_reclaim_inode_grab( | |||
| 702 | } | 626 | } |
| 703 | 627 | ||
| 704 | /* | 628 | /* |
| 705 | * Inodes in different states need to be treated differently, and the return | 629 | * Inodes in different states need to be treated differently. The following |
| 706 | * value of xfs_iflush is not sufficient to get this right. The following table | 630 | * table lists the inode states and the reclaim actions necessary: |
| 707 | * lists the inode states and the reclaim actions necessary for non-blocking | ||
| 708 | * reclaim: | ||
| 709 | * | ||
| 710 | * | 631 | * |
| 711 | * inode state iflush ret required action | 632 | * inode state iflush ret required action |
| 712 | * --------------- ---------- --------------- | 633 | * --------------- ---------- --------------- |
| @@ -716,39 +637,31 @@ xfs_reclaim_inode_grab( | |||
| 716 | * stale, unpinned 0 reclaim | 637 | * stale, unpinned 0 reclaim |
| 717 | * clean, pinned(*) 0 requeue | 638 | * clean, pinned(*) 0 requeue |
| 718 | * stale, pinned EAGAIN requeue | 639 | * stale, pinned EAGAIN requeue |
| 719 | * dirty, delwri ok 0 requeue | 640 | * dirty, async - requeue |
| 720 | * dirty, delwri blocked EAGAIN requeue | 641 | * dirty, sync 0 reclaim |
| 721 | * dirty, sync flush 0 reclaim | ||
| 722 | * | 642 | * |
| 723 | * (*) dgc: I don't think the clean, pinned state is possible but it gets | 643 | * (*) dgc: I don't think the clean, pinned state is possible but it gets |
| 724 | * handled anyway given the order of checks implemented. | 644 | * handled anyway given the order of checks implemented. |
| 725 | * | 645 | * |
| 726 | * As can be seen from the table, the return value of xfs_iflush() is not | ||
| 727 | * sufficient to correctly decide the reclaim action here. The checks in | ||
| 728 | * xfs_iflush() might look like duplicates, but they are not. | ||
| 729 | * | ||
| 730 | * Also, because we get the flush lock first, we know that any inode that has | 646 | * Also, because we get the flush lock first, we know that any inode that has |
| 731 | * been flushed delwri has had the flush completed by the time we check that | 647 | * been flushed delwri has had the flush completed by the time we check that |
| 732 | * the inode is clean. The clean inode check needs to be done before flushing | 648 | * the inode is clean. |
| 733 | * the inode delwri otherwise we would loop forever requeuing clean inodes as | ||
| 734 | * we cannot tell apart a successful delwri flush and a clean inode from the | ||
| 735 | * return value of xfs_iflush(). | ||
| 736 | * | 649 | * |
| 737 | * Note that because the inode is flushed delayed write by background | 650 | * Note that because the inode is flushed delayed write by AIL pushing, the |
| 738 | * writeback, the flush lock may already be held here and waiting on it can | 651 | * flush lock may already be held here and waiting on it can result in very |
| 739 | * result in very long latencies. Hence for sync reclaims, where we wait on the | 652 | * long latencies. Hence for sync reclaims, where we wait on the flush lock, |
| 740 | * flush lock, the caller should push out delayed write inodes first before | 653 | * the caller should push the AIL first before trying to reclaim inodes to |
| 741 | * trying to reclaim them to minimise the amount of time spent waiting. For | 654 | * minimise the amount of time spent waiting. For background reclaim, we only |
| 742 | * background reclaim, we just requeue the inode for the next pass. | 655 | * bother to reclaim clean inodes anyway. |
| 743 | * | 656 | * |
| 744 | * Hence the order of actions after gaining the locks should be: | 657 | * Hence the order of actions after gaining the locks should be: |
| 745 | * bad => reclaim | 658 | * bad => reclaim |
| 746 | * shutdown => unpin and reclaim | 659 | * shutdown => unpin and reclaim |
| 747 | * pinned, delwri => requeue | 660 | * pinned, async => requeue |
| 748 | * pinned, sync => unpin | 661 | * pinned, sync => unpin |
| 749 | * stale => reclaim | 662 | * stale => reclaim |
| 750 | * clean => reclaim | 663 | * clean => reclaim |
| 751 | * dirty, delwri => flush and requeue | 664 | * dirty, async => requeue |
| 752 | * dirty, sync => flush, wait and reclaim | 665 | * dirty, sync => flush, wait and reclaim |
| 753 | */ | 666 | */ |
| 754 | STATIC int | 667 | STATIC int |
| @@ -757,7 +670,8 @@ xfs_reclaim_inode( | |||
| 757 | struct xfs_perag *pag, | 670 | struct xfs_perag *pag, |
| 758 | int sync_mode) | 671 | int sync_mode) |
| 759 | { | 672 | { |
| 760 | int error; | 673 | struct xfs_buf *bp = NULL; |
| 674 | int error; | ||
| 761 | 675 | ||
| 762 | restart: | 676 | restart: |
| 763 | error = 0; | 677 | error = 0; |
| @@ -765,17 +679,6 @@ restart: | |||
| 765 | if (!xfs_iflock_nowait(ip)) { | 679 | if (!xfs_iflock_nowait(ip)) { |
| 766 | if (!(sync_mode & SYNC_WAIT)) | 680 | if (!(sync_mode & SYNC_WAIT)) |
| 767 | goto out; | 681 | goto out; |
| 768 | |||
| 769 | /* | ||
| 770 | * If we only have a single dirty inode in a cluster there is | ||
| 771 | * a fair chance that the AIL push may have pushed it into | ||
| 772 | * the buffer, but xfsbufd won't touch it until 30 seconds | ||
| 773 | * from now, and thus we will lock up here. | ||
| 774 | * | ||
| 775 | * Promote the inode buffer to the front of the delwri list | ||
| 776 | * and wake up xfsbufd now. | ||
| 777 | */ | ||
| 778 | xfs_promote_inode(ip); | ||
| 779 | xfs_iflock(ip); | 682 | xfs_iflock(ip); |
| 780 | } | 683 | } |
| 781 | 684 | ||
| @@ -783,13 +686,12 @@ restart: | |||
| 783 | goto reclaim; | 686 | goto reclaim; |
| 784 | if (XFS_FORCED_SHUTDOWN(ip->i_mount)) { | 687 | if (XFS_FORCED_SHUTDOWN(ip->i_mount)) { |
| 785 | xfs_iunpin_wait(ip); | 688 | xfs_iunpin_wait(ip); |
| 689 | xfs_iflush_abort(ip, false); | ||
| 786 | goto reclaim; | 690 | goto reclaim; |
| 787 | } | 691 | } |
| 788 | if (xfs_ipincount(ip)) { | 692 | if (xfs_ipincount(ip)) { |
| 789 | if (!(sync_mode & SYNC_WAIT)) { | 693 | if (!(sync_mode & SYNC_WAIT)) |
| 790 | xfs_ifunlock(ip); | 694 | goto out_ifunlock; |
| 791 | goto out; | ||
| 792 | } | ||
| 793 | xfs_iunpin_wait(ip); | 695 | xfs_iunpin_wait(ip); |
| 794 | } | 696 | } |
| 795 | if (xfs_iflags_test(ip, XFS_ISTALE)) | 697 | if (xfs_iflags_test(ip, XFS_ISTALE)) |
| @@ -798,60 +700,42 @@ restart: | |||
| 798 | goto reclaim; | 700 | goto reclaim; |
| 799 | 701 | ||
| 800 | /* | 702 | /* |
| 703 | * Never flush out dirty data during non-blocking reclaim, as it would | ||
| 704 | * just contend with AIL pushing trying to do the same job. | ||
| 705 | */ | ||
| 706 | if (!(sync_mode & SYNC_WAIT)) | ||
| 707 | goto out_ifunlock; | ||
| 708 | |||
| 709 | /* | ||
| 801 | * Now we have an inode that needs flushing. | 710 | * Now we have an inode that needs flushing. |
| 802 | * | 711 | * |
| 803 | * We do a nonblocking flush here even if we are doing a SYNC_WAIT | 712 | * Note that xfs_iflush will never block on the inode buffer lock, as |
| 804 | * reclaim as we can deadlock with inode cluster removal. | ||
| 805 | * xfs_ifree_cluster() can lock the inode buffer before it locks the | 713 | * xfs_ifree_cluster() can lock the inode buffer before it locks the |
| 806 | * ip->i_lock, and we are doing the exact opposite here. As a result, | 714 | * ip->i_lock, and we are doing the exact opposite here. As a result, |
| 807 | * doing a blocking xfs_itobp() to get the cluster buffer will result | 715 | * doing a blocking xfs_itobp() to get the cluster buffer would result |
| 808 | * in an ABBA deadlock with xfs_ifree_cluster(). | 716 | * in an ABBA deadlock with xfs_ifree_cluster(). |
| 809 | * | 717 | * |
| 810 | * As xfs_ifree_cluster() must gather all inodes that are active in the | 718 | * As xfs_ifree_cluster() must gather all inodes that are active in the |
| 811 | * cache to mark them stale, if we hit this case we don't actually want | 719 | * cache to mark them stale, if we hit this case we don't actually want |
| 812 | * to do IO here - we want the inode marked stale so we can simply | 720 | * to do IO here - we want the inode marked stale so we can simply |
| 813 | * reclaim it. Hence if we get an EAGAIN error on a SYNC_WAIT flush, | 721 | * reclaim it. Hence if we get an EAGAIN error here, just unlock the |
| 814 | * just unlock the inode, back off and try again. Hopefully the next | 722 | * inode, back off and try again. Hopefully the next pass through will |
| 815 | * pass through will see the stale flag set on the inode. | 723 | * see the stale flag set on the inode. |
| 816 | */ | 724 | */ |
| 817 | error = xfs_iflush(ip, SYNC_TRYLOCK | sync_mode); | 725 | error = xfs_iflush(ip, &bp); |
| 818 | if (sync_mode & SYNC_WAIT) { | 726 | if (error == EAGAIN) { |
| 819 | if (error == EAGAIN) { | 727 | xfs_iunlock(ip, XFS_ILOCK_EXCL); |
| 820 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | 728 | /* backoff longer than in xfs_ifree_cluster */ |
| 821 | /* backoff longer than in xfs_ifree_cluster */ | 729 | delay(2); |
| 822 | delay(2); | 730 | goto restart; |
| 823 | goto restart; | ||
| 824 | } | ||
| 825 | xfs_iflock(ip); | ||
| 826 | goto reclaim; | ||
| 827 | } | 731 | } |
| 828 | 732 | ||
| 829 | /* | 733 | if (!error) { |
| 830 | * When we have to flush an inode but don't have SYNC_WAIT set, we | 734 | error = xfs_bwrite(bp); |
| 831 | * flush the inode out using a delwri buffer and wait for the next | 735 | xfs_buf_relse(bp); |
| 832 | * call into reclaim to find it in a clean state instead of waiting for | ||
| 833 | * it now. We also don't return errors here - if the error is transient | ||
| 834 | * then the next reclaim pass will flush the inode, and if the error | ||
| 835 | * is permanent then the next sync reclaim will reclaim the inode and | ||
| 836 | * pass on the error. | ||
| 837 | */ | ||
| 838 | if (error && error != EAGAIN && !XFS_FORCED_SHUTDOWN(ip->i_mount)) { | ||
| 839 | xfs_warn(ip->i_mount, | ||
| 840 | "inode 0x%llx background reclaim flush failed with %d", | ||
| 841 | (long long)ip->i_ino, error); | ||
| 842 | } | 736 | } |
| 843 | out: | ||
| 844 | xfs_iflags_clear(ip, XFS_IRECLAIM); | ||
| 845 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | ||
| 846 | /* | ||
| 847 | * We could return EAGAIN here to make reclaim rescan the inode tree in | ||
| 848 | * a short while. However, this just burns CPU time scanning the tree | ||
| 849 | * waiting for IO to complete and xfssyncd never goes back to the idle | ||
| 850 | * state. Instead, return 0 to let the next scheduled background reclaim | ||
| 851 | * attempt to reclaim the inode again. | ||
| 852 | */ | ||
| 853 | return 0; | ||
| 854 | 737 | ||
| 738 | xfs_iflock(ip); | ||
| 855 | reclaim: | 739 | reclaim: |
| 856 | xfs_ifunlock(ip); | 740 | xfs_ifunlock(ip); |
| 857 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | 741 | xfs_iunlock(ip, XFS_ILOCK_EXCL); |
| @@ -884,8 +768,21 @@ reclaim: | |||
| 884 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | 768 | xfs_iunlock(ip, XFS_ILOCK_EXCL); |
| 885 | 769 | ||
| 886 | xfs_inode_free(ip); | 770 | xfs_inode_free(ip); |
| 887 | |||
| 888 | return error; | 771 | return error; |
| 772 | |||
| 773 | out_ifunlock: | ||
| 774 | xfs_ifunlock(ip); | ||
| 775 | out: | ||
| 776 | xfs_iflags_clear(ip, XFS_IRECLAIM); | ||
| 777 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | ||
| 778 | /* | ||
| 779 | * We could return EAGAIN here to make reclaim rescan the inode tree in | ||
| 780 | * a short while. However, this just burns CPU time scanning the tree | ||
| 781 | * waiting for IO to complete and xfssyncd never goes back to the idle | ||
| 782 | * state. Instead, return 0 to let the next scheduled background reclaim | ||
| 783 | * attempt to reclaim the inode again. | ||
| 784 | */ | ||
| 785 | return 0; | ||
| 889 | } | 786 | } |
| 890 | 787 | ||
| 891 | /* | 788 | /* |
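The rewritten xfs_reclaim_inode() reduces to the ladder the updated comment documents: bad or shut-down inodes reclaim immediately, pinned or dirty inodes requeue unless the caller passed SYNC_WAIT, and only sync reclaim ever flushes and writes the cluster buffer. A condensed, runnable restatement of that decision order, with invented state predicates rather than the real inode flags:

```c
#include <stdio.h>

enum action { RECLAIM, REQUEUE, FLUSH_THEN_RECLAIM };

struct istate {
	int bad, shutdown, pinned, stale, clean, sync_wait;
};

/* same check order as the comment: bad, shutdown, pinned, stale/clean, dirty */
static enum action decide(const struct istate *st)
{
	if (st->bad)
		return RECLAIM;
	if (st->shutdown)
		return RECLAIM;			/* after unpin + flush abort */
	if (st->pinned && !st->sync_wait)
		return REQUEUE;			/* async: leave it to the AIL */
	if (st->stale || st->clean)
		return RECLAIM;
	if (!st->sync_wait)
		return REQUEUE;			/* never flush in async reclaim */
	return FLUSH_THEN_RECLAIM;		/* xfs_iflush + xfs_bwrite path */
}

int main(void)
{
	struct istate dirty_async = { .sync_wait = 0 };
	struct istate dirty_sync = { .sync_wait = 1 };

	printf("dirty async -> %d (requeue)\n", decide(&dirty_async));
	printf("dirty sync  -> %d (flush+reclaim)\n", decide(&dirty_sync));
	return 0;
}
```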
diff --git a/fs/xfs/xfs_trace.c b/fs/xfs/xfs_trace.c index 9010ce885e6a..624bedd81357 100644 --- a/fs/xfs/xfs_trace.c +++ b/fs/xfs/xfs_trace.c | |||
| @@ -18,9 +18,7 @@ | |||
| 18 | #include "xfs.h" | 18 | #include "xfs.h" |
| 19 | #include "xfs_fs.h" | 19 | #include "xfs_fs.h" |
| 20 | #include "xfs_types.h" | 20 | #include "xfs_types.h" |
| 21 | #include "xfs_bit.h" | ||
| 22 | #include "xfs_log.h" | 21 | #include "xfs_log.h" |
| 23 | #include "xfs_inum.h" | ||
| 24 | #include "xfs_trans.h" | 22 | #include "xfs_trans.h" |
| 25 | #include "xfs_sb.h" | 23 | #include "xfs_sb.h" |
| 26 | #include "xfs_ag.h" | 24 | #include "xfs_ag.h" |
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 06838c42b2a0..7cf9d3529e51 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h | |||
| @@ -281,7 +281,7 @@ DECLARE_EVENT_CLASS(xfs_buf_class, | |||
| 281 | TP_STRUCT__entry( | 281 | TP_STRUCT__entry( |
| 282 | __field(dev_t, dev) | 282 | __field(dev_t, dev) |
| 283 | __field(xfs_daddr_t, bno) | 283 | __field(xfs_daddr_t, bno) |
| 284 | __field(size_t, buffer_length) | 284 | __field(int, nblks) |
| 285 | __field(int, hold) | 285 | __field(int, hold) |
| 286 | __field(int, pincount) | 286 | __field(int, pincount) |
| 287 | __field(unsigned, lockval) | 287 | __field(unsigned, lockval) |
| @@ -291,18 +291,18 @@ DECLARE_EVENT_CLASS(xfs_buf_class, | |||
| 291 | TP_fast_assign( | 291 | TP_fast_assign( |
| 292 | __entry->dev = bp->b_target->bt_dev; | 292 | __entry->dev = bp->b_target->bt_dev; |
| 293 | __entry->bno = bp->b_bn; | 293 | __entry->bno = bp->b_bn; |
| 294 | __entry->buffer_length = bp->b_buffer_length; | 294 | __entry->nblks = bp->b_length; |
| 295 | __entry->hold = atomic_read(&bp->b_hold); | 295 | __entry->hold = atomic_read(&bp->b_hold); |
| 296 | __entry->pincount = atomic_read(&bp->b_pin_count); | 296 | __entry->pincount = atomic_read(&bp->b_pin_count); |
| 297 | __entry->lockval = bp->b_sema.count; | 297 | __entry->lockval = bp->b_sema.count; |
| 298 | __entry->flags = bp->b_flags; | 298 | __entry->flags = bp->b_flags; |
| 299 | __entry->caller_ip = caller_ip; | 299 | __entry->caller_ip = caller_ip; |
| 300 | ), | 300 | ), |
| 301 | TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d " | 301 | TP_printk("dev %d:%d bno 0x%llx nblks 0x%x hold %d pincount %d " |
| 302 | "lock %d flags %s caller %pf", | 302 | "lock %d flags %s caller %pf", |
| 303 | MAJOR(__entry->dev), MINOR(__entry->dev), | 303 | MAJOR(__entry->dev), MINOR(__entry->dev), |
| 304 | (unsigned long long)__entry->bno, | 304 | (unsigned long long)__entry->bno, |
| 305 | __entry->buffer_length, | 305 | __entry->nblks, |
| 306 | __entry->hold, | 306 | __entry->hold, |
| 307 | __entry->pincount, | 307 | __entry->pincount, |
| 308 | __entry->lockval, | 308 | __entry->lockval, |
| @@ -328,7 +328,7 @@ DEFINE_BUF_EVENT(xfs_buf_unlock); | |||
| 328 | DEFINE_BUF_EVENT(xfs_buf_iowait); | 328 | DEFINE_BUF_EVENT(xfs_buf_iowait); |
| 329 | DEFINE_BUF_EVENT(xfs_buf_iowait_done); | 329 | DEFINE_BUF_EVENT(xfs_buf_iowait_done); |
| 330 | DEFINE_BUF_EVENT(xfs_buf_delwri_queue); | 330 | DEFINE_BUF_EVENT(xfs_buf_delwri_queue); |
| 331 | DEFINE_BUF_EVENT(xfs_buf_delwri_dequeue); | 331 | DEFINE_BUF_EVENT(xfs_buf_delwri_queued); |
| 332 | DEFINE_BUF_EVENT(xfs_buf_delwri_split); | 332 | DEFINE_BUF_EVENT(xfs_buf_delwri_split); |
| 333 | DEFINE_BUF_EVENT(xfs_buf_get_uncached); | 333 | DEFINE_BUF_EVENT(xfs_buf_get_uncached); |
| 334 | DEFINE_BUF_EVENT(xfs_bdstrat_shut); | 334 | DEFINE_BUF_EVENT(xfs_bdstrat_shut); |
| @@ -362,7 +362,7 @@ DECLARE_EVENT_CLASS(xfs_buf_flags_class, | |||
| 362 | TP_fast_assign( | 362 | TP_fast_assign( |
| 363 | __entry->dev = bp->b_target->bt_dev; | 363 | __entry->dev = bp->b_target->bt_dev; |
| 364 | __entry->bno = bp->b_bn; | 364 | __entry->bno = bp->b_bn; |
| 365 | __entry->buffer_length = bp->b_buffer_length; | 365 | __entry->buffer_length = BBTOB(bp->b_length); |
| 366 | __entry->flags = flags; | 366 | __entry->flags = flags; |
| 367 | __entry->hold = atomic_read(&bp->b_hold); | 367 | __entry->hold = atomic_read(&bp->b_hold); |
| 368 | __entry->pincount = atomic_read(&bp->b_pin_count); | 368 | __entry->pincount = atomic_read(&bp->b_pin_count); |
| @@ -406,7 +406,7 @@ TRACE_EVENT(xfs_buf_ioerror, | |||
| 406 | TP_fast_assign( | 406 | TP_fast_assign( |
| 407 | __entry->dev = bp->b_target->bt_dev; | 407 | __entry->dev = bp->b_target->bt_dev; |
| 408 | __entry->bno = bp->b_bn; | 408 | __entry->bno = bp->b_bn; |
| 409 | __entry->buffer_length = bp->b_buffer_length; | 409 | __entry->buffer_length = BBTOB(bp->b_length); |
| 410 | __entry->hold = atomic_read(&bp->b_hold); | 410 | __entry->hold = atomic_read(&bp->b_hold); |
| 411 | __entry->pincount = atomic_read(&bp->b_pin_count); | 411 | __entry->pincount = atomic_read(&bp->b_pin_count); |
| 412 | __entry->lockval = bp->b_sema.count; | 412 | __entry->lockval = bp->b_sema.count; |
| @@ -450,7 +450,7 @@ DECLARE_EVENT_CLASS(xfs_buf_item_class, | |||
| 450 | __entry->bli_recur = bip->bli_recur; | 450 | __entry->bli_recur = bip->bli_recur; |
| 451 | __entry->bli_refcount = atomic_read(&bip->bli_refcount); | 451 | __entry->bli_refcount = atomic_read(&bip->bli_refcount); |
| 452 | __entry->buf_bno = bip->bli_buf->b_bn; | 452 | __entry->buf_bno = bip->bli_buf->b_bn; |
| 453 | __entry->buf_len = bip->bli_buf->b_buffer_length; | 453 | __entry->buf_len = BBTOB(bip->bli_buf->b_length); |
| 454 | __entry->buf_flags = bip->bli_buf->b_flags; | 454 | __entry->buf_flags = bip->bli_buf->b_flags; |
| 455 | __entry->buf_hold = atomic_read(&bip->bli_buf->b_hold); | 455 | __entry->buf_hold = atomic_read(&bip->bli_buf->b_hold); |
| 456 | __entry->buf_pincount = atomic_read(&bip->bli_buf->b_pin_count); | 456 | __entry->buf_pincount = atomic_read(&bip->bli_buf->b_pin_count); |
| @@ -486,12 +486,10 @@ DEFINE_BUF_ITEM_EVENT(xfs_buf_item_format_stale); | |||
| 486 | DEFINE_BUF_ITEM_EVENT(xfs_buf_item_pin); | 486 | DEFINE_BUF_ITEM_EVENT(xfs_buf_item_pin); |
| 487 | DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unpin); | 487 | DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unpin); |
| 488 | DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unpin_stale); | 488 | DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unpin_stale); |
| 489 | DEFINE_BUF_ITEM_EVENT(xfs_buf_item_trylock); | ||
| 490 | DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unlock); | 489 | DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unlock); |
| 491 | DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unlock_stale); | 490 | DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unlock_stale); |
| 492 | DEFINE_BUF_ITEM_EVENT(xfs_buf_item_committed); | 491 | DEFINE_BUF_ITEM_EVENT(xfs_buf_item_committed); |
| 493 | DEFINE_BUF_ITEM_EVENT(xfs_buf_item_push); | 492 | DEFINE_BUF_ITEM_EVENT(xfs_buf_item_push); |
| 494 | DEFINE_BUF_ITEM_EVENT(xfs_buf_item_pushbuf); | ||
| 495 | DEFINE_BUF_ITEM_EVENT(xfs_trans_get_buf); | 493 | DEFINE_BUF_ITEM_EVENT(xfs_trans_get_buf); |
| 496 | DEFINE_BUF_ITEM_EVENT(xfs_trans_get_buf_recur); | 494 | DEFINE_BUF_ITEM_EVENT(xfs_trans_get_buf_recur); |
| 497 | DEFINE_BUF_ITEM_EVENT(xfs_trans_getsb); | 495 | DEFINE_BUF_ITEM_EVENT(xfs_trans_getsb); |
| @@ -876,15 +874,30 @@ DECLARE_EVENT_CLASS(xfs_log_item_class, | |||
| 876 | __print_flags(__entry->flags, "|", XFS_LI_FLAGS)) | 874 | __print_flags(__entry->flags, "|", XFS_LI_FLAGS)) |
| 877 | ) | 875 | ) |
| 878 | 876 | ||
| 877 | TRACE_EVENT(xfs_log_force, | ||
| 878 | TP_PROTO(struct xfs_mount *mp, xfs_lsn_t lsn), | ||
| 879 | TP_ARGS(mp, lsn), | ||
| 880 | TP_STRUCT__entry( | ||
| 881 | __field(dev_t, dev) | ||
| 882 | __field(xfs_lsn_t, lsn) | ||
| 883 | ), | ||
| 884 | TP_fast_assign( | ||
| 885 | __entry->dev = mp->m_super->s_dev; | ||
| 886 | __entry->lsn = lsn; | ||
| 887 | ), | ||
| 888 | TP_printk("dev %d:%d lsn 0x%llx", | ||
| 889 | MAJOR(__entry->dev), MINOR(__entry->dev), | ||
| 890 | __entry->lsn) | ||
| 891 | ) | ||
| 892 | |||
| 879 | #define DEFINE_LOG_ITEM_EVENT(name) \ | 893 | #define DEFINE_LOG_ITEM_EVENT(name) \ |
| 880 | DEFINE_EVENT(xfs_log_item_class, name, \ | 894 | DEFINE_EVENT(xfs_log_item_class, name, \ |
| 881 | TP_PROTO(struct xfs_log_item *lip), \ | 895 | TP_PROTO(struct xfs_log_item *lip), \ |
| 882 | TP_ARGS(lip)) | 896 | TP_ARGS(lip)) |
| 883 | DEFINE_LOG_ITEM_EVENT(xfs_ail_push); | 897 | DEFINE_LOG_ITEM_EVENT(xfs_ail_push); |
| 884 | DEFINE_LOG_ITEM_EVENT(xfs_ail_pushbuf); | ||
| 885 | DEFINE_LOG_ITEM_EVENT(xfs_ail_pushbuf_pinned); | ||
| 886 | DEFINE_LOG_ITEM_EVENT(xfs_ail_pinned); | 898 | DEFINE_LOG_ITEM_EVENT(xfs_ail_pinned); |
| 887 | DEFINE_LOG_ITEM_EVENT(xfs_ail_locked); | 899 | DEFINE_LOG_ITEM_EVENT(xfs_ail_locked); |
| 900 | DEFINE_LOG_ITEM_EVENT(xfs_ail_flushing); | ||
| 888 | 901 | ||
| 889 | 902 | ||
| 890 | DECLARE_EVENT_CLASS(xfs_file_class, | 903 | DECLARE_EVENT_CLASS(xfs_file_class, |
| @@ -1145,7 +1158,7 @@ TRACE_EVENT(xfs_bunmap, | |||
| 1145 | 1158 | ||
| 1146 | ); | 1159 | ); |
| 1147 | 1160 | ||
| 1148 | DECLARE_EVENT_CLASS(xfs_busy_class, | 1161 | DECLARE_EVENT_CLASS(xfs_extent_busy_class, |
| 1149 | TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, | 1162 | TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, |
| 1150 | xfs_agblock_t agbno, xfs_extlen_t len), | 1163 | xfs_agblock_t agbno, xfs_extlen_t len), |
| 1151 | TP_ARGS(mp, agno, agbno, len), | 1164 | TP_ARGS(mp, agno, agbno, len), |
| @@ -1168,17 +1181,17 @@ DECLARE_EVENT_CLASS(xfs_busy_class, | |||
| 1168 | __entry->len) | 1181 | __entry->len) |
| 1169 | ); | 1182 | ); |
| 1170 | #define DEFINE_BUSY_EVENT(name) \ | 1183 | #define DEFINE_BUSY_EVENT(name) \ |
| 1171 | DEFINE_EVENT(xfs_busy_class, name, \ | 1184 | DEFINE_EVENT(xfs_extent_busy_class, name, \ |
| 1172 | TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \ | 1185 | TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \ |
| 1173 | xfs_agblock_t agbno, xfs_extlen_t len), \ | 1186 | xfs_agblock_t agbno, xfs_extlen_t len), \ |
| 1174 | TP_ARGS(mp, agno, agbno, len)) | 1187 | TP_ARGS(mp, agno, agbno, len)) |
| 1175 | DEFINE_BUSY_EVENT(xfs_alloc_busy); | 1188 | DEFINE_BUSY_EVENT(xfs_extent_busy); |
| 1176 | DEFINE_BUSY_EVENT(xfs_alloc_busy_enomem); | 1189 | DEFINE_BUSY_EVENT(xfs_extent_busy_enomem); |
| 1177 | DEFINE_BUSY_EVENT(xfs_alloc_busy_force); | 1190 | DEFINE_BUSY_EVENT(xfs_extent_busy_force); |
| 1178 | DEFINE_BUSY_EVENT(xfs_alloc_busy_reuse); | 1191 | DEFINE_BUSY_EVENT(xfs_extent_busy_reuse); |
| 1179 | DEFINE_BUSY_EVENT(xfs_alloc_busy_clear); | 1192 | DEFINE_BUSY_EVENT(xfs_extent_busy_clear); |
| 1180 | 1193 | ||
| 1181 | TRACE_EVENT(xfs_alloc_busy_trim, | 1194 | TRACE_EVENT(xfs_extent_busy_trim, |
| 1182 | TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, | 1195 | TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, |
| 1183 | xfs_agblock_t agbno, xfs_extlen_t len, | 1196 | xfs_agblock_t agbno, xfs_extlen_t len, |
| 1184 | xfs_agblock_t tbno, xfs_extlen_t tlen), | 1197 | xfs_agblock_t tbno, xfs_extlen_t tlen), |
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index 103b00c90004..cdf896fcbfa4 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c | |||
| @@ -19,9 +19,7 @@ | |||
| 19 | #include "xfs.h" | 19 | #include "xfs.h" |
| 20 | #include "xfs_fs.h" | 20 | #include "xfs_fs.h" |
| 21 | #include "xfs_types.h" | 21 | #include "xfs_types.h" |
| 22 | #include "xfs_bit.h" | ||
| 23 | #include "xfs_log.h" | 22 | #include "xfs_log.h" |
| 24 | #include "xfs_inum.h" | ||
| 25 | #include "xfs_trans.h" | 23 | #include "xfs_trans.h" |
| 26 | #include "xfs_sb.h" | 24 | #include "xfs_sb.h" |
| 27 | #include "xfs_ag.h" | 25 | #include "xfs_ag.h" |
| @@ -36,6 +34,7 @@ | |||
| 36 | #include "xfs_btree.h" | 34 | #include "xfs_btree.h" |
| 37 | #include "xfs_ialloc.h" | 35 | #include "xfs_ialloc.h" |
| 38 | #include "xfs_alloc.h" | 36 | #include "xfs_alloc.h" |
| 37 | #include "xfs_extent_busy.h" | ||
| 39 | #include "xfs_bmap.h" | 38 | #include "xfs_bmap.h" |
| 40 | #include "xfs_quota.h" | 39 | #include "xfs_quota.h" |
| 41 | #include "xfs_trans_priv.h" | 40 | #include "xfs_trans_priv.h" |
| @@ -608,8 +607,8 @@ STATIC void | |||
| 608 | xfs_trans_free( | 607 | xfs_trans_free( |
| 609 | struct xfs_trans *tp) | 608 | struct xfs_trans *tp) |
| 610 | { | 609 | { |
| 611 | xfs_alloc_busy_sort(&tp->t_busy); | 610 | xfs_extent_busy_sort(&tp->t_busy); |
| 612 | xfs_alloc_busy_clear(tp->t_mountp, &tp->t_busy, false); | 611 | xfs_extent_busy_clear(tp->t_mountp, &tp->t_busy, false); |
| 613 | 612 | ||
| 614 | atomic_dec(&tp->t_mountp->m_active_trans); | 613 | atomic_dec(&tp->t_mountp->m_active_trans); |
| 615 | xfs_trans_free_dqinfo(tp); | 614 | xfs_trans_free_dqinfo(tp); |
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index f6118703f20d..7ab99e1898c8 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h | |||
| @@ -345,11 +345,9 @@ struct xfs_item_ops { | |||
| 345 | void (*iop_format)(xfs_log_item_t *, struct xfs_log_iovec *); | 345 | void (*iop_format)(xfs_log_item_t *, struct xfs_log_iovec *); |
| 346 | void (*iop_pin)(xfs_log_item_t *); | 346 | void (*iop_pin)(xfs_log_item_t *); |
| 347 | void (*iop_unpin)(xfs_log_item_t *, int remove); | 347 | void (*iop_unpin)(xfs_log_item_t *, int remove); |
| 348 | uint (*iop_trylock)(xfs_log_item_t *); | 348 | uint (*iop_push)(struct xfs_log_item *, struct list_head *); |
| 349 | void (*iop_unlock)(xfs_log_item_t *); | 349 | void (*iop_unlock)(xfs_log_item_t *); |
| 350 | xfs_lsn_t (*iop_committed)(xfs_log_item_t *, xfs_lsn_t); | 350 | xfs_lsn_t (*iop_committed)(xfs_log_item_t *, xfs_lsn_t); |
| 351 | void (*iop_push)(xfs_log_item_t *); | ||
| 352 | bool (*iop_pushbuf)(xfs_log_item_t *); | ||
| 353 | void (*iop_committing)(xfs_log_item_t *, xfs_lsn_t); | 351 | void (*iop_committing)(xfs_log_item_t *, xfs_lsn_t); |
| 354 | }; | 352 | }; |
| 355 | 353 | ||
| @@ -357,20 +355,18 @@ struct xfs_item_ops { | |||
| 357 | #define IOP_FORMAT(ip,vp) (*(ip)->li_ops->iop_format)(ip, vp) | 355 | #define IOP_FORMAT(ip,vp) (*(ip)->li_ops->iop_format)(ip, vp) |
| 358 | #define IOP_PIN(ip) (*(ip)->li_ops->iop_pin)(ip) | 356 | #define IOP_PIN(ip) (*(ip)->li_ops->iop_pin)(ip) |
| 359 | #define IOP_UNPIN(ip, remove) (*(ip)->li_ops->iop_unpin)(ip, remove) | 357 | #define IOP_UNPIN(ip, remove) (*(ip)->li_ops->iop_unpin)(ip, remove) |
| 360 | #define IOP_TRYLOCK(ip) (*(ip)->li_ops->iop_trylock)(ip) | 358 | #define IOP_PUSH(ip, list) (*(ip)->li_ops->iop_push)(ip, list) |
| 361 | #define IOP_UNLOCK(ip) (*(ip)->li_ops->iop_unlock)(ip) | 359 | #define IOP_UNLOCK(ip) (*(ip)->li_ops->iop_unlock)(ip) |
| 362 | #define IOP_COMMITTED(ip, lsn) (*(ip)->li_ops->iop_committed)(ip, lsn) | 360 | #define IOP_COMMITTED(ip, lsn) (*(ip)->li_ops->iop_committed)(ip, lsn) |
| 363 | #define IOP_PUSH(ip) (*(ip)->li_ops->iop_push)(ip) | ||
| 364 | #define IOP_PUSHBUF(ip) (*(ip)->li_ops->iop_pushbuf)(ip) | ||
| 365 | #define IOP_COMMITTING(ip, lsn) (*(ip)->li_ops->iop_committing)(ip, lsn) | 361 | #define IOP_COMMITTING(ip, lsn) (*(ip)->li_ops->iop_committing)(ip, lsn) |
| 366 | 362 | ||
| 367 | /* | 363 | /* |
| 368 | * Return values for the IOP_TRYLOCK() routines. | 364 | * Return values for the IOP_PUSH() routines. |
| 369 | */ | 365 | */ |
| 370 | #define XFS_ITEM_SUCCESS 0 | 366 | #define XFS_ITEM_SUCCESS 0 |
| 371 | #define XFS_ITEM_PINNED 1 | 367 | #define XFS_ITEM_PINNED 1 |
| 372 | #define XFS_ITEM_LOCKED 2 | 368 | #define XFS_ITEM_LOCKED 2 |
| 373 | #define XFS_ITEM_PUSHBUF 3 | 369 | #define XFS_ITEM_FLUSHING 3 |
| 374 | 370 | ||
| 375 | /* | 371 | /* |
| 376 | * This is the type of function which can be given to xfs_trans_callback() | 372 | * This is the type of function which can be given to xfs_trans_callback() |
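The net effect of this hunk is that the old IOP_TRYLOCK/IOP_PUSH/IOP_PUSHBUF triple collapses into a single IOP_PUSH that returns one of the four XFS_ITEM_* codes and may queue backing buffers on a caller-owned list. A minimal userspace sketch of that contract (the types and the sample implementation are invented for illustration); the AIL push loop in xfs_trans_ail.c below then switches on exactly these codes:

```c
#include <stdio.h>

enum push_result {			/* mirrors the XFS_ITEM_* codes */
	ITEM_SUCCESS = 0,		/* item flushed, buffer queued */
	ITEM_PINNED = 1,		/* needs a log force first */
	ITEM_LOCKED = 2,		/* someone else holds the lock */
	ITEM_FLUSHING = 3,		/* write-out already in progress */
};

struct buf_list { int nqueued; };	/* stand-in for a delwri list_head */

struct log_item;
struct item_ops {
	/* single push method: may queue backing buffers on @buf_list */
	enum push_result (*iop_push)(struct log_item *, struct buf_list *);
};

struct log_item {
	const struct item_ops *li_ops;
	int flushing;			/* hypothetical item state */
};

static enum push_result demo_push(struct log_item *lip,
				  struct buf_list *buf_list)
{
	if (lip->flushing)
		return ITEM_FLUSHING;
	buf_list->nqueued++;		/* "queue" the dirty buffer */
	return ITEM_SUCCESS;
}

static const struct item_ops demo_ops = { .iop_push = demo_push };

int main(void)
{
	struct buf_list bl = { 0 };
	struct log_item li = { .li_ops = &demo_ops, .flushing = 0 };
	enum push_result r = li.li_ops->iop_push(&li, &bl);

	printf("push -> %d, queued %d buffer(s)\n", r, bl.nqueued);
	return 0;
}
```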
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index 1dead07f092c..9c514483e599 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c | |||
| @@ -20,7 +20,6 @@ | |||
| 20 | #include "xfs_fs.h" | 20 | #include "xfs_fs.h" |
| 21 | #include "xfs_types.h" | 21 | #include "xfs_types.h" |
| 22 | #include "xfs_log.h" | 22 | #include "xfs_log.h" |
| 23 | #include "xfs_inum.h" | ||
| 24 | #include "xfs_trans.h" | 23 | #include "xfs_trans.h" |
| 25 | #include "xfs_sb.h" | 24 | #include "xfs_sb.h" |
| 26 | #include "xfs_ag.h" | 25 | #include "xfs_ag.h" |
| @@ -79,7 +78,7 @@ xfs_ail_check( | |||
| 79 | * Return a pointer to the first item in the AIL. If the AIL is empty, then | 78 | * Return a pointer to the first item in the AIL. If the AIL is empty, then |
| 80 | * return NULL. | 79 | * return NULL. |
| 81 | */ | 80 | */ |
| 82 | static xfs_log_item_t * | 81 | xfs_log_item_t * |
| 83 | xfs_ail_min( | 82 | xfs_ail_min( |
| 84 | struct xfs_ail *ailp) | 83 | struct xfs_ail *ailp) |
| 85 | { | 84 | { |
| @@ -364,30 +363,31 @@ xfsaild_push( | |||
| 364 | xfs_log_item_t *lip; | 363 | xfs_log_item_t *lip; |
| 365 | xfs_lsn_t lsn; | 364 | xfs_lsn_t lsn; |
| 366 | xfs_lsn_t target; | 365 | xfs_lsn_t target; |
| 367 | long tout = 10; | 366 | long tout; |
| 368 | int stuck = 0; | 367 | int stuck = 0; |
| 368 | int flushing = 0; | ||
| 369 | int count = 0; | 369 | int count = 0; |
| 370 | int push_xfsbufd = 0; | ||
| 371 | 370 | ||
| 372 | /* | 371 | /* |
| 373 | * If last time we ran we encountered pinned items, force the log first | 372 | * If we encountered pinned items or did not finish writing out all |
| 374 | * and wait for it before pushing again. | 373 | * buffers the last time we ran, force the log first and wait for it |
| 374 | * before pushing again. | ||
| 375 | */ | 375 | */ |
| 376 | spin_lock(&ailp->xa_lock); | 376 | if (ailp->xa_log_flush && ailp->xa_last_pushed_lsn == 0 && |
| 377 | if (ailp->xa_last_pushed_lsn == 0 && ailp->xa_log_flush && | 377 | (!list_empty_careful(&ailp->xa_buf_list) || |
| 378 | !list_empty(&ailp->xa_ail)) { | 378 | xfs_ail_min_lsn(ailp))) { |
| 379 | ailp->xa_log_flush = 0; | 379 | ailp->xa_log_flush = 0; |
| 380 | spin_unlock(&ailp->xa_lock); | 380 | |
| 381 | XFS_STATS_INC(xs_push_ail_flush); | 381 | XFS_STATS_INC(xs_push_ail_flush); |
| 382 | xfs_log_force(mp, XFS_LOG_SYNC); | 382 | xfs_log_force(mp, XFS_LOG_SYNC); |
| 383 | spin_lock(&ailp->xa_lock); | ||
| 384 | } | 383 | } |
| 385 | 384 | ||
| 386 | target = ailp->xa_target; | 385 | spin_lock(&ailp->xa_lock); |
| 387 | lip = xfs_trans_ail_cursor_first(ailp, &cur, ailp->xa_last_pushed_lsn); | 386 | lip = xfs_trans_ail_cursor_first(ailp, &cur, ailp->xa_last_pushed_lsn); |
| 388 | if (!lip || XFS_FORCED_SHUTDOWN(mp)) { | 387 | if (!lip) { |
| 389 | /* | 388 | /* |
| 390 | * AIL is empty or our push has reached the end. | 389 | * If the AIL is empty or our push has reached the end we are |
| 390 | * done now. | ||
| 391 | */ | 391 | */ |
| 392 | xfs_trans_ail_cursor_done(ailp, &cur); | 392 | xfs_trans_ail_cursor_done(ailp, &cur); |
| 393 | spin_unlock(&ailp->xa_lock); | 393 | spin_unlock(&ailp->xa_lock); |
| @@ -396,54 +396,42 @@ xfsaild_push( | |||
| 396 | 396 | ||
| 397 | XFS_STATS_INC(xs_push_ail); | 397 | XFS_STATS_INC(xs_push_ail); |
| 398 | 398 | ||
| 399 | /* | ||
| 400 | * While the item we are looking at is below the given threshold | ||
| 401 | * try to flush it out. We'd like not to stop until we've at least | ||
| 402 | * tried to push on everything in the AIL with an LSN less than | ||
| 403 | * the given threshold. | ||
| 404 | * | ||
| 405 | * However, we will stop after a certain number of pushes and wait | ||
| 406 | * for a reduced timeout to fire before pushing further. This | ||
| 407 | * prevents use from spinning when we can't do anything or there is | ||
| 408 | * lots of contention on the AIL lists. | ||
| 409 | */ | ||
| 410 | lsn = lip->li_lsn; | 399 | lsn = lip->li_lsn; |
| 400 | target = ailp->xa_target; | ||
| 411 | while ((XFS_LSN_CMP(lip->li_lsn, target) <= 0)) { | 401 | while ((XFS_LSN_CMP(lip->li_lsn, target) <= 0)) { |
| 412 | int lock_result; | 402 | int lock_result; |
| 403 | |||
| 413 | /* | 404 | /* |
| 414 | * If we can lock the item without sleeping, unlock the AIL | 405 | * Note that IOP_PUSH may unlock and reacquire the AIL lock. We |
| 415 | * lock and flush the item. Then re-grab the AIL lock so we | 406 | * rely on the AIL cursor implementation to be able to deal with |
| 416 | * can look for the next item on the AIL. List changes are | 407 | * the dropped lock. |
| 417 | * handled by the AIL lookup functions internally | ||
| 418 | * | ||
| 419 | * If we can't lock the item, either its holder will flush it | ||
| 420 | * or it is already being flushed or it is being relogged. In | ||
| 421 | * any of these case it is being taken care of and we can just | ||
| 422 | * skip to the next item in the list. | ||
| 423 | */ | 408 | */ |
| 424 | lock_result = IOP_TRYLOCK(lip); | 409 | lock_result = IOP_PUSH(lip, &ailp->xa_buf_list); |
| 425 | spin_unlock(&ailp->xa_lock); | ||
| 426 | switch (lock_result) { | 410 | switch (lock_result) { |
| 427 | case XFS_ITEM_SUCCESS: | 411 | case XFS_ITEM_SUCCESS: |
| 428 | XFS_STATS_INC(xs_push_ail_success); | 412 | XFS_STATS_INC(xs_push_ail_success); |
| 429 | trace_xfs_ail_push(lip); | 413 | trace_xfs_ail_push(lip); |
| 430 | 414 | ||
| 431 | IOP_PUSH(lip); | ||
| 432 | ailp->xa_last_pushed_lsn = lsn; | 415 | ailp->xa_last_pushed_lsn = lsn; |
| 433 | break; | 416 | break; |
| 434 | 417 | ||
| 435 | case XFS_ITEM_PUSHBUF: | 418 | case XFS_ITEM_FLUSHING: |
| 436 | XFS_STATS_INC(xs_push_ail_pushbuf); | 419 | /* |
| 437 | trace_xfs_ail_pushbuf(lip); | 420 | * The item or its backing buffer is already being |
| 438 | 421 | * flushed. The typical reason for that is that an | |
| 439 | if (!IOP_PUSHBUF(lip)) { | 422 | * inode buffer is locked because we already pushed the |
| 440 | trace_xfs_ail_pushbuf_pinned(lip); | 423 | * updates to it as part of inode clustering. |
| 441 | stuck++; | 424 | * |
| 442 | ailp->xa_log_flush++; | 425 | * We do not want to stop flushing just because lots |
| 443 | } else { | 426 | * of items are already being flushed, but we need to |
| 444 | ailp->xa_last_pushed_lsn = lsn; | 427 | * re-try the flushing relatively soon if most of the |
| 445 | } | 428 | * AIL is being flushed. |
| 446 | push_xfsbufd = 1; | 429 | */ |
| 430 | XFS_STATS_INC(xs_push_ail_flushing); | ||
| 431 | trace_xfs_ail_flushing(lip); | ||
| 432 | |||
| 433 | flushing++; | ||
| 434 | ailp->xa_last_pushed_lsn = lsn; | ||
| 447 | break; | 435 | break; |
| 448 | 436 | ||
| 449 | case XFS_ITEM_PINNED: | 437 | case XFS_ITEM_PINNED: |
| @@ -453,28 +441,22 @@ xfsaild_push( | |||
| 453 | stuck++; | 441 | stuck++; |
| 454 | ailp->xa_log_flush++; | 442 | ailp->xa_log_flush++; |
| 455 | break; | 443 | break; |
| 456 | |||
| 457 | case XFS_ITEM_LOCKED: | 444 | case XFS_ITEM_LOCKED: |
| 458 | XFS_STATS_INC(xs_push_ail_locked); | 445 | XFS_STATS_INC(xs_push_ail_locked); |
| 459 | trace_xfs_ail_locked(lip); | 446 | trace_xfs_ail_locked(lip); |
| 447 | |||
| 460 | stuck++; | 448 | stuck++; |
| 461 | break; | 449 | break; |
| 462 | |||
| 463 | default: | 450 | default: |
| 464 | ASSERT(0); | 451 | ASSERT(0); |
| 465 | break; | 452 | break; |
| 466 | } | 453 | } |
| 467 | 454 | ||
| 468 | spin_lock(&ailp->xa_lock); | ||
| 469 | /* should we bother continuing? */ | ||
| 470 | if (XFS_FORCED_SHUTDOWN(mp)) | ||
| 471 | break; | ||
| 472 | ASSERT(mp->m_log); | ||
| 473 | |||
| 474 | count++; | 455 | count++; |
| 475 | 456 | ||
| 476 | /* | 457 | /* |
| 477 | * Are there too many items we can't do anything with? | 458 | * Are there too many items we can't do anything with? |
| 459 | * | ||
| 478 | * If we are skipping too many items because we can't flush | 460 | * If we are skipping too many items because we can't flush |
| 479 | * them or they are already being flushed, we back off and | 461 | * them or they are already being flushed, we back off and |
| 480 | * give them time to complete whatever operation is being | 462 | * give them time to complete whatever operation is being |
| @@ -496,42 +478,36 @@ xfsaild_push( | |||
| 496 | xfs_trans_ail_cursor_done(ailp, &cur); | 478 | xfs_trans_ail_cursor_done(ailp, &cur); |
| 497 | spin_unlock(&ailp->xa_lock); | 479 | spin_unlock(&ailp->xa_lock); |
| 498 | 480 | ||
| 499 | if (push_xfsbufd) { | 481 | if (xfs_buf_delwri_submit_nowait(&ailp->xa_buf_list)) |
| 500 | /* we've got delayed write buffers to flush */ | 482 | ailp->xa_log_flush++; |
| 501 | wake_up_process(mp->m_ddev_targp->bt_task); | ||
| 502 | } | ||
| 503 | 483 | ||
| 504 | /* assume we have more work to do in a short while */ | 484 | if (!count || XFS_LSN_CMP(lsn, target) >= 0) { |
| 505 | out_done: | 485 | out_done: |
| 506 | if (!count) { | ||
| 507 | /* We're past our target or empty, so idle */ | ||
| 508 | ailp->xa_last_pushed_lsn = 0; | ||
| 509 | ailp->xa_log_flush = 0; | ||
| 510 | |||
| 511 | tout = 50; | ||
| 512 | } else if (XFS_LSN_CMP(lsn, target) >= 0) { | ||
| 513 | /* | 486 | /* |
| 514 | * We reached the target so wait a bit longer for I/O to | 487 | * We reached the target or the AIL is empty, so wait a bit |
| 515 | * complete and remove pushed items from the AIL before we | 488 | * longer for I/O to complete and remove pushed items from the |
| 516 | * start the next scan from the start of the AIL. | 489 | * AIL before we start the next scan from the start of the AIL. |
| 517 | */ | 490 | */ |
| 518 | tout = 50; | 491 | tout = 50; |
| 519 | ailp->xa_last_pushed_lsn = 0; | 492 | ailp->xa_last_pushed_lsn = 0; |
| 520 | } else if ((stuck * 100) / count > 90) { | 493 | } else if (((stuck + flushing) * 100) / count > 90) { |
| 521 | /* | 494 | /* |
| 522 | * Either there is a lot of contention on the AIL or we | 495 | * Either there is a lot of contention on the AIL or we are |
| 523 | * are stuck due to operations in progress. "Stuck" in this | 496 | * stuck due to operations in progress. "Stuck" in this case |
| 524 | * case is defined as >90% of the items we tried to push | 497 | * is defined as >90% of the items we tried to push were stuck. |
| 525 | * were stuck. | ||
| 526 | * | 498 | * |
| 527 | * Backoff a bit more to allow some I/O to complete before | 499 | * Backoff a bit more to allow some I/O to complete before |
| 528 | * restarting from the start of the AIL. This prevents us | 500 | * restarting from the start of the AIL. This prevents us from |
| 529 | * from spinning on the same items, and if they are pinned will | 501 | * spinning on the same items, and if they are pinned will |
| 530 | * allow the restart to issue a log force to unpin the stuck | 502 | * allow the restart to issue a log force to unpin the stuck items. |
| 531 | * items. | ||
| 532 | */ | 503 | */ |
| 533 | tout = 20; | 504 | tout = 20; |
| 534 | ailp->xa_last_pushed_lsn = 0; | 505 | ailp->xa_last_pushed_lsn = 0; |
| 506 | } else { | ||
| 507 | /* | ||
| 508 | * Assume we have more work to do in a short while. | ||
| 509 | */ | ||
| 510 | tout = 10; | ||
| 535 | } | 511 | } |
| 536 | 512 | ||
| 537 | return tout; | 513 | return tout; |
| @@ -544,6 +520,8 @@ xfsaild( | |||
| 544 | struct xfs_ail *ailp = data; | 520 | struct xfs_ail *ailp = data; |
| 545 | long tout = 0; /* milliseconds */ | 521 | long tout = 0; /* milliseconds */ |
| 546 | 522 | ||
| 523 | current->flags |= PF_MEMALLOC; | ||
| 524 | |||
| 547 | while (!kthread_should_stop()) { | 525 | while (!kthread_should_stop()) { |
| 548 | if (tout && tout <= 20) | 526 | if (tout && tout <= 20) |
| 549 | __set_current_state(TASK_KILLABLE); | 527 | __set_current_state(TASK_KILLABLE); |
| @@ -611,6 +589,30 @@ xfs_ail_push_all( | |||
| 611 | } | 589 | } |
| 612 | 590 | ||
| 613 | /* | 591 | /* |
| 592 | * Push out all items in the AIL immediately and wait until the AIL is empty. | ||
| 593 | */ | ||
| 594 | void | ||
| 595 | xfs_ail_push_all_sync( | ||
| 596 | struct xfs_ail *ailp) | ||
| 597 | { | ||
| 598 | struct xfs_log_item *lip; | ||
| 599 | DEFINE_WAIT(wait); | ||
| 600 | |||
| 601 | spin_lock(&ailp->xa_lock); | ||
| 602 | while ((lip = xfs_ail_max(ailp)) != NULL) { | ||
| 603 | prepare_to_wait(&ailp->xa_empty, &wait, TASK_UNINTERRUPTIBLE); | ||
| 604 | ailp->xa_target = lip->li_lsn; | ||
| 605 | wake_up_process(ailp->xa_task); | ||
| 606 | spin_unlock(&ailp->xa_lock); | ||
| 607 | schedule(); | ||
| 608 | spin_lock(&ailp->xa_lock); | ||
| 609 | } | ||
| 610 | spin_unlock(&ailp->xa_lock); | ||
| 611 | |||
| 612 | finish_wait(&ailp->xa_empty, &wait); | ||
| 613 | } | ||
| 614 | |||
| 615 | /* | ||
| 614 | * xfs_trans_ail_update - bulk AIL insertion operation. | 616 | * xfs_trans_ail_update - bulk AIL insertion operation. |
| 615 | * | 617 | * |
| 616 | * @xfs_trans_ail_update takes an array of log items that all need to be | 618 | * @xfs_trans_ail_update takes an array of log items that all need to be |
| @@ -667,11 +669,15 @@ xfs_trans_ail_update_bulk( | |||
| 667 | 669 | ||
| 668 | if (!list_empty(&tmp)) | 670 | if (!list_empty(&tmp)) |
| 669 | xfs_ail_splice(ailp, cur, &tmp, lsn); | 671 | xfs_ail_splice(ailp, cur, &tmp, lsn); |
| 670 | spin_unlock(&ailp->xa_lock); | ||
| 671 | 672 | ||
| 672 | if (mlip_changed && !XFS_FORCED_SHUTDOWN(ailp->xa_mount)) { | 673 | if (mlip_changed) { |
| 673 | xlog_assign_tail_lsn(ailp->xa_mount); | 674 | if (!XFS_FORCED_SHUTDOWN(ailp->xa_mount)) |
| 675 | xlog_assign_tail_lsn_locked(ailp->xa_mount); | ||
| 676 | spin_unlock(&ailp->xa_lock); | ||
| 677 | |||
| 674 | xfs_log_space_wake(ailp->xa_mount); | 678 | xfs_log_space_wake(ailp->xa_mount); |
| 679 | } else { | ||
| 680 | spin_unlock(&ailp->xa_lock); | ||
| 675 | } | 681 | } |
| 676 | } | 682 | } |
| 677 | 683 | ||
| @@ -700,7 +706,8 @@ void | |||
| 700 | xfs_trans_ail_delete_bulk( | 706 | xfs_trans_ail_delete_bulk( |
| 701 | struct xfs_ail *ailp, | 707 | struct xfs_ail *ailp, |
| 702 | struct xfs_log_item **log_items, | 708 | struct xfs_log_item **log_items, |
| 703 | int nr_items) __releases(ailp->xa_lock) | 709 | int nr_items, |
| 710 | int shutdown_type) __releases(ailp->xa_lock) | ||
| 704 | { | 711 | { |
| 705 | xfs_log_item_t *mlip; | 712 | xfs_log_item_t *mlip; |
| 706 | int mlip_changed = 0; | 713 | int mlip_changed = 0; |
| @@ -718,7 +725,7 @@ xfs_trans_ail_delete_bulk( | |||
| 718 | xfs_alert_tag(mp, XFS_PTAG_AILDELETE, | 725 | xfs_alert_tag(mp, XFS_PTAG_AILDELETE, |
| 719 | "%s: attempting to delete a log item that is not in the AIL", | 726 | "%s: attempting to delete a log item that is not in the AIL", |
| 720 | __func__); | 727 | __func__); |
| 721 | xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); | 728 | xfs_force_shutdown(mp, shutdown_type); |
| 722 | } | 729 | } |
| 723 | return; | 730 | return; |
| 724 | } | 731 | } |
| @@ -729,28 +736,20 @@ xfs_trans_ail_delete_bulk( | |||
| 729 | if (mlip == lip) | 736 | if (mlip == lip) |
| 730 | mlip_changed = 1; | 737 | mlip_changed = 1; |
| 731 | } | 738 | } |
| 732 | spin_unlock(&ailp->xa_lock); | ||
| 733 | 739 | ||
| 734 | if (mlip_changed && !XFS_FORCED_SHUTDOWN(ailp->xa_mount)) { | 740 | if (mlip_changed) { |
| 735 | xlog_assign_tail_lsn(ailp->xa_mount); | 741 | if (!XFS_FORCED_SHUTDOWN(ailp->xa_mount)) |
| 742 | xlog_assign_tail_lsn_locked(ailp->xa_mount); | ||
| 743 | if (list_empty(&ailp->xa_ail)) | ||
| 744 | wake_up_all(&ailp->xa_empty); | ||
| 745 | spin_unlock(&ailp->xa_lock); | ||
| 746 | |||
| 736 | xfs_log_space_wake(ailp->xa_mount); | 747 | xfs_log_space_wake(ailp->xa_mount); |
| 748 | } else { | ||
| 749 | spin_unlock(&ailp->xa_lock); | ||
| 737 | } | 750 | } |
| 738 | } | 751 | } |
| 739 | 752 | ||
| 740 | /* | ||
| 741 | * The active item list (AIL) is a doubly linked list of log | ||
| 742 | * items sorted by ascending lsn. The base of the list is | ||
| 743 | * a forw/back pointer pair embedded in the xfs mount structure. | ||
| 744 | * The base is initialized with both pointers pointing to the | ||
| 745 | * base. This case always needs to be distinguished, because | ||
| 746 | * the base has no lsn to look at. We almost always insert | ||
| 747 | * at the end of the list, so on inserts we search from the | ||
| 748 | * end of the list to find where the new item belongs. | ||
| 749 | */ | ||
| 750 | |||
| 751 | /* | ||
| 752 | * Initialize the doubly linked list to point only to itself. | ||
| 753 | */ | ||
| 754 | int | 753 | int |
| 755 | xfs_trans_ail_init( | 754 | xfs_trans_ail_init( |
| 756 | xfs_mount_t *mp) | 755 | xfs_mount_t *mp) |
| @@ -765,6 +764,8 @@ xfs_trans_ail_init( | |||
| 765 | INIT_LIST_HEAD(&ailp->xa_ail); | 764 | INIT_LIST_HEAD(&ailp->xa_ail); |
| 766 | INIT_LIST_HEAD(&ailp->xa_cursors); | 765 | INIT_LIST_HEAD(&ailp->xa_cursors); |
| 767 | spin_lock_init(&ailp->xa_lock); | 766 | spin_lock_init(&ailp->xa_lock); |
| 767 | INIT_LIST_HEAD(&ailp->xa_buf_list); | ||
| 768 | init_waitqueue_head(&ailp->xa_empty); | ||
| 768 | 769 | ||
| 769 | ailp->xa_task = kthread_run(xfsaild, ailp, "xfsaild/%s", | 770 | ailp->xa_task = kthread_run(xfsaild, ailp, "xfsaild/%s", |
| 770 | ailp->xa_mount->m_fsname); | 771 | ailp->xa_mount->m_fsname); |
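The rewritten tail of xfsaild_push() boils down to a three-way choice of sleep interval. A standalone restatement of just that heuristic, with the LSN comparison reduced to a boolean (the function name is made up; the constants and branches mirror the hunk above):

```c
#include <stdio.h>

static long aild_tout(int count, int stuck, int flushing, int reached_target)
{
	if (!count || reached_target)
		return 50;	/* wait for I/O, restart from the AIL head */
	if (((stuck + flushing) * 100) / count > 90)
		return 20;	/* >90% stuck or flushing: longer backoff */
	return 10;		/* making progress, more work soon */
}

int main(void)
{
	printf("%ld\n", aild_tout(0, 0, 0, 0));	    /* 50: empty AIL */
	printf("%ld\n", aild_tout(100, 80, 15, 0)); /* 20: 95% stuck */
	printf("%ld\n", aild_tout(100, 10, 5, 0));  /* 10: keep pushing */
	return 0;
}
```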
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c index 1302d1d95a58..21c5a5e3700d 100644 --- a/fs/xfs/xfs_trans_buf.c +++ b/fs/xfs/xfs_trans_buf.c | |||
| @@ -18,9 +18,7 @@ | |||
| 18 | #include "xfs.h" | 18 | #include "xfs.h" |
| 19 | #include "xfs_fs.h" | 19 | #include "xfs_fs.h" |
| 20 | #include "xfs_types.h" | 20 | #include "xfs_types.h" |
| 21 | #include "xfs_bit.h" | ||
| 22 | #include "xfs_log.h" | 21 | #include "xfs_log.h" |
| 23 | #include "xfs_inum.h" | ||
| 24 | #include "xfs_trans.h" | 22 | #include "xfs_trans.h" |
| 25 | #include "xfs_sb.h" | 23 | #include "xfs_sb.h" |
| 26 | #include "xfs_ag.h" | 24 | #include "xfs_ag.h" |
| @@ -33,7 +31,6 @@ | |||
| 33 | #include "xfs_buf_item.h" | 31 | #include "xfs_buf_item.h" |
| 34 | #include "xfs_trans_priv.h" | 32 | #include "xfs_trans_priv.h" |
| 35 | #include "xfs_error.h" | 33 | #include "xfs_error.h" |
| 36 | #include "xfs_rw.h" | ||
| 37 | #include "xfs_trace.h" | 34 | #include "xfs_trace.h" |
| 38 | 35 | ||
| 39 | /* | 36 | /* |
| @@ -56,7 +53,7 @@ xfs_trans_buf_item_match( | |||
| 56 | if (blip->bli_item.li_type == XFS_LI_BUF && | 53 | if (blip->bli_item.li_type == XFS_LI_BUF && |
| 57 | blip->bli_buf->b_target == target && | 54 | blip->bli_buf->b_target == target && |
| 58 | XFS_BUF_ADDR(blip->bli_buf) == blkno && | 55 | XFS_BUF_ADDR(blip->bli_buf) == blkno && |
| 59 | XFS_BUF_COUNT(blip->bli_buf) == len) | 56 | BBTOB(blip->bli_buf->b_length) == len) |
| 60 | return blip->bli_buf; | 57 | return blip->bli_buf; |
| 61 | } | 58 | } |
| 62 | 59 | ||
| @@ -141,15 +138,11 @@ xfs_trans_get_buf(xfs_trans_t *tp, | |||
| 141 | xfs_buf_t *bp; | 138 | xfs_buf_t *bp; |
| 142 | xfs_buf_log_item_t *bip; | 139 | xfs_buf_log_item_t *bip; |
| 143 | 140 | ||
| 144 | if (flags == 0) | ||
| 145 | flags = XBF_LOCK | XBF_MAPPED; | ||
| 146 | |||
| 147 | /* | 141 | /* |
| 148 | * Default to a normal get_buf() call if the tp is NULL. | 142 | * Default to a normal get_buf() call if the tp is NULL. |
| 149 | */ | 143 | */ |
| 150 | if (tp == NULL) | 144 | if (tp == NULL) |
| 151 | return xfs_buf_get(target_dev, blkno, len, | 145 | return xfs_buf_get(target_dev, blkno, len, flags); |
| 152 | flags | XBF_DONT_BLOCK); | ||
| 153 | 146 | ||
| 154 | /* | 147 | /* |
| 155 | * If we find the buffer in the cache with this transaction | 148 | * If we find the buffer in the cache with this transaction |
| @@ -165,14 +158,6 @@ xfs_trans_get_buf(xfs_trans_t *tp, | |||
| 165 | XFS_BUF_DONE(bp); | 158 | XFS_BUF_DONE(bp); |
| 166 | } | 159 | } |
| 167 | 160 | ||
| 168 | /* | ||
| 169 | * If the buffer is stale then it was binval'ed | ||
| 170 | * since last read. This doesn't matter since the | ||
| 171 | * caller isn't allowed to use the data anyway. | ||
| 172 | */ | ||
| 173 | else if (XFS_BUF_ISSTALE(bp)) | ||
| 174 | ASSERT(!XFS_BUF_ISDELAYWRITE(bp)); | ||
| 175 | |||
| 176 | ASSERT(bp->b_transp == tp); | 161 | ASSERT(bp->b_transp == tp); |
| 177 | bip = bp->b_fspriv; | 162 | bip = bp->b_fspriv; |
| 178 | ASSERT(bip != NULL); | 163 | ASSERT(bip != NULL); |
| @@ -182,15 +167,7 @@ xfs_trans_get_buf(xfs_trans_t *tp, | |||
| 182 | return (bp); | 167 | return (bp); |
| 183 | } | 168 | } |
| 184 | 169 | ||
| 185 | /* | 170 | bp = xfs_buf_get(target_dev, blkno, len, flags); |
| 186 | * We always specify the XBF_DONT_BLOCK flag within a transaction | ||
| 187 | * so that get_buf does not try to push out a delayed write buffer | ||
| 188 | * which might cause another transaction to take place (if the | ||
| 189 | * buffer was delayed alloc). Such recursive transactions can | ||
| 190 | * easily deadlock with our current transaction as well as cause | ||
| 191 | * us to run out of stack space. | ||
| 192 | */ | ||
| 193 | bp = xfs_buf_get(target_dev, blkno, len, flags | XBF_DONT_BLOCK); | ||
| 194 | if (bp == NULL) { | 171 | if (bp == NULL) { |
| 195 | return NULL; | 172 | return NULL; |
| 196 | } | 173 | } |
| @@ -282,14 +259,13 @@ xfs_trans_read_buf( | |||
| 282 | xfs_buf_log_item_t *bip; | 259 | xfs_buf_log_item_t *bip; |
| 283 | int error; | 260 | int error; |
| 284 | 261 | ||
| 285 | if (flags == 0) | 262 | *bpp = NULL; |
| 286 | flags = XBF_LOCK | XBF_MAPPED; | ||
| 287 | 263 | ||
| 288 | /* | 264 | /* |
| 289 | * Default to a normal get_buf() call if the tp is NULL. | 265 | * Default to a normal get_buf() call if the tp is NULL. |
| 290 | */ | 266 | */ |
| 291 | if (tp == NULL) { | 267 | if (tp == NULL) { |
| 292 | bp = xfs_buf_read(target, blkno, len, flags | XBF_DONT_BLOCK); | 268 | bp = xfs_buf_read(target, blkno, len, flags); |
| 293 | if (!bp) | 269 | if (!bp) |
| 294 | return (flags & XBF_TRYLOCK) ? | 270 | return (flags & XBF_TRYLOCK) ? |
| 295 | EAGAIN : XFS_ERROR(ENOMEM); | 271 | EAGAIN : XFS_ERROR(ENOMEM); |
| @@ -297,6 +273,8 @@ xfs_trans_read_buf( | |||
| 297 | if (bp->b_error) { | 273 | if (bp->b_error) { |
| 298 | error = bp->b_error; | 274 | error = bp->b_error; |
| 299 | xfs_buf_ioerror_alert(bp, __func__); | 275 | xfs_buf_ioerror_alert(bp, __func__); |
| 276 | XFS_BUF_UNDONE(bp); | ||
| 277 | xfs_buf_stale(bp); | ||
| 300 | xfs_buf_relse(bp); | 278 | xfs_buf_relse(bp); |
| 301 | return error; | 279 | return error; |
| 302 | } | 280 | } |
| @@ -371,15 +349,7 @@ xfs_trans_read_buf( | |||
| 371 | return 0; | 349 | return 0; |
| 372 | } | 350 | } |
| 373 | 351 | ||
| 374 | /* | 352 | bp = xfs_buf_read(target, blkno, len, flags); |
| 375 | * We always specify the XBF_DONT_BLOCK flag within a transaction | ||
| 376 | * so that get_buf does not try to push out a delayed write buffer | ||
| 377 | * which might cause another transaction to take place (if the | ||
| 378 | * buffer was delayed alloc). Such recursive transactions can | ||
| 379 | * easily deadlock with our current transaction as well as cause | ||
| 380 | * us to run out of stack space. | ||
| 381 | */ | ||
| 382 | bp = xfs_buf_read(target, blkno, len, flags | XBF_DONT_BLOCK); | ||
| 383 | if (bp == NULL) { | 353 | if (bp == NULL) { |
| 384 | *bpp = NULL; | 354 | *bpp = NULL; |
| 385 | return (flags & XBF_TRYLOCK) ? | 355 | return (flags & XBF_TRYLOCK) ? |
| @@ -418,19 +388,6 @@ xfs_trans_read_buf( | |||
| 418 | return 0; | 388 | return 0; |
| 419 | 389 | ||
| 420 | shutdown_abort: | 390 | shutdown_abort: |
| 421 | /* | ||
| 422 | * the theory here is that buffer is good but we're | ||
| 423 | * bailing out because the filesystem is being forcibly | ||
| 424 | * shut down. So we should leave the b_flags alone since | ||
| 425 | * the buffer's not staled and just get out. | ||
| 426 | */ | ||
| 427 | #if defined(DEBUG) | ||
| 428 | if (XFS_BUF_ISSTALE(bp) && XFS_BUF_ISDELAYWRITE(bp)) | ||
| 429 | xfs_notice(mp, "about to pop assert, bp == 0x%p", bp); | ||
| 430 | #endif | ||
| 431 | ASSERT((bp->b_flags & (XBF_STALE|XBF_DELWRI)) != | ||
| 432 | (XBF_STALE|XBF_DELWRI)); | ||
| 433 | |||
| 434 | trace_xfs_trans_read_buf_shut(bp, _RET_IP_); | 391 | trace_xfs_trans_read_buf_shut(bp, _RET_IP_); |
| 435 | xfs_buf_relse(bp); | 392 | xfs_buf_relse(bp); |
| 436 | *bpp = NULL; | 393 | *bpp = NULL; |
| @@ -606,7 +563,7 @@ xfs_trans_log_buf(xfs_trans_t *tp, | |||
| 606 | 563 | ||
| 607 | ASSERT(bp->b_transp == tp); | 564 | ASSERT(bp->b_transp == tp); |
| 608 | ASSERT(bip != NULL); | 565 | ASSERT(bip != NULL); |
| 609 | ASSERT((first <= last) && (last < XFS_BUF_COUNT(bp))); | 566 | ASSERT(first <= last && last < BBTOB(bp->b_length)); |
| 610 | ASSERT(bp->b_iodone == NULL || | 567 | ASSERT(bp->b_iodone == NULL || |
| 611 | bp->b_iodone == xfs_buf_iodone_callbacks); | 568 | bp->b_iodone == xfs_buf_iodone_callbacks); |
| 612 | 569 | ||
| @@ -626,8 +583,6 @@ xfs_trans_log_buf(xfs_trans_t *tp, | |||
| 626 | bp->b_iodone = xfs_buf_iodone_callbacks; | 583 | bp->b_iodone = xfs_buf_iodone_callbacks; |
| 627 | bip->bli_item.li_cb = xfs_buf_iodone; | 584 | bip->bli_item.li_cb = xfs_buf_iodone; |
| 628 | 585 | ||
| 629 | xfs_buf_delwri_queue(bp); | ||
| 630 | |||
| 631 | trace_xfs_trans_log_buf(bip); | 586 | trace_xfs_trans_log_buf(bip); |
| 632 | 587 | ||
| 633 | /* | 588 | /* |
| @@ -651,22 +606,33 @@ xfs_trans_log_buf(xfs_trans_t *tp, | |||
| 651 | 606 | ||
| 652 | 607 | ||
| 653 | /* | 608 | /* |
| 654 | * This called to invalidate a buffer that is being used within | 609 | * Invalidate a buffer that is being used within a transaction. |
| 655 | * a transaction. Typically this is because the blocks in the | 610 | * |
| 656 | * buffer are being freed, so we need to prevent it from being | 611 | * Typically this is because the blocks in the buffer are being freed, so we |
| 657 | * written out when we're done. Allowing it to be written again | 612 | * need to prevent it from being written out when we're done. Allowing it |
| 658 | * might overwrite data in the free blocks if they are reallocated | 613 | * to be written again might overwrite data in the free blocks if they are |
| 659 | * to a file. | 614 | * reallocated to a file. |
| 615 | * | ||
| 616 | * We prevent the buffer from being written out by marking it stale. We can't | ||
| 617 | * get rid of the buf log item at this point because the buffer may still be | ||
| 618 | * pinned by another transaction. If that is the case, then we'll wait until | ||
| 619 | * the buffer is committed to disk for the last time (we can tell by the ref | ||
| 620 | * count) and free it in xfs_buf_item_unpin(). Until that happens we will | ||
| 621 | * keep the buffer locked so that the buffer and buf log item are not reused. | ||
| 622 | * | ||
| 623 | * We also set the XFS_BLF_CANCEL flag in the buf log format structure and log | ||
| 624 | * the buf item. This will be used at recovery time to determine that copies | ||
| 625 | * of the buffer in the log before this should not be replayed. | ||
| 660 | * | 626 | * |
| 661 | * We prevent the buffer from being written out by clearing the | 627 | * We mark the item descriptor and the transaction dirty so that we'll hold |
| 662 | * B_DELWRI flag. We can't always | 628 | * the buffer until after the commit. |
| 663 | * get rid of the buf log item at this point, though, because | 629 | * |
| 664 | * the buffer may still be pinned by another transaction. If that | 630 | * Since we're invalidating the buffer, we also clear the state about which |
| 665 | * is the case, then we'll wait until the buffer is committed to | 631 | * parts of the buffer have been logged. We also clear the flag indicating |
| 666 | * disk for the last time (we can tell by the ref count) and | 632 | * that this is an inode buffer since the data in the buffer will no longer |
| 667 | * free it in xfs_buf_item_unpin(). Until it is cleaned up we | 633 | * be valid. |
| 668 | * will keep the buffer locked so that the buffer and buf log item | 634 | * |
| 669 | * are not reused. | 635 | * We set the stale bit in the buffer as well since we're getting rid of it. |
| 670 | */ | 636 | */ |
| 671 | void | 637 | void |
| 672 | xfs_trans_binval( | 638 | xfs_trans_binval( |
| @@ -686,7 +652,6 @@ xfs_trans_binval( | |||
| 686 | * If the buffer is already invalidated, then | 652 | * If the buffer is already invalidated, then |
| 687 | * just return. | 653 | * just return. |
| 688 | */ | 654 | */ |
| 689 | ASSERT(!(XFS_BUF_ISDELAYWRITE(bp))); | ||
| 690 | ASSERT(XFS_BUF_ISSTALE(bp)); | 655 | ASSERT(XFS_BUF_ISSTALE(bp)); |
| 691 | ASSERT(!(bip->bli_flags & (XFS_BLI_LOGGED | XFS_BLI_DIRTY))); | 656 | ASSERT(!(bip->bli_flags & (XFS_BLI_LOGGED | XFS_BLI_DIRTY))); |
| 692 | ASSERT(!(bip->bli_format.blf_flags & XFS_BLF_INODE_BUF)); | 657 | ASSERT(!(bip->bli_format.blf_flags & XFS_BLF_INODE_BUF)); |
| @@ -696,27 +661,8 @@ xfs_trans_binval( | |||
| 696 | return; | 661 | return; |
| 697 | } | 662 | } |
| 698 | 663 | ||
| 699 | /* | ||
| 700 | * Clear the dirty bit in the buffer and set the STALE flag | ||
| 701 | * in the buf log item. The STALE flag will be used in | ||
| 702 | * xfs_buf_item_unpin() to determine if it should clean up | ||
| 703 | * when the last reference to the buf item is given up. | ||
| 704 | * We set the XFS_BLF_CANCEL flag in the buf log format structure | ||
| 705 | * and log the buf item. This will be used at recovery time | ||
| 706 | * to determine that copies of the buffer in the log before | ||
| 707 | * this should not be replayed. | ||
| 708 | * We mark the item descriptor and the transaction dirty so | ||
| 709 | * that we'll hold the buffer until after the commit. | ||
| 710 | * | ||
| 711 | * Since we're invalidating the buffer, we also clear the state | ||
| 712 | * about which parts of the buffer have been logged. We also | ||
| 713 | * clear the flag indicating that this is an inode buffer since | ||
| 714 | * the data in the buffer will no longer be valid. | ||
| 715 | * | ||
| 716 | * We set the stale bit in the buffer as well since we're getting | ||
| 717 | * rid of it. | ||
| 718 | */ | ||
| 719 | xfs_buf_stale(bp); | 664 | xfs_buf_stale(bp); |
| 665 | |||
| 720 | bip->bli_flags |= XFS_BLI_STALE; | 666 | bip->bli_flags |= XFS_BLI_STALE; |
| 721 | bip->bli_flags &= ~(XFS_BLI_INODE_BUF | XFS_BLI_LOGGED | XFS_BLI_DIRTY); | 667 | bip->bli_flags &= ~(XFS_BLI_INODE_BUF | XFS_BLI_LOGGED | XFS_BLI_DIRTY); |
| 722 | bip->bli_format.blf_flags &= ~XFS_BLF_INODE_BUF; | 668 | bip->bli_format.blf_flags &= ~XFS_BLF_INODE_BUF; |
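The match test and asserts above replace XFS_BUF_COUNT(bp) with BBTOB(bp->b_length): buffer length is now carried in 512-byte basic blocks and converted to bytes at the comparison site. A quick compile-and-run check of that arithmetic, with the conversion macros written essentially as the XFS headers define them:

```c
#include <stdio.h>
#include <assert.h>

#define BBSHIFT		9
#define BBSIZE		(1 << BBSHIFT)	/* 512-byte basic block */
#define BBTOB(bbs)	((bbs) << BBSHIFT)	/* basic blocks -> bytes */
#define BTOBB(bytes)	(((bytes) + BBSIZE - 1) >> BBSHIFT) /* round up */

int main(void)
{
	int b_length = 8;	/* buffer length in basic blocks */
	int len = 4096;		/* length to match, in bytes */

	assert(BBTOB(b_length) == len);	/* the new match test */
	assert(BTOBB(len) == b_length);	/* and the inverse */
	printf("%d basic blocks == %d bytes\n", b_length, BBTOB(b_length));
	return 0;
}
```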
diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c index 279099717ed2..bcb60542fcf1 100644 --- a/fs/xfs/xfs_trans_dquot.c +++ b/fs/xfs/xfs_trans_dquot.c | |||
| @@ -17,9 +17,7 @@ | |||
| 17 | */ | 17 | */ |
| 18 | #include "xfs.h" | 18 | #include "xfs.h" |
| 19 | #include "xfs_fs.h" | 19 | #include "xfs_fs.h" |
| 20 | #include "xfs_bit.h" | ||
| 21 | #include "xfs_log.h" | 20 | #include "xfs_log.h" |
| 22 | #include "xfs_inum.h" | ||
| 23 | #include "xfs_trans.h" | 21 | #include "xfs_trans.h" |
| 24 | #include "xfs_sb.h" | 22 | #include "xfs_sb.h" |
| 25 | #include "xfs_ag.h" | 23 | #include "xfs_ag.h" |
diff --git a/fs/xfs/xfs_trans_extfree.c b/fs/xfs/xfs_trans_extfree.c index f7590f5badea..8d71b16eccae 100644 --- a/fs/xfs/xfs_trans_extfree.c +++ b/fs/xfs/xfs_trans_extfree.c | |||
| @@ -19,7 +19,6 @@ | |||
| 19 | #include "xfs_fs.h" | 19 | #include "xfs_fs.h" |
| 20 | #include "xfs_types.h" | 20 | #include "xfs_types.h" |
| 21 | #include "xfs_log.h" | 21 | #include "xfs_log.h" |
| 22 | #include "xfs_inum.h" | ||
| 23 | #include "xfs_trans.h" | 22 | #include "xfs_trans.h" |
| 24 | #include "xfs_sb.h" | 23 | #include "xfs_sb.h" |
| 25 | #include "xfs_ag.h" | 24 | #include "xfs_ag.h" |
diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c index 7a7442c03f2b..d2eee20d5f5b 100644 --- a/fs/xfs/xfs_trans_inode.c +++ b/fs/xfs/xfs_trans_inode.c | |||
| @@ -18,9 +18,7 @@ | |||
| 18 | #include "xfs.h" | 18 | #include "xfs.h" |
| 19 | #include "xfs_fs.h" | 19 | #include "xfs_fs.h" |
| 20 | #include "xfs_types.h" | 20 | #include "xfs_types.h" |
| 21 | #include "xfs_bit.h" | ||
| 22 | #include "xfs_log.h" | 21 | #include "xfs_log.h" |
| 23 | #include "xfs_inum.h" | ||
| 24 | #include "xfs_trans.h" | 22 | #include "xfs_trans.h" |
| 25 | #include "xfs_sb.h" | 23 | #include "xfs_sb.h" |
| 26 | #include "xfs_ag.h" | 24 | #include "xfs_ag.h" |
diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h index 8ab2ced415f1..fb62377d1cbc 100644 --- a/fs/xfs/xfs_trans_priv.h +++ b/fs/xfs/xfs_trans_priv.h | |||
| @@ -71,6 +71,8 @@ struct xfs_ail { | |||
| 71 | spinlock_t xa_lock; | 71 | spinlock_t xa_lock; |
| 72 | xfs_lsn_t xa_last_pushed_lsn; | 72 | xfs_lsn_t xa_last_pushed_lsn; |
| 73 | int xa_log_flush; | 73 | int xa_log_flush; |
| 74 | struct list_head xa_buf_list; | ||
| 75 | wait_queue_head_t xa_empty; | ||
| 74 | }; | 76 | }; |
| 75 | 77 | ||
| 76 | /* | 78 | /* |
| @@ -90,18 +92,22 @@ xfs_trans_ail_update( | |||
| 90 | } | 92 | } |
| 91 | 93 | ||
| 92 | void xfs_trans_ail_delete_bulk(struct xfs_ail *ailp, | 94 | void xfs_trans_ail_delete_bulk(struct xfs_ail *ailp, |
| 93 | struct xfs_log_item **log_items, int nr_items) | 95 | struct xfs_log_item **log_items, int nr_items, |
| 96 | int shutdown_type) | ||
| 94 | __releases(ailp->xa_lock); | 97 | __releases(ailp->xa_lock); |
| 95 | static inline void | 98 | static inline void |
| 96 | xfs_trans_ail_delete( | 99 | xfs_trans_ail_delete( |
| 97 | struct xfs_ail *ailp, | 100 | struct xfs_ail *ailp, |
| 98 | xfs_log_item_t *lip) __releases(ailp->xa_lock) | 101 | xfs_log_item_t *lip, |
| 102 | int shutdown_type) __releases(ailp->xa_lock) | ||
| 99 | { | 103 | { |
| 100 | xfs_trans_ail_delete_bulk(ailp, &lip, 1); | 104 | xfs_trans_ail_delete_bulk(ailp, &lip, 1, shutdown_type); |
| 101 | } | 105 | } |
| 102 | 106 | ||
| 103 | void xfs_ail_push(struct xfs_ail *, xfs_lsn_t); | 107 | void xfs_ail_push(struct xfs_ail *, xfs_lsn_t); |
| 104 | void xfs_ail_push_all(struct xfs_ail *); | 108 | void xfs_ail_push_all(struct xfs_ail *); |
| 109 | void xfs_ail_push_all_sync(struct xfs_ail *); | ||
| 110 | struct xfs_log_item *xfs_ail_min(struct xfs_ail *ailp); | ||
| 105 | xfs_lsn_t xfs_ail_min_lsn(struct xfs_ail *ailp); | 111 | xfs_lsn_t xfs_ail_min_lsn(struct xfs_ail *ailp); |
| 106 | 112 | ||
| 107 | struct xfs_log_item * xfs_trans_ail_cursor_first(struct xfs_ail *ailp, | 113 | struct xfs_log_item * xfs_trans_ail_cursor_first(struct xfs_ail *ailp, |
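The new xa_empty waitqueue backs the xfs_ail_push_all_sync() declaration above: callers sleep until the AIL drains, re-checking under the lock on each pass. A userspace analogue of just that sleep-until-empty half, with a pthread condition variable standing in for the kernel waitqueue (all names here are illustrative):

```c
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t empty = PTHREAD_COND_INITIALIZER;	/* ~ xa_empty */
static int items = 3;					/* ~ AIL contents */

static void *worker(void *arg)			/* ~ xfsaild */
{
	(void)arg;
	pthread_mutex_lock(&lock);
	while (items > 0) {
		items--;			/* "push" one item out */
		if (items == 0)
			pthread_cond_broadcast(&empty);	/* ~ wake_up_all */
	}
	pthread_mutex_unlock(&lock);
	return NULL;
}

static void push_all_sync(void)
{
	pthread_mutex_lock(&lock);
	while (items > 0)		/* ~ while (xfs_ail_max(ailp)) */
		pthread_cond_wait(&empty, &lock);
	pthread_mutex_unlock(&lock);
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, worker, NULL);
	push_all_sync();
	pthread_join(t, NULL);
	printf("AIL drained\n");
	return 0;
}
```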
diff --git a/fs/xfs/xfs_types.h b/fs/xfs/xfs_types.h index 65584b55607d..398cf681d025 100644 --- a/fs/xfs/xfs_types.h +++ b/fs/xfs/xfs_types.h | |||
| @@ -57,6 +57,7 @@ typedef __uint64_t __psunsigned_t; | |||
| 57 | #endif /* __KERNEL__ */ | 57 | #endif /* __KERNEL__ */ |
| 58 | 58 | ||
| 59 | typedef __uint32_t xfs_agblock_t; /* blockno in alloc. group */ | 59 | typedef __uint32_t xfs_agblock_t; /* blockno in alloc. group */ |
| 60 | typedef __uint32_t xfs_agino_t; /* inode # within allocation grp */ | ||
| 60 | typedef __uint32_t xfs_extlen_t; /* extent length in blocks */ | 61 | typedef __uint32_t xfs_extlen_t; /* extent length in blocks */ |
| 61 | typedef __uint32_t xfs_agnumber_t; /* allocation group number */ | 62 | typedef __uint32_t xfs_agnumber_t; /* allocation group number */ |
| 62 | typedef __int32_t xfs_extnum_t; /* # of extents in a file */ | 63 | typedef __int32_t xfs_extnum_t; /* # of extents in a file */ |
| @@ -101,6 +102,7 @@ typedef __uint64_t xfs_fileoff_t; /* block number in a file */ | |||
| 101 | typedef __int64_t xfs_sfiloff_t; /* signed block number in a file */ | 102 | typedef __int64_t xfs_sfiloff_t; /* signed block number in a file */ |
| 102 | typedef __uint64_t xfs_filblks_t; /* number of blocks in a file */ | 103 | typedef __uint64_t xfs_filblks_t; /* number of blocks in a file */ |
| 103 | 104 | ||
| 105 | |||
| 104 | /* | 106 | /* |
| 105 | * Null values for the types. | 107 | * Null values for the types. |
| 106 | */ | 108 | */ |
| @@ -120,6 +122,9 @@ typedef __uint64_t xfs_filblks_t; /* number of blocks in a file */ | |||
| 120 | 122 | ||
| 121 | #define NULLCOMMITLSN ((xfs_lsn_t)-1) | 123 | #define NULLCOMMITLSN ((xfs_lsn_t)-1) |
| 122 | 124 | ||
| 125 | #define NULLFSINO ((xfs_ino_t)-1) | ||
| 126 | #define NULLAGINO ((xfs_agino_t)-1) | ||
| 127 | |||
| 123 | /* | 128 | /* |
| 124 | * Max values for extlen, extnum, aextnum. | 129 | * Max values for extlen, extnum, aextnum. |
| 125 | */ | 130 | */ |
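The new NULLFSINO/NULLAGINO sentinels follow the existing convention in this header of using the all-ones value of the type (compare NULLCOMMITLSN above). A two-line check of what the casts evaluate to, assuming the 64-bit xfs_ino_t defined elsewhere in xfs_types.h:

```c
#include <stdio.h>
#include <stdint.h>

typedef uint64_t xfs_ino_t;	/* filesystem-wide inode number */
typedef uint32_t xfs_agino_t;	/* inode number within an AG */

#define NULLFSINO ((xfs_ino_t)-1)
#define NULLAGINO ((xfs_agino_t)-1)

int main(void)
{
	/* -1 cast to an unsigned type is its all-ones maximum value */
	printf("NULLFSINO = 0x%llx\n", (unsigned long long)NULLFSINO);
	printf("NULLAGINO = 0x%x\n", (unsigned)NULLAGINO);
	return 0;
}
```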
diff --git a/fs/xfs/xfs_utils.c b/fs/xfs/xfs_utils.c index 79c05ac85bfe..4e5b9ad5cb97 100644 --- a/fs/xfs/xfs_utils.c +++ b/fs/xfs/xfs_utils.c | |||
| @@ -18,9 +18,7 @@ | |||
| 18 | #include "xfs.h" | 18 | #include "xfs.h" |
| 19 | #include "xfs_fs.h" | 19 | #include "xfs_fs.h" |
| 20 | #include "xfs_types.h" | 20 | #include "xfs_types.h" |
| 21 | #include "xfs_bit.h" | ||
| 22 | #include "xfs_log.h" | 21 | #include "xfs_log.h" |
| 23 | #include "xfs_inum.h" | ||
| 24 | #include "xfs_trans.h" | 22 | #include "xfs_trans.h" |
| 25 | #include "xfs_sb.h" | 23 | #include "xfs_sb.h" |
| 26 | #include "xfs_ag.h" | 24 | #include "xfs_ag.h" |
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index 64981d7e7375..b6a82d817a82 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c | |||
| @@ -21,7 +21,6 @@ | |||
| 21 | #include "xfs_types.h" | 21 | #include "xfs_types.h" |
| 22 | #include "xfs_bit.h" | 22 | #include "xfs_bit.h" |
| 23 | #include "xfs_log.h" | 23 | #include "xfs_log.h" |
| 24 | #include "xfs_inum.h" | ||
| 25 | #include "xfs_trans.h" | 24 | #include "xfs_trans.h" |
| 26 | #include "xfs_sb.h" | 25 | #include "xfs_sb.h" |
| 27 | #include "xfs_ag.h" | 26 | #include "xfs_ag.h" |
| @@ -39,7 +38,6 @@ | |||
| 39 | #include "xfs_bmap.h" | 38 | #include "xfs_bmap.h" |
| 40 | #include "xfs_acl.h" | 39 | #include "xfs_acl.h" |
| 41 | #include "xfs_attr.h" | 40 | #include "xfs_attr.h" |
| 42 | #include "xfs_rw.h" | ||
| 43 | #include "xfs_error.h" | 41 | #include "xfs_error.h" |
| 44 | #include "xfs_quota.h" | 42 | #include "xfs_quota.h" |
| 45 | #include "xfs_utils.h" | 43 | #include "xfs_utils.h" |
| @@ -81,8 +79,7 @@ xfs_readlink_bmap( | |||
| 81 | d = XFS_FSB_TO_DADDR(mp, mval[n].br_startblock); | 79 | d = XFS_FSB_TO_DADDR(mp, mval[n].br_startblock); |
| 82 | byte_cnt = XFS_FSB_TO_B(mp, mval[n].br_blockcount); | 80 | byte_cnt = XFS_FSB_TO_B(mp, mval[n].br_blockcount); |
| 83 | 81 | ||
| 84 | bp = xfs_buf_read(mp->m_ddev_targp, d, BTOBB(byte_cnt), | 82 | bp = xfs_buf_read(mp->m_ddev_targp, d, BTOBB(byte_cnt), 0); |
| 85 | XBF_LOCK | XBF_MAPPED | XBF_DONT_BLOCK); | ||
| 86 | if (!bp) | 83 | if (!bp) |
| 87 | return XFS_ERROR(ENOMEM); | 84 | return XFS_ERROR(ENOMEM); |
| 88 | error = bp->b_error; | 85 | error = bp->b_error; |
| @@ -1919,7 +1916,7 @@ xfs_alloc_file_space( | |||
| 1919 | 1916 | ||
| 1920 | error0: /* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */ | 1917 | error0: /* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */ |
| 1921 | xfs_bmap_cancel(&free_list); | 1918 | xfs_bmap_cancel(&free_list); |
| 1922 | xfs_trans_unreserve_quota_nblks(tp, ip, qblocks, 0, quota_flag); | 1919 | xfs_trans_unreserve_quota_nblks(tp, ip, (long)qblocks, 0, quota_flag); |
| 1923 | 1920 | ||
| 1924 | error1: /* Just cancel transaction */ | 1921 | error1: /* Just cancel transaction */ |
| 1925 | xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); | 1922 | xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); |
| @@ -1966,7 +1963,7 @@ xfs_zero_remaining_bytes( | |||
| 1966 | 1963 | ||
| 1967 | bp = xfs_buf_get_uncached(XFS_IS_REALTIME_INODE(ip) ? | 1964 | bp = xfs_buf_get_uncached(XFS_IS_REALTIME_INODE(ip) ? |
| 1968 | mp->m_rtdev_targp : mp->m_ddev_targp, | 1965 | mp->m_rtdev_targp : mp->m_ddev_targp, |
| 1969 | mp->m_sb.sb_blocksize, XBF_DONT_BLOCK); | 1966 | BTOBB(mp->m_sb.sb_blocksize), 0); |
| 1970 | if (!bp) | 1967 | if (!bp) |
| 1971 | return XFS_ERROR(ENOMEM); | 1968 | return XFS_ERROR(ENOMEM); |
| 1972 | 1969 | ||
| @@ -2315,17 +2312,33 @@ xfs_change_file_space( | |||
| 2315 | case XFS_IOC_ALLOCSP64: | 2312 | case XFS_IOC_ALLOCSP64: |
| 2316 | case XFS_IOC_FREESP: | 2313 | case XFS_IOC_FREESP: |
| 2317 | case XFS_IOC_FREESP64: | 2314 | case XFS_IOC_FREESP64: |
| 2315 | /* | ||
| 2316 | * These operations actually do IO when extending the file, but | ||
| 2317 | * the allocation is done separately from the zeroing that is | ||
| 2318 | * done. This set of operations needs to be serialised against | ||
| 2319 | * other IO operations, such as truncate and buffered IO. We | ||
| 2320 | * need to take the IOLOCK here to serialise the allocation and | ||
| 2321 | * zeroing IO to prevent other IOLOCK holders (e.g. getbmap, | ||
| 2322 | * truncate, direct IO) from racing against the transient | ||
| 2323 | * allocated but not written state we can have here. | ||
| 2324 | */ | ||
| 2325 | xfs_ilock(ip, XFS_IOLOCK_EXCL); | ||
| 2318 | if (startoffset > fsize) { | 2326 | if (startoffset > fsize) { |
| 2319 | error = xfs_alloc_file_space(ip, fsize, | 2327 | error = xfs_alloc_file_space(ip, fsize, |
| 2320 | startoffset - fsize, 0, attr_flags); | 2328 | startoffset - fsize, 0, |
| 2321 | if (error) | 2329 | attr_flags | XFS_ATTR_NOLOCK); |
| 2330 | if (error) { | ||
| 2331 | xfs_iunlock(ip, XFS_IOLOCK_EXCL); | ||
| 2322 | break; | 2332 | break; |
| 2333 | } | ||
| 2323 | } | 2334 | } |
| 2324 | 2335 | ||
| 2325 | iattr.ia_valid = ATTR_SIZE; | 2336 | iattr.ia_valid = ATTR_SIZE; |
| 2326 | iattr.ia_size = startoffset; | 2337 | iattr.ia_size = startoffset; |
| 2327 | 2338 | ||
| 2328 | error = xfs_setattr_size(ip, &iattr, attr_flags); | 2339 | error = xfs_setattr_size(ip, &iattr, |
| 2340 | attr_flags | XFS_ATTR_NOLOCK); | ||
| 2341 | xfs_iunlock(ip, XFS_IOLOCK_EXCL); | ||
| 2329 | 2342 | ||
| 2330 | if (error) | 2343 | if (error) |
| 2331 | return error; | 2344 | return error; |
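The new comment in xfs_change_file_space() describes a two-step sequence, allocation then the zeroing size update, that must appear atomic to other IOLOCK holders. A userspace analogue of that exclusion with a pthread rwlock standing in for XFS_IOLOCK_EXCL; every function below is a hypothetical stand-in, not the XFS call chain:

```c
#include <pthread.h>
#include <stdio.h>

static pthread_rwlock_t iolock = PTHREAD_RWLOCK_INITIALIZER;
static long file_size = 4096;	/* bytes, hypothetical file state */
static long allocated = 4096;	/* blocks backed up to this offset */

/* ~ XFS_IOC_ALLOCSP: allocate out to startoffset, then update the size,
 * all under one exclusive lock so readers never see the gap between
 * "allocated" and "written". */
static void allocsp(long startoffset)
{
	pthread_rwlock_wrlock(&iolock);		/* ~ xfs_ilock(IOLOCK_EXCL) */
	if (startoffset > file_size)
		allocated = startoffset;	/* ~ xfs_alloc_file_space() */
	file_size = startoffset;		/* ~ xfs_setattr_size() */
	pthread_rwlock_unlock(&iolock);		/* ~ xfs_iunlock() */
}

/* ~ getbmap/truncate/direct IO: shared lock excludes the transient state */
static long read_size(void)
{
	pthread_rwlock_rdlock(&iolock);
	long sz = file_size;
	pthread_rwlock_unlock(&iolock);
	return sz;
}

int main(void)
{
	allocsp(8192);
	printf("size %ld, allocated to %ld\n", read_size(), allocated);
	return 0;
}
```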
