author		Linus Torvalds <torvalds@linux-foundation.org>	2012-05-24 17:14:46 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2012-05-24 17:14:46 -0400
commit		9978306e31a8f89bd81fbc4c49fd9aefb1d30d10 (patch)
tree		85bbd03336a82d20a00761ed35eb05536936b881
parent		abe81e25f08abbac493754a043f7a91a1b3e0f93 (diff)
parent		14c26c6a05de138a4fd9a0c05ff8e7435a618324 (diff)
Merge branch 'for-linus' of git://oss.sgi.com/xfs/xfs
Pull XFS update from Ben Myers:
- Removal of xfsbufd
- Background CIL flushes have been moved to a workqueue
- Fix to xfs_check_page_type applicable to filesystems where
  blocksize < page size
- Fix for stale data exposure when extsize hints are used
- A series of xfs_buf cache cleanups
- Fix for XFS_IOC_ALLOCSP
- Cleanups for includes and removal of xfs_lrw.[ch]
- Moved all busy extent handling to its own file so that it is easier
  to merge with userspace
- Fix for log mount failure
- Fix to enable inode reclaim during quotacheck at mount time
- Fix for delalloc quota accounting
- Fix for memory reclaim deadlock on agi buffer
- Fixes for failed writes and to clean up stale delalloc blocks
- Fix to use GFP_NOFS in blkdev_issue_flush
- SEEK_DATA/SEEK_HOLE support
* 'for-linus' of git://oss.sgi.com/xfs/xfs: (57 commits)
xfs: add trace points for log forces
xfs: fix memory reclaim deadlock on agi buffer
xfs: fix delalloc quota accounting on failure
xfs: protect xfs_sync_worker with s_umount semaphore
xfs: introduce SEEK_DATA/SEEK_HOLE support
xfs: make xfs_extent_busy_trim not static
xfs: make XBF_MAPPED the default behaviour
xfs: flush outstanding buffers on log mount failure
xfs: Properly exclude IO type flags from buffer flags
xfs: clean up xfs_bit.h includes
xfs: move xfs_do_force_shutdown() and kill xfs_rw.c
xfs: move xfs_get_extsz_hint() and kill xfs_rw.h
xfs: move xfs_fsb_to_db to xfs_bmap.h
xfs: clean up busy extent naming
xfs: move busy extent handling to it's own file
xfs: move xfsagino_t to xfs_types.h
xfs: use iolock on XFS_IOC_ALLOCSP calls
xfs: kill XBF_DONTBLOCK
xfs: kill xfs_read_buf()
xfs: kill XBF_LOCK
...
80 files changed, 2459 insertions, 2852 deletions
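
Among the items above, the SEEK_DATA/SEEK_HOLE work is the one with a
user-visible API: it wires the sparse-file lseek(2) extension into XFS.
A minimal userspace sketch of how an application would walk the data
extents of a sparse file with it (error handling trimmed; the file path
comes from the caller):

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(int argc, char **argv)
{
	int fd;
	off_t end, data, hole = 0;

	if (argc < 2 || (fd = open(argv[1], O_RDONLY)) < 0)
		return 1;
	end = lseek(fd, 0, SEEK_END);
	while (hole < end) {
		/* find the next byte of data at or after 'hole' ... */
		data = lseek(fd, hole, SEEK_DATA);
		if (data < 0)
			break;		/* ENXIO: no data past this point */
		/* ... and the hole that terminates that data extent */
		hole = lseek(fd, data, SEEK_HOLE);
		printf("data: [%lld, %lld)\n", (long long)data,
		       (long long)hole);
	}
	close(fd);
	return 0;
}
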
diff --git a/MAINTAINERS b/MAINTAINERS
index eaff0392eb32..150a29f3cd33 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -7623,7 +7623,7 @@ XFS FILESYSTEM
 P:	Silicon Graphics Inc
 M:	Ben Myers <bpm@sgi.com>
 M:	Alex Elder <elder@kernel.org>
-M:	xfs-masters@oss.sgi.com
+M:	xfs@oss.sgi.com
 L:	xfs@oss.sgi.com
 W:	http://oss.sgi.com/projects/xfs
 T:	git git://oss.sgi.com/xfs/xfs.git
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index 0a9977983f92..d2bf974b1a2f 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -33,6 +33,7 @@ xfs-y += xfs_aops.o \
 	xfs_discard.o \
 	xfs_error.o \
 	xfs_export.o \
+	xfs_extent_busy.o \
 	xfs_file.o \
 	xfs_filestream.o \
 	xfs_fsops.o \
@@ -49,7 +50,6 @@ xfs-y += xfs_aops.o \
 	xfs_sync.o \
 	xfs_xattr.o \
 	xfs_rename.o \
-	xfs_rw.o \
 	xfs_utils.o \
 	xfs_vnodeops.o \
 	kmem.o \
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h
index 4805f009f923..44d65c1533c0 100644
--- a/fs/xfs/xfs_ag.h
+++ b/fs/xfs/xfs_ag.h
@@ -175,24 +175,6 @@ typedef struct xfs_agfl {
 } xfs_agfl_t;
 
 /*
- * Busy block/extent entry.  Indexed by a rbtree in perag to mark blocks that
- * have been freed but whose transactions aren't committed to disk yet.
- *
- * Note that we use the transaction ID to record the transaction, not the
- * transaction structure itself.  See xfs_alloc_busy_insert() for details.
- */
-struct xfs_busy_extent {
-	struct rb_node	rb_node;	/* ag by-bno indexed search tree */
-	struct list_head list;		/* transaction busy extent list */
-	xfs_agnumber_t	agno;
-	xfs_agblock_t	bno;
-	xfs_extlen_t	length;
-	unsigned int	flags;
-#define XFS_ALLOC_BUSY_DISCARDED	0x01	/* undergoing a discard op. */
-#define XFS_ALLOC_BUSY_SKIP_DISCARD	0x02	/* do not discard */
-};
-
-/*
  * Per-ag incore structure, copies of information in agf and agi,
  * to improve the performance of allocation group selection.
  */
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index 0f0df2759b09..229641fb8e67 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -20,7 +20,6 @@
 #include "xfs_types.h"
 #include "xfs_bit.h"
 #include "xfs_log.h"
-#include "xfs_inum.h"
 #include "xfs_trans.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
@@ -32,6 +31,7 @@
 #include "xfs_inode.h"
 #include "xfs_btree.h"
 #include "xfs_alloc.h"
+#include "xfs_extent_busy.h"
 #include "xfs_error.h"
 #include "xfs_trace.h"
 
@@ -47,8 +47,6 @@ STATIC int xfs_alloc_ag_vextent_near(xfs_alloc_arg_t *);
 STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *);
 STATIC int xfs_alloc_ag_vextent_small(xfs_alloc_arg_t *,
 		xfs_btree_cur_t *, xfs_agblock_t *, xfs_extlen_t *, int *);
-STATIC void xfs_alloc_busy_trim(struct xfs_alloc_arg *,
-		xfs_agblock_t, xfs_extlen_t, xfs_agblock_t *, xfs_extlen_t *);
 
 /*
  * Lookup the record equal to [bno, len] in the btree given by cur.
@@ -152,7 +150,7 @@ xfs_alloc_compute_aligned(
 	xfs_extlen_t	len;
 
 	/* Trim busy sections out of found extent */
-	xfs_alloc_busy_trim(args, foundbno, foundlen, &bno, &len);
+	xfs_extent_busy_trim(args, foundbno, foundlen, &bno, &len);
 
 	if (args->alignment > 1 && len >= args->minlen) {
 		xfs_agblock_t	aligned_bno = roundup(bno, args->alignment);
@@ -536,7 +534,7 @@ xfs_alloc_ag_vextent(
 		if (error)
 			return error;
 
-		ASSERT(!xfs_alloc_busy_search(args->mp, args->agno,
+		ASSERT(!xfs_extent_busy_search(args->mp, args->agno,
 					      args->agbno, args->len));
 	}
 
@@ -603,7 +601,7 @@ xfs_alloc_ag_vextent_exact(
 	/*
 	 * Check for overlapping busy extents.
 	 */
-	xfs_alloc_busy_trim(args, fbno, flen, &tbno, &tlen);
+	xfs_extent_busy_trim(args, fbno, flen, &tbno, &tlen);
 
 	/*
 	 * Give up if the start of the extent is busy, or the freespace isn't
@@ -1391,7 +1389,7 @@ xfs_alloc_ag_vextent_small(
 		if (error)
 			goto error0;
 		if (fbno != NULLAGBLOCK) {
-			xfs_alloc_busy_reuse(args->mp, args->agno, fbno, 1,
+			xfs_extent_busy_reuse(args->mp, args->agno, fbno, 1,
 					     args->userdata);
 
 			if (args->userdata) {
@@ -2496,579 +2494,8 @@ xfs_free_extent(
 
 	error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno, len, 0);
 	if (!error)
-		xfs_alloc_busy_insert(tp, args.agno, args.agbno, len, 0);
+		xfs_extent_busy_insert(tp, args.agno, args.agbno, len, 0);
 error0:
 	xfs_perag_put(args.pag);
 	return error;
 }
-
-void
-xfs_alloc_busy_insert(
-	struct xfs_trans	*tp,
-	xfs_agnumber_t		agno,
-	xfs_agblock_t		bno,
-	xfs_extlen_t		len,
-	unsigned int		flags)
-{
-	struct xfs_busy_extent	*new;
-	struct xfs_busy_extent	*busyp;
-	struct xfs_perag	*pag;
-	struct rb_node		**rbp;
-	struct rb_node		*parent = NULL;
-
-	new = kmem_zalloc(sizeof(struct xfs_busy_extent), KM_MAYFAIL);
-	if (!new) {
-		/*
-		 * No Memory!  Since it is now not possible to track the free
-		 * block, make this a synchronous transaction to insure that
-		 * the block is not reused before this transaction commits.
-		 */
-		trace_xfs_alloc_busy_enomem(tp->t_mountp, agno, bno, len);
-		xfs_trans_set_sync(tp);
-		return;
-	}
-
-	new->agno = agno;
-	new->bno = bno;
-	new->length = len;
-	INIT_LIST_HEAD(&new->list);
-	new->flags = flags;
-
-	/* trace before insert to be able to see failed inserts */
-	trace_xfs_alloc_busy(tp->t_mountp, agno, bno, len);
-
-	pag = xfs_perag_get(tp->t_mountp, new->agno);
-	spin_lock(&pag->pagb_lock);
-	rbp = &pag->pagb_tree.rb_node;
-	while (*rbp) {
-		parent = *rbp;
-		busyp = rb_entry(parent, struct xfs_busy_extent, rb_node);
-
-		if (new->bno < busyp->bno) {
-			rbp = &(*rbp)->rb_left;
-			ASSERT(new->bno + new->length <= busyp->bno);
-		} else if (new->bno > busyp->bno) {
-			rbp = &(*rbp)->rb_right;
-			ASSERT(bno >= busyp->bno + busyp->length);
-		} else {
-			ASSERT(0);
-		}
-	}
-
-	rb_link_node(&new->rb_node, parent, rbp);
-	rb_insert_color(&new->rb_node, &pag->pagb_tree);
-
-	list_add(&new->list, &tp->t_busy);
-	spin_unlock(&pag->pagb_lock);
-	xfs_perag_put(pag);
-}
-
-/*
- * Search for a busy extent within the range of the extent we are about to
- * allocate.  You need to be holding the busy extent tree lock when calling
- * xfs_alloc_busy_search(). This function returns 0 for no overlapping busy
- * extent, -1 for an overlapping but not exact busy extent, and 1 for an exact
- * match. This is done so that a non-zero return indicates an overlap that
- * will require a synchronous transaction, but it can still be
- * used to distinguish between a partial or exact match.
- */
-int
-xfs_alloc_busy_search(
-	struct xfs_mount	*mp,
-	xfs_agnumber_t		agno,
-	xfs_agblock_t		bno,
-	xfs_extlen_t		len)
-{
-	struct xfs_perag	*pag;
-	struct rb_node		*rbp;
-	struct xfs_busy_extent	*busyp;
-	int			match = 0;
-
-	pag = xfs_perag_get(mp, agno);
-	spin_lock(&pag->pagb_lock);
-
-	rbp = pag->pagb_tree.rb_node;
-
-	/* find closest start bno overlap */
-	while (rbp) {
-		busyp = rb_entry(rbp, struct xfs_busy_extent, rb_node);
-		if (bno < busyp->bno) {
-			/* may overlap, but exact start block is lower */
-			if (bno + len > busyp->bno)
-				match = -1;
-			rbp = rbp->rb_left;
-		} else if (bno > busyp->bno) {
-			/* may overlap, but exact start block is higher */
-			if (bno < busyp->bno + busyp->length)
-				match = -1;
-			rbp = rbp->rb_right;
-		} else {
-			/* bno matches busyp, length determines exact match */
-			match = (busyp->length == len) ? 1 : -1;
-			break;
-		}
-	}
-	spin_unlock(&pag->pagb_lock);
-	xfs_perag_put(pag);
-	return match;
-}
-
-/*
- * The found free extent [fbno, fend] overlaps part or all of the given busy
- * extent.  If the overlap covers the beginning, the end, or all of the busy
- * extent, the overlapping portion can be made unbusy and used for the
- * allocation.  We can't split a busy extent because we can't modify a
- * transaction/CIL context busy list, but we can update an entries block
- * number or length.
- *
- * Returns true if the extent can safely be reused, or false if the search
- * needs to be restarted.
- */
-STATIC bool
-xfs_alloc_busy_update_extent(
-	struct xfs_mount	*mp,
-	struct xfs_perag	*pag,
-	struct xfs_busy_extent	*busyp,
-	xfs_agblock_t		fbno,
-	xfs_extlen_t		flen,
-	bool			userdata)
-{
-	xfs_agblock_t		fend = fbno + flen;
-	xfs_agblock_t		bbno = busyp->bno;
-	xfs_agblock_t		bend = bbno + busyp->length;
-
-	/*
-	 * This extent is currently being discarded.  Give the thread
-	 * performing the discard a chance to mark the extent unbusy
-	 * and retry.
-	 */
-	if (busyp->flags & XFS_ALLOC_BUSY_DISCARDED) {
-		spin_unlock(&pag->pagb_lock);
-		delay(1);
-		spin_lock(&pag->pagb_lock);
-		return false;
-	}
-
-	/*
-	 * If there is a busy extent overlapping a user allocation, we have
-	 * no choice but to force the log and retry the search.
-	 *
-	 * Fortunately this does not happen during normal operation, but
-	 * only if the filesystem is very low on space and has to dip into
-	 * the AGFL for normal allocations.
-	 */
-	if (userdata)
-		goto out_force_log;
-
-	if (bbno < fbno && bend > fend) {
-		/*
-		 * Case 1:
-		 *    bbno           bend
-		 *    +BBBBBBBBBBBBBBBBB+
-		 *        +---------+
-		 *        fbno   fend
-		 */
-
-		/*
-		 * We would have to split the busy extent to be able to track
-		 * it correct, which we cannot do because we would have to
-		 * modify the list of busy extents attached to the transaction
-		 * or CIL context, which is immutable.
-		 *
-		 * Force out the log to clear the busy extent and retry the
-		 * search.
-		 */
-		goto out_force_log;
-	} else if (bbno >= fbno && bend <= fend) {
-		/*
-		 * Case 2:
-		 *    bbno           bend
-		 *    +BBBBBBBBBBBBBBBBB+
-		 *    +-----------------+
-		 *    fbno           fend
-		 *
-		 * Case 3:
-		 *    bbno           bend
-		 *    +BBBBBBBBBBBBBBBBB+
-		 *    +--------------------------+
-		 *    fbno                    fend
-		 *
-		 * Case 4:
-		 *             bbno           bend
-		 *             +BBBBBBBBBBBBBBBBB+
-		 *    +--------------------------+
-		 *    fbno                    fend
-		 *
-		 * Case 5:
-		 *             bbno           bend
-		 *             +BBBBBBBBBBBBBBBBB+
-		 *    +-----------------------------------+
-		 *    fbno                             fend
-		 *
-		 */
-
-		/*
-		 * The busy extent is fully covered by the extent we are
-		 * allocating, and can simply be removed from the rbtree.
-		 * However we cannot remove it from the immutable list
-		 * tracking busy extents in the transaction or CIL context,
-		 * so set the length to zero to mark it invalid.
-		 *
-		 * We also need to restart the busy extent search from the
-		 * tree root, because erasing the node can rearrange the
-		 * tree topology.
-		 */
-		rb_erase(&busyp->rb_node, &pag->pagb_tree);
-		busyp->length = 0;
-		return false;
-	} else if (fend < bend) {
-		/*
-		 * Case 6:
-		 *              bbno           bend
-		 *              +BBBBBBBBBBBBBBBBB+
-		 *             +---------+
-		 *             fbno   fend
-		 *
-		 * Case 7:
-		 *             bbno           bend
-		 *             +BBBBBBBBBBBBBBBBB+
-		 *    +------------------+
-		 *    fbno            fend
-		 *
-		 */
-		busyp->bno = fend;
-	} else if (bbno < fbno) {
-		/*
-		 * Case 8:
-		 *    bbno           bend
-		 *    +BBBBBBBBBBBBBBBBB+
-		 *        +-------------+
-		 *        fbno       fend
-		 *
-		 * Case 9:
-		 *    bbno           bend
-		 *    +BBBBBBBBBBBBBBBBB+
-		 *        +----------------------+
-		 *        fbno                fend
-		 */
-		busyp->length = fbno - busyp->bno;
-	} else {
-		ASSERT(0);
-	}
-
-	trace_xfs_alloc_busy_reuse(mp, pag->pag_agno, fbno, flen);
-	return true;
-
-out_force_log:
-	spin_unlock(&pag->pagb_lock);
-	xfs_log_force(mp, XFS_LOG_SYNC);
-	trace_xfs_alloc_busy_force(mp, pag->pag_agno, fbno, flen);
-	spin_lock(&pag->pagb_lock);
-	return false;
-}
-
-
-/*
- * For a given extent [fbno, flen], make sure we can reuse it safely.
- */
-void
-xfs_alloc_busy_reuse(
-	struct xfs_mount	*mp,
-	xfs_agnumber_t		agno,
-	xfs_agblock_t		fbno,
-	xfs_extlen_t		flen,
-	bool			userdata)
-{
-	struct xfs_perag	*pag;
-	struct rb_node		*rbp;
-
-	ASSERT(flen > 0);
-
-	pag = xfs_perag_get(mp, agno);
-	spin_lock(&pag->pagb_lock);
-restart:
-	rbp = pag->pagb_tree.rb_node;
-	while (rbp) {
-		struct xfs_busy_extent *busyp =
-			rb_entry(rbp, struct xfs_busy_extent, rb_node);
-		xfs_agblock_t	bbno = busyp->bno;
-		xfs_agblock_t	bend = bbno + busyp->length;
-
-		if (fbno + flen <= bbno) {
-			rbp = rbp->rb_left;
-			continue;
-		} else if (fbno >= bend) {
-			rbp = rbp->rb_right;
-			continue;
-		}
-
-		if (!xfs_alloc_busy_update_extent(mp, pag, busyp, fbno, flen,
-						  userdata))
-			goto restart;
-	}
-	spin_unlock(&pag->pagb_lock);
-	xfs_perag_put(pag);
-}
-
-/*
- * For a given extent [fbno, flen], search the busy extent list to find a
- * subset of the extent that is not busy.  If *rlen is smaller than
- * args->minlen no suitable extent could be found, and the higher level
- * code needs to force out the log and retry the allocation.
- */
-STATIC void
-xfs_alloc_busy_trim(
-	struct xfs_alloc_arg	*args,
-	xfs_agblock_t		bno,
-	xfs_extlen_t		len,
-	xfs_agblock_t		*rbno,
-	xfs_extlen_t		*rlen)
-{
-	xfs_agblock_t		fbno;
-	xfs_extlen_t		flen;
-	struct rb_node		*rbp;
-
-	ASSERT(len > 0);
-
-	spin_lock(&args->pag->pagb_lock);
-restart:
-	fbno = bno;
-	flen = len;
-	rbp = args->pag->pagb_tree.rb_node;
-	while (rbp && flen >= args->minlen) {
-		struct xfs_busy_extent *busyp =
-			rb_entry(rbp, struct xfs_busy_extent, rb_node);
-		xfs_agblock_t	fend = fbno + flen;
-		xfs_agblock_t	bbno = busyp->bno;
-		xfs_agblock_t	bend = bbno + busyp->length;
-
-		if (fend <= bbno) {
-			rbp = rbp->rb_left;
-			continue;
-		} else if (fbno >= bend) {
-			rbp = rbp->rb_right;
-			continue;
-		}
-
-		/*
-		 * If this is a metadata allocation, try to reuse the busy
-		 * extent instead of trimming the allocation.
-		 */
-		if (!args->userdata &&
-		    !(busyp->flags & XFS_ALLOC_BUSY_DISCARDED)) {
-			if (!xfs_alloc_busy_update_extent(args->mp, args->pag,
-							  busyp, fbno, flen,
-							  false))
-				goto restart;
-			continue;
-		}
-
-		if (bbno <= fbno) {
-			/* start overlap */
-
-			/*
-			 * Case 1:
-			 *    bbno           bend
-			 *    +BBBBBBBBBBBBBBBBB+
-			 *        +---------+
-			 *        fbno   fend
-			 *
-			 * Case 2:
-			 *    bbno           bend
-			 *    +BBBBBBBBBBBBBBBBB+
-			 *    +-------------+
-			 *    fbno       fend
-			 *
-			 * Case 3:
-			 *    bbno           bend
-			 *    +BBBBBBBBBBBBBBBBB+
-			 *        +-------------+
-			 *        fbno       fend
-			 *
-			 * Case 4:
-			 *    bbno           bend
-			 *    +BBBBBBBBBBBBBBBBB+
-			 *    +-----------------+
-			 *    fbno           fend
-			 *
-			 * No unbusy region in extent, return failure.
-			 */
-			if (fend <= bend)
-				goto fail;
-
-			/*
-			 * Case 5:
-			 *    bbno           bend
-			 *    +BBBBBBBBBBBBBBBBB+
-			 *        +----------------------+
-			 *        fbno                fend
-			 *
-			 * Case 6:
-			 *    bbno           bend
-			 *    +BBBBBBBBBBBBBBBBB+
-			 *    +--------------------------+
-			 *    fbno                    fend
-			 *
-			 * Needs to be trimmed to:
-			 *                       +-------+
-			 *                       fbno fend
-			 */
-			fbno = bend;
-		} else if (bend >= fend) {
-			/* end overlap */
-
-			/*
-			 * Case 7:
-			 *             bbno           bend
-			 *             +BBBBBBBBBBBBBBBBB+
-			 *    +------------------+
-			 *    fbno            fend
-			 *
-			 * Case 8:
-			 *             bbno           bend
-			 *             +BBBBBBBBBBBBBBBBB+
-			 *    +--------------------------+
-			 *    fbno                    fend
-			 *
-			 * Needs to be trimmed to:
-			 *    +-------+
-			 *    fbno fend
-			 */
-			fend = bbno;
-		} else {
-			/* middle overlap */
-
-			/*
-			 * Case 9:
-			 *             bbno           bend
-			 *             +BBBBBBBBBBBBBBBBB+
-			 *    +-----------------------------------+
-			 *    fbno                             fend
-			 *
-			 * Can be trimmed to:
-			 *    +-------+        OR         +-------+
-			 *    fbno fend                   fbno fend
-			 *
-			 * Backward allocation leads to significant
-			 * fragmentation of directories, which degrades
-			 * directory performance, therefore we always want to
-			 * choose the option that produces forward allocation
-			 * patterns.
-			 * Preferring the lower bno extent will make the next
-			 * request use "fend" as the start of the next
-			 * allocation;  if the segment is no longer busy at
-			 * that point, we'll get a contiguous allocation, but
-			 * even if it is still busy, we will get a forward
-			 * allocation.
-			 * We try to avoid choosing the segment at "bend",
-			 * because that can lead to the next allocation
-			 * taking the segment at "fbno", which would be a
-			 * backward allocation.  We only use the segment at
-			 * "fbno" if it is much larger than the current
-			 * requested size, because in that case there's a
-			 * good chance subsequent allocations will be
-			 * contiguous.
-			 */
-			if (bbno - fbno >= args->maxlen) {
-				/* left candidate fits perfect */
-				fend = bbno;
-			} else if (fend - bend >= args->maxlen * 4) {
-				/* right candidate has enough free space */
-				fbno = bend;
-			} else if (bbno - fbno >= args->minlen) {
-				/* left candidate fits minimum requirement */
-				fend = bbno;
-			} else {
-				goto fail;
-			}
-		}
-
-		flen = fend - fbno;
-	}
-	spin_unlock(&args->pag->pagb_lock);
-
-	if (fbno != bno || flen != len) {
-		trace_xfs_alloc_busy_trim(args->mp, args->agno, bno, len,
-					  fbno, flen);
-	}
-	*rbno = fbno;
-	*rlen = flen;
-	return;
-fail:
-	/*
-	 * Return a zero extent length as failure indications.  All callers
-	 * re-check if the trimmed extent satisfies the minlen requirement.
-	 */
-	spin_unlock(&args->pag->pagb_lock);
-	trace_xfs_alloc_busy_trim(args->mp, args->agno, bno, len, fbno, 0);
-	*rbno = fbno;
-	*rlen = 0;
-}
-
-static void
-xfs_alloc_busy_clear_one(
-	struct xfs_mount	*mp,
-	struct xfs_perag	*pag,
-	struct xfs_busy_extent	*busyp)
-{
-	if (busyp->length) {
-		trace_xfs_alloc_busy_clear(mp, busyp->agno, busyp->bno,
-						busyp->length);
-		rb_erase(&busyp->rb_node, &pag->pagb_tree);
-	}
-
-	list_del_init(&busyp->list);
-	kmem_free(busyp);
-}
-
-/*
- * Remove all extents on the passed in list from the busy extents tree.
- * If do_discard is set skip extents that need to be discarded, and mark
- * these as undergoing a discard operation instead.
- */
-void
-xfs_alloc_busy_clear(
-	struct xfs_mount	*mp,
-	struct list_head	*list,
-	bool			do_discard)
-{
-	struct xfs_busy_extent	*busyp, *n;
-	struct xfs_perag	*pag = NULL;
-	xfs_agnumber_t		agno = NULLAGNUMBER;
-
-	list_for_each_entry_safe(busyp, n, list, list) {
-		if (busyp->agno != agno) {
-			if (pag) {
-				spin_unlock(&pag->pagb_lock);
-				xfs_perag_put(pag);
-			}
-			pag = xfs_perag_get(mp, busyp->agno);
-			spin_lock(&pag->pagb_lock);
-			agno = busyp->agno;
-		}
-
-		if (do_discard && busyp->length &&
-		    !(busyp->flags & XFS_ALLOC_BUSY_SKIP_DISCARD))
-			busyp->flags = XFS_ALLOC_BUSY_DISCARDED;
-		else
-			xfs_alloc_busy_clear_one(mp, pag, busyp);
-	}
-
-	if (pag) {
-		spin_unlock(&pag->pagb_lock);
-		xfs_perag_put(pag);
-	}
-}
-
-/*
- * Callback for list_sort to sort busy extents by the AG they reside in.
- */
-int
-xfs_busy_extent_ag_cmp(
-	void			*priv,
-	struct list_head	*a,
-	struct list_head	*b)
-{
-	return container_of(a, struct xfs_busy_extent, list)->agno -
-		container_of(b, struct xfs_busy_extent, list)->agno;
-}
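
The comment on the removed xfs_alloc_busy_search() spells out a contract
that survives unchanged in the relocated xfs_extent_busy_search(): 0 means
no overlap, -1 a partial overlap, 1 an exact match, so any non-zero result
signals an overlap needing a synchronous transaction. A standalone sketch
of the same walk over a plain binary search tree keyed by start block (the
node type and helper are illustrative, not kernel API; the real code uses
struct rb_node in the per-AG pagb_tree):

struct busy_node {
	unsigned int		bno;		/* first busy block */
	unsigned int		length;		/* busy extent length */
	struct busy_node	*left, *right;	/* ordered by bno */
};

/* Classify [bno, bno + len) against the tree: 0, -1 or 1 as above. */
static int busy_search(const struct busy_node *node,
		       unsigned int bno, unsigned int len)
{
	int match = 0;

	while (node) {
		if (bno < node->bno) {
			/* may overlap, but exact start block is lower */
			if (bno + len > node->bno)
				match = -1;
			node = node->left;
		} else if (bno > node->bno) {
			/* may overlap, but exact start block is higher */
			if (bno < node->bno + node->length)
				match = -1;
			node = node->right;
		} else {
			/* start blocks equal: length decides exactness */
			return node->length == len ? 1 : -1;
		}
	}
	return match;
}
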
diff --git a/fs/xfs/xfs_alloc.h b/fs/xfs/xfs_alloc.h
index 3a7e7d8f8ded..93be4a667ca1 100644
--- a/fs/xfs/xfs_alloc.h
+++ b/fs/xfs/xfs_alloc.h
@@ -23,7 +23,6 @@ struct xfs_btree_cur;
 struct xfs_mount;
 struct xfs_perag;
 struct xfs_trans;
-struct xfs_busy_extent;
 
 extern struct workqueue_struct *xfs_alloc_wq;
 
@@ -139,33 +138,6 @@ xfs_extlen_t
 xfs_alloc_longest_free_extent(struct xfs_mount *mp,
 		struct xfs_perag *pag);
 
-#ifdef __KERNEL__
-void
-xfs_alloc_busy_insert(struct xfs_trans *tp, xfs_agnumber_t agno,
-	xfs_agblock_t bno, xfs_extlen_t len, unsigned int flags);
-
-void
-xfs_alloc_busy_clear(struct xfs_mount *mp, struct list_head *list,
-	bool do_discard);
-
-int
-xfs_alloc_busy_search(struct xfs_mount *mp, xfs_agnumber_t agno,
-	xfs_agblock_t bno, xfs_extlen_t len);
-
-void
-xfs_alloc_busy_reuse(struct xfs_mount *mp, xfs_agnumber_t agno,
-	xfs_agblock_t fbno, xfs_extlen_t flen, bool userdata);
-
-int
-xfs_busy_extent_ag_cmp(void *priv, struct list_head *a, struct list_head *b);
-
-static inline void xfs_alloc_busy_sort(struct list_head *list)
-{
-	list_sort(NULL, list, xfs_busy_extent_ag_cmp);
-}
-
-#endif	/* __KERNEL__ */
-
 /*
  * Compute and fill in value of m_ag_maxlevels.
  */
diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c
index ffb3386e45c1..f1647caace8f 100644
--- a/fs/xfs/xfs_alloc_btree.c
+++ b/fs/xfs/xfs_alloc_btree.c
@@ -18,9 +18,7 @@
 #include "xfs.h"
 #include "xfs_fs.h"
 #include "xfs_types.h"
-#include "xfs_bit.h"
 #include "xfs_log.h"
-#include "xfs_inum.h"
 #include "xfs_trans.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
@@ -32,6 +30,7 @@
 #include "xfs_inode.h"
 #include "xfs_btree.h"
 #include "xfs_alloc.h"
+#include "xfs_extent_busy.h"
 #include "xfs_error.h"
 #include "xfs_trace.h"
 
@@ -94,7 +93,7 @@ xfs_allocbt_alloc_block(
 		return 0;
 	}
 
-	xfs_alloc_busy_reuse(cur->bc_mp, cur->bc_private.a.agno, bno, 1, false);
+	xfs_extent_busy_reuse(cur->bc_mp, cur->bc_private.a.agno, bno, 1, false);
 
 	xfs_trans_agbtree_delta(cur->bc_tp, 1);
 	new->s = cpu_to_be32(bno);
@@ -119,8 +118,8 @@ xfs_allocbt_free_block(
 	if (error)
 		return error;
 
-	xfs_alloc_busy_insert(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1,
-			      XFS_ALLOC_BUSY_SKIP_DISCARD);
+	xfs_extent_busy_insert(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1,
+			      XFS_EXTENT_BUSY_SKIP_DISCARD);
 	xfs_trans_agbtree_delta(cur->bc_tp, -1);
 	return 0;
 }
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 0dbb9e70fe21..ae31c313a79e 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -16,9 +16,7 @@
  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 #include "xfs.h"
-#include "xfs_bit.h"
 #include "xfs_log.h"
-#include "xfs_inum.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
 #include "xfs_trans.h"
@@ -29,7 +27,6 @@
 #include "xfs_inode_item.h"
 #include "xfs_alloc.h"
 #include "xfs_error.h"
-#include "xfs_rw.h"
 #include "xfs_iomap.h"
 #include "xfs_vnodeops.h"
 #include "xfs_trace.h"
@@ -623,7 +620,7 @@ xfs_map_at_offset(
  * or delayed allocate extent.
  */
 STATIC int
-xfs_is_delayed_page(
+xfs_check_page_type(
 	struct page		*page,
 	unsigned int		type)
 {
@@ -637,11 +634,11 @@ xfs_is_delayed_page(
 	bh = head = page_buffers(page);
 	do {
 		if (buffer_unwritten(bh))
-			acceptable = (type == IO_UNWRITTEN);
+			acceptable += (type == IO_UNWRITTEN);
 		else if (buffer_delay(bh))
-			acceptable = (type == IO_DELALLOC);
+			acceptable += (type == IO_DELALLOC);
 		else if (buffer_dirty(bh) && buffer_mapped(bh))
-			acceptable = (type == IO_OVERWRITE);
+			acceptable += (type == IO_OVERWRITE);
 		else
 			break;
 	} while ((bh = bh->b_this_page) != head);
@@ -684,7 +681,7 @@ xfs_convert_page(
 		goto fail_unlock_page;
 	if (page->mapping != inode->i_mapping)
 		goto fail_unlock_page;
-	if (!xfs_is_delayed_page(page, (*ioendp)->io_type))
+	if (!xfs_check_page_type(page, (*ioendp)->io_type))
 		goto fail_unlock_page;
 
 	/*
@@ -834,7 +831,7 @@ xfs_aops_discard_page(
 	struct buffer_head	*bh, *head;
 	loff_t			offset = page_offset(page);
 
-	if (!xfs_is_delayed_page(page, IO_DELALLOC))
+	if (!xfs_check_page_type(page, IO_DELALLOC))
 		goto out_invalidate;
 
 	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
@@ -1146,7 +1143,14 @@ __xfs_get_blocks(
 	if (!create && direct && offset >= i_size_read(inode))
 		return 0;
 
-	if (create) {
+	/*
+	 * Direct I/O is usually done on preallocated files, so try getting
+	 * a block mapping without an exclusive lock first. For buffered
+	 * writes we already have the exclusive iolock anyway, so avoiding
+	 * a lock roundtrip here by taking the ilock exclusive from the
+	 * beginning is a useful micro optimization.
+	 */
+	if (create && !direct) {
 		lockmode = XFS_ILOCK_EXCL;
 		xfs_ilock(ip, lockmode);
 	} else {
@@ -1168,23 +1172,45 @@ __xfs_get_blocks(
 	    (!nimaps ||
 	     (imap.br_startblock == HOLESTARTBLOCK ||
 	      imap.br_startblock == DELAYSTARTBLOCK))) {
-		if (direct) {
+		if (direct || xfs_get_extsz_hint(ip)) {
+			/*
+			 * Drop the ilock in preparation for starting the block
+			 * allocation transaction.  It will be retaken
+			 * exclusively inside xfs_iomap_write_direct for the
+			 * actual allocation.
+			 */
+			xfs_iunlock(ip, lockmode);
 			error = xfs_iomap_write_direct(ip, offset, size,
 						       &imap, nimaps);
+			if (error)
+				return -error;
+			new = 1;
 		} else {
+			/*
+			 * Delalloc reservations do not require a transaction,
+			 * we can go on without dropping the lock here. If we
+			 * are allocating a new delalloc block, make sure that
+			 * we set the new flag so that we mark the buffer new so
+			 * that we know that it is newly allocated if the write
+			 * fails.
+			 */
+			if (nimaps && imap.br_startblock == HOLESTARTBLOCK)
+				new = 1;
 			error = xfs_iomap_write_delay(ip, offset, size, &imap);
+			if (error)
+				goto out_unlock;
+
+			xfs_iunlock(ip, lockmode);
 		}
-		if (error)
-			goto out_unlock;
 
 		trace_xfs_get_blocks_alloc(ip, offset, size, 0, &imap);
 	} else if (nimaps) {
 		trace_xfs_get_blocks_found(ip, offset, size, 0, &imap);
+		xfs_iunlock(ip, lockmode);
 	} else {
 		trace_xfs_get_blocks_notfound(ip, offset, size);
 		goto out_unlock;
 	}
-	xfs_iunlock(ip, lockmode);
 
 	if (imap.br_startblock != HOLESTARTBLOCK &&
 	    imap.br_startblock != DELAYSTARTBLOCK) {
@@ -1386,52 +1412,91 @@ out_destroy_ioend:
 	return ret;
 }
 
+/*
+ * Punch out the delalloc blocks we have already allocated.
+ *
+ * Don't bother with xfs_setattr given that nothing can have made it to disk yet
+ * as the page is still locked at this point.
+ */
+STATIC void
+xfs_vm_kill_delalloc_range(
+	struct inode		*inode,
+	loff_t			start,
+	loff_t			end)
+{
+	struct xfs_inode	*ip = XFS_I(inode);
+	xfs_fileoff_t		start_fsb;
+	xfs_fileoff_t		end_fsb;
+	int			error;
+
+	start_fsb = XFS_B_TO_FSB(ip->i_mount, start);
+	end_fsb = XFS_B_TO_FSB(ip->i_mount, end);
+	if (end_fsb <= start_fsb)
+		return;
+
+	xfs_ilock(ip, XFS_ILOCK_EXCL);
+	error = xfs_bmap_punch_delalloc_range(ip, start_fsb,
+						end_fsb - start_fsb);
+	if (error) {
+		/* something screwed, just bail */
+		if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
+			xfs_alert(ip->i_mount,
+		"xfs_vm_write_failed: unable to clean up ino %lld",
+					ip->i_ino);
+		}
+	}
+	xfs_iunlock(ip, XFS_ILOCK_EXCL);
+}
+
 STATIC void
 xfs_vm_write_failed(
-	struct address_space	*mapping,
-	loff_t			to)
+	struct inode		*inode,
+	struct page		*page,
+	loff_t			pos,
+	unsigned		len)
 {
-	struct inode		*inode = mapping->host;
+	loff_t			block_offset = pos & PAGE_MASK;
+	loff_t			block_start;
+	loff_t			block_end;
+	loff_t			from = pos & (PAGE_CACHE_SIZE - 1);
+	loff_t			to = from + len;
+	struct buffer_head	*bh, *head;
 
-	if (to > inode->i_size) {
-		/*
-		 * Punch out the delalloc blocks we have already allocated.
-		 *
-		 * Don't bother with xfs_setattr given that nothing can have
-		 * made it to disk yet as the page is still locked at this
-		 * point.
-		 */
-		struct xfs_inode	*ip = XFS_I(inode);
-		xfs_fileoff_t		start_fsb;
-		xfs_fileoff_t		end_fsb;
-		int			error;
+	ASSERT(block_offset + from == pos);
 
-		truncate_pagecache(inode, to, inode->i_size);
+	head = page_buffers(page);
+	block_start = 0;
+	for (bh = head; bh != head || !block_start;
+	     bh = bh->b_this_page, block_start = block_end,
+	     block_offset += bh->b_size) {
+		block_end = block_start + bh->b_size;
 
-		/*
-		 * Check if there are any blocks that are outside of i_size
-		 * that need to be trimmed back.
-		 */
-		start_fsb = XFS_B_TO_FSB(ip->i_mount, inode->i_size) + 1;
-		end_fsb = XFS_B_TO_FSB(ip->i_mount, to);
-		if (end_fsb <= start_fsb)
-			return;
+		/* skip buffers before the write */
+		if (block_end <= from)
+			continue;
 
-		xfs_ilock(ip, XFS_ILOCK_EXCL);
-		error = xfs_bmap_punch_delalloc_range(ip, start_fsb,
-							end_fsb - start_fsb);
-		if (error) {
-			/* something screwed, just bail */
-			if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
-				xfs_alert(ip->i_mount,
-			"xfs_vm_write_failed: unable to clean up ino %lld",
-						ip->i_ino);
-			}
-		}
-		xfs_iunlock(ip, XFS_ILOCK_EXCL);
+		/* if the buffer is after the write, we're done */
+		if (block_start >= to)
+			break;
+
+		if (!buffer_delay(bh))
+			continue;
+
+		if (!buffer_new(bh) && block_offset < i_size_read(inode))
+			continue;
+
+		xfs_vm_kill_delalloc_range(inode, block_offset,
+					   block_offset + bh->b_size);
 	}
+
 }
 
+/*
+ * This used to call block_write_begin(), but it unlocks and releases the page
+ * on error, and we need that page to be able to punch stale delalloc blocks out
+ * on failure. hence we copy-n-waste it here and call xfs_vm_write_failed() at
+ * the appropriate point.
+ */
 STATIC int
 xfs_vm_write_begin(
 	struct file		*file,
@@ -1442,15 +1507,40 @@ xfs_vm_write_begin(
 	struct page		**pagep,
 	void			**fsdata)
 {
-	int			ret;
+	pgoff_t			index = pos >> PAGE_CACHE_SHIFT;
+	struct page		*page;
+	int			status;
 
-	ret = block_write_begin(mapping, pos, len, flags | AOP_FLAG_NOFS,
-				pagep, xfs_get_blocks);
-	if (unlikely(ret))
-		xfs_vm_write_failed(mapping, pos + len);
-	return ret;
+	ASSERT(len <= PAGE_CACHE_SIZE);
+
+	page = grab_cache_page_write_begin(mapping, index,
+					   flags | AOP_FLAG_NOFS);
+	if (!page)
+		return -ENOMEM;
+
+	status = __block_write_begin(page, pos, len, xfs_get_blocks);
+	if (unlikely(status)) {
+		struct inode	*inode = mapping->host;
+
+		xfs_vm_write_failed(inode, page, pos, len);
+		unlock_page(page);
+
+		if (pos + len > i_size_read(inode))
+			truncate_pagecache(inode, pos + len, i_size_read(inode));
+
+		page_cache_release(page);
+		page = NULL;
+	}
+
+	*pagep = page;
+	return status;
 }
 
+/*
+ * On failure, we only need to kill delalloc blocks beyond EOF because they
+ * will never be written. For blocks within EOF, generic_write_end() zeros them
+ * so they are safe to leave alone and be written with all the other valid data.
+ */
 STATIC int
 xfs_vm_write_end(
 	struct file		*file,
@@ -1463,9 +1553,19 @@ xfs_vm_write_end(
 {
 	int			ret;
 
+	ASSERT(len <= PAGE_CACHE_SIZE);
+
 	ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata);
-	if (unlikely(ret < len))
-		xfs_vm_write_failed(mapping, pos + len);
+	if (unlikely(ret < len)) {
+		struct inode	*inode = mapping->host;
+		size_t		isize = i_size_read(inode);
+		loff_t		to = pos + len;
+
+		if (to > isize) {
+			truncate_pagecache(inode, to, isize);
+			xfs_vm_kill_delalloc_range(inode, isize, to);
+		}
+	}
 	return ret;
 }
 
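
The rewritten failure paths above hinge on one range decision, stated in
the comment before xfs_vm_write_end(): delalloc blocks inside EOF were
zeroed by generic_write_end() and may stay, so only [isize, pos + len) is
truncated and punched. A hedged standalone model of that arithmetic (plain
long long in place of loff_t; the function name is invented for
illustration):

#include <stdio.h>

/* Mirror of the short-copy cleanup decision in xfs_vm_write_end(). */
static void short_write_cleanup(long long pos, unsigned len, long long isize)
{
	long long to = pos + len;

	if (to > isize)
		printf("punch stale delalloc in [%lld, %lld)\n", isize, to);
	else
		printf("write ended inside EOF: nothing to clean up\n");
}

int main(void)
{
	short_write_cleanup(4096, 8192, 4096);	/* extends the file: punch */
	short_write_cleanup(0, 512, 4096);	/* inside EOF: leave alone */
	return 0;
}
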
diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c
index 65d61b948ead..a17ff01b5adf 100644
--- a/fs/xfs/xfs_attr.c
+++ b/fs/xfs/xfs_attr.c
@@ -21,7 +21,6 @@
 #include "xfs_types.h"
 #include "xfs_bit.h"
 #include "xfs_log.h"
-#include "xfs_inum.h"
 #include "xfs_trans.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
@@ -39,7 +38,6 @@
 #include "xfs_error.h"
 #include "xfs_quota.h"
 #include "xfs_trans_space.h"
-#include "xfs_rw.h"
 #include "xfs_vnodeops.h"
 #include "xfs_trace.h"
 
@@ -1987,14 +1985,12 @@ xfs_attr_rmtval_get(xfs_da_args_t *args)
 		       (map[i].br_startblock != HOLESTARTBLOCK));
 		dblkno = XFS_FSB_TO_DADDR(mp, map[i].br_startblock);
 		blkcnt = XFS_FSB_TO_BB(mp, map[i].br_blockcount);
-		error = xfs_read_buf(mp, mp->m_ddev_targp, dblkno,
-				     blkcnt, XBF_LOCK | XBF_DONT_BLOCK,
-				     &bp);
+		error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp,
+					   dblkno, blkcnt, 0, &bp);
 		if (error)
 			return(error);
 
-		tmp = (valuelen < XFS_BUF_SIZE(bp))
-			? valuelen : XFS_BUF_SIZE(bp);
+		tmp = min_t(int, valuelen, BBTOB(bp->b_length));
 		xfs_buf_iomove(bp, 0, tmp, dst, XBRW_READ);
 		xfs_buf_relse(bp);
 		dst += tmp;
@@ -2097,6 +2093,8 @@ xfs_attr_rmtval_set(xfs_da_args_t *args)
 	lblkno = args->rmtblkno;
 	valuelen = args->valuelen;
 	while (valuelen > 0) {
+		int buflen;
+
 		/*
 		 * Try to remember where we decided to put the value.
 		 */
@@ -2114,15 +2112,16 @@ xfs_attr_rmtval_set(xfs_da_args_t *args)
 		dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock),
 		blkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount);
 
-		bp = xfs_buf_get(mp->m_ddev_targp, dblkno, blkcnt,
-				 XBF_LOCK | XBF_DONT_BLOCK);
+		bp = xfs_buf_get(mp->m_ddev_targp, dblkno, blkcnt, 0);
 		if (!bp)
 			return ENOMEM;
-		tmp = (valuelen < XFS_BUF_SIZE(bp)) ? valuelen :
-							XFS_BUF_SIZE(bp);
+
+		buflen = BBTOB(bp->b_length);
+		tmp = min_t(int, valuelen, buflen);
 		xfs_buf_iomove(bp, 0, tmp, src, XBRW_WRITE);
-		if (tmp < XFS_BUF_SIZE(bp))
-			xfs_buf_zero(bp, tmp, XFS_BUF_SIZE(bp) - tmp);
+		if (tmp < buflen)
+			xfs_buf_zero(bp, tmp, buflen - tmp);
+
 		error = xfs_bwrite(bp);	/* GROT: NOTE: synchronous write */
 		xfs_buf_relse(bp);
 		if (error)
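
Both rmtval changes follow the xfs_buf cleanup that made b_length count
512-byte basic blocks, so byte sizes now come from BBTOB() instead of the
removed XFS_BUF_SIZE(). A small sketch of the unit conversion and the
min_t()-style clamp used above (BBSHIFT = 9 matches the kernel constant;
the demo values are invented):

#include <stdio.h>

#define BBSHIFT		9			/* 512-byte basic blocks */
#define BBTOB(bbs)	((bbs) << BBSHIFT)	/* basic blocks to bytes */

int main(void)
{
	int b_length = 16;		/* buffer length in basic blocks */
	int valuelen = 6000;		/* remote attr bytes left to copy */
	int buflen = BBTOB(b_length);	/* 16 << 9 = 8192 bytes */
	int tmp = valuelen < buflen ? valuelen : buflen;

	printf("buflen=%d copy=%d zeroed tail=%d\n",
	       buflen, tmp, buflen - tmp);
	return 0;
}
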
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c
index 76d93dc953e1..7d89d800f517 100644
--- a/fs/xfs/xfs_attr_leaf.c
+++ b/fs/xfs/xfs_attr_leaf.c
@@ -20,7 +20,6 @@
 #include "xfs_types.h"
 #include "xfs_bit.h"
 #include "xfs_log.h"
-#include "xfs_inum.h"
 #include "xfs_trans.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
@@ -2983,7 +2982,7 @@ xfs_attr_leaf_freextent(xfs_trans_t **trans, xfs_inode_t *dp,
 					map.br_blockcount);
 			bp = xfs_trans_get_buf(*trans,
 					dp->i_mount->m_ddev_targp,
-					dblkno, dblkcnt, XBF_LOCK);
+					dblkno, dblkcnt, 0);
 			if (!bp)
 				return ENOMEM;
 			xfs_trans_binval(*trans, bp);
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 85e7e327bcd8..58b815ec8c91 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -41,7 +41,6 @@
 #include "xfs_rtalloc.h"
 #include "xfs_error.h"
 #include "xfs_attr_leaf.h"
-#include "xfs_rw.h"
 #include "xfs_quota.h"
 #include "xfs_trans_space.h"
 #include "xfs_buf_item.h"
@@ -4527,7 +4526,7 @@ out_unreserve_blocks:
 	xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, alen, 0);
 out_unreserve_quota:
 	if (XFS_IS_QUOTA_ON(mp))
-		xfs_trans_unreserve_quota_nblks(NULL, ip, alen, 0, rt ?
+		xfs_trans_unreserve_quota_nblks(NULL, ip, (long)alen, 0, rt ?
 				XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS);
 	return error;
 }
@@ -5621,8 +5620,20 @@ xfs_getbmap(
 			XFS_FSB_TO_BB(mp, map[i].br_blockcount);
 		out[cur_ext].bmv_unused1 = 0;
 		out[cur_ext].bmv_unused2 = 0;
-		ASSERT(((iflags & BMV_IF_DELALLOC) != 0) ||
-		      (map[i].br_startblock != DELAYSTARTBLOCK));
+
+		/*
+		 * delayed allocation extents that start beyond EOF can
+		 * occur due to speculative EOF allocation when the
+		 * delalloc extent is larger than the largest freespace
+		 * extent at conversion time. These extents cannot be
+		 * converted by data writeback, so can exist here even
+		 * if we are not supposed to be finding delalloc
+		 * extents.
+		 */
+		if (map[i].br_startblock == DELAYSTARTBLOCK &&
+		    map[i].br_startoff <= XFS_B_TO_FSB(mp, XFS_ISIZE(ip)))
+			ASSERT((iflags & BMV_IF_DELALLOC) != 0);
+
 		if (map[i].br_startblock == HOLESTARTBLOCK &&
 		    whichfork == XFS_ATTR_FORK) {
 			/* came to the end of attribute fork */
@@ -6157,3 +6168,16 @@ next_block:
 
 	return error;
 }
+
+/*
+ * Convert the given file system block to a disk block.  We have to treat it
+ * differently based on whether the file is a real time file or not, because the
+ * bmap code does.
+ */
+xfs_daddr_t
+xfs_fsb_to_db(struct xfs_inode *ip, xfs_fsblock_t fsb)
+{
+	return (XFS_IS_REALTIME_INODE(ip) ? \
+		 (xfs_daddr_t)XFS_FSB_TO_BB((ip)->i_mount, (fsb)) : \
+		 XFS_FSB_TO_DADDR((ip)->i_mount, (fsb)));
+}
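
xfs_fsb_to_db(), moved here from the deleted xfs_rw.c, hides the fact that
realtime files use a linear file system block number while data-device
files pack an AG number above a per-AG block number. A simplified
standalone model of the two conversions (the geometry constants and helper
names are invented for the example; the real macros derive these values
from the mount's superblock fields):

#include <stdint.h>
#include <stdio.h>

#define BLKBB_LOG	3	/* log2(4096-byte block / 512-byte sector) */
#define AGBLKLOG	16	/* log2(blocks per allocation group) */
#define AGBLOCKS	(1u << AGBLKLOG)

/* Realtime device: fsb is already linear, only a unit shift is needed. */
static uint64_t fsb_to_db_rt(uint64_t fsb)
{
	return fsb << BLKBB_LOG;
}

/* Data device: split fsb into (agno, agbno), then linearize. */
static uint64_t fsb_to_db_data(uint64_t fsb)
{
	uint64_t agno = fsb >> AGBLKLOG;
	uint64_t agbno = fsb & (AGBLOCKS - 1);

	return (agno * AGBLOCKS + agbno) << BLKBB_LOG;
}

int main(void)
{
	printf("rt fsb 100   -> daddr %llu\n",
	       (unsigned long long)fsb_to_db_rt(100));
	printf("data ag2+100 -> daddr %llu\n",
	       (unsigned long long)fsb_to_db_data((2ull << AGBLKLOG) | 100));
	return 0;
}
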
diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h
index 89ee672d378a..803b56d7ce16 100644
--- a/fs/xfs/xfs_bmap.h
+++ b/fs/xfs/xfs_bmap.h
@@ -211,6 +211,9 @@ int xfs_bmap_count_blocks(struct xfs_trans *tp, struct xfs_inode *ip,
 		int whichfork, int *count);
 int xfs_bmap_punch_delalloc_range(struct xfs_inode *ip,
 		xfs_fileoff_t start_fsb, xfs_fileoff_t length);
+
+xfs_daddr_t xfs_fsb_to_db(struct xfs_inode *ip, xfs_fsblock_t fsb);
+
 #endif	/* __KERNEL__ */
 
 #endif	/* __XFS_BMAP_H__ */
diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c
index e2f5d59cbeaf..862084a47a7e 100644
--- a/fs/xfs/xfs_bmap_btree.c
+++ b/fs/xfs/xfs_bmap_btree.c
@@ -20,7 +20,6 @@
 #include "xfs_types.h"
 #include "xfs_bit.h"
 #include "xfs_log.h"
-#include "xfs_inum.h"
 #include "xfs_trans.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c
index 1f19f03af9d3..e53e317b1582 100644
--- a/fs/xfs/xfs_btree.c
+++ b/fs/xfs/xfs_btree.c
@@ -20,7 +20,6 @@
 #include "xfs_types.h"
 #include "xfs_bit.h"
 #include "xfs_log.h"
-#include "xfs_inum.h"
 #include "xfs_trans.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 6819b5163e33..172d3cc8f8cb 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c | |||
@@ -35,14 +35,12 @@ | |||
35 | #include <linux/freezer.h> | 35 | #include <linux/freezer.h> |
36 | 36 | ||
37 | #include "xfs_sb.h" | 37 | #include "xfs_sb.h" |
38 | #include "xfs_inum.h" | ||
39 | #include "xfs_log.h" | 38 | #include "xfs_log.h" |
40 | #include "xfs_ag.h" | 39 | #include "xfs_ag.h" |
41 | #include "xfs_mount.h" | 40 | #include "xfs_mount.h" |
42 | #include "xfs_trace.h" | 41 | #include "xfs_trace.h" |
43 | 42 | ||
44 | static kmem_zone_t *xfs_buf_zone; | 43 | static kmem_zone_t *xfs_buf_zone; |
45 | STATIC int xfsbufd(void *); | ||
46 | 44 | ||
47 | static struct workqueue_struct *xfslogd_workqueue; | 45 | static struct workqueue_struct *xfslogd_workqueue; |
48 | 46 | ||
@@ -57,11 +55,7 @@ static struct workqueue_struct *xfslogd_workqueue; | |||
57 | #endif | 55 | #endif |
58 | 56 | ||
59 | #define xb_to_gfp(flags) \ | 57 | #define xb_to_gfp(flags) \ |
60 | ((((flags) & XBF_READ_AHEAD) ? __GFP_NORETRY : \ | 58 | ((((flags) & XBF_READ_AHEAD) ? __GFP_NORETRY : GFP_NOFS) | __GFP_NOWARN) |
61 | ((flags) & XBF_DONT_BLOCK) ? GFP_NOFS : GFP_KERNEL) | __GFP_NOWARN) | ||
62 | |||
63 | #define xb_to_km(flags) \ | ||
64 | (((flags) & XBF_DONT_BLOCK) ? KM_NOFS : KM_SLEEP) | ||
65 | 59 | ||
66 | 60 | ||
67 | static inline int | 61 | static inline int |
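With XBF_DONT_BLOCK gone, the mask no longer has a GFP_KERNEL case: read-ahead allocations keep __GFP_NORETRY so speculative I/O fails fast under memory pressure, and everything else is GFP_NOFS to avoid recursing into the filesystem from reclaim. Restating the simplified macro above as the two values it can produce:

	/* XBF_READ_AHEAD:   __GFP_NORETRY | __GFP_NOWARN */
	/* all other flags:  GFP_NOFS      | __GFP_NOWARN */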
@@ -71,11 +65,11 @@ xfs_buf_is_vmapped( | |||
71 | /* | 65 | /* |
72 | * Return true if the buffer is vmapped. | 66 | * Return true if the buffer is vmapped. |
73 | * | 67 | * |
74 | * The XBF_MAPPED flag is set if the buffer should be mapped, but the | 68 | * b_addr is null if the buffer is not mapped, but the code is clever |
75 | * code is clever enough to know it doesn't have to map a single page, | 69 | * enough to know it doesn't have to map a single page, so the check has |
76 | * so the check has to be both for XBF_MAPPED and bp->b_page_count > 1. | 70 | * to be both for b_addr and bp->b_page_count > 1. |
77 | */ | 71 | */ |
78 | return (bp->b_flags & XBF_MAPPED) && bp->b_page_count > 1; | 72 | return bp->b_addr && bp->b_page_count > 1; |
79 | } | 73 | } |
80 | 74 | ||
81 | static inline int | 75 | static inline int |
@@ -144,8 +138,17 @@ void | |||
144 | xfs_buf_stale( | 138 | xfs_buf_stale( |
145 | struct xfs_buf *bp) | 139 | struct xfs_buf *bp) |
146 | { | 140 | { |
141 | ASSERT(xfs_buf_islocked(bp)); | ||
142 | |||
147 | bp->b_flags |= XBF_STALE; | 143 | bp->b_flags |= XBF_STALE; |
148 | xfs_buf_delwri_dequeue(bp); | 144 | |
145 | /* | ||
146 | * Clear the delwri status so that a delwri queue walker will not | ||
147 | * flush this buffer to disk now that it is stale. The delwri queue has | ||
148 | * a reference to the buffer, so this is safe to do. | ||
149 | */ | ||
150 | bp->b_flags &= ~_XBF_DELWRI_Q; | ||
151 | |||
149 | atomic_set(&(bp)->b_lru_ref, 0); | 152 | atomic_set(&(bp)->b_lru_ref, 0); |
150 | if (!list_empty(&bp->b_lru)) { | 153 | if (!list_empty(&bp->b_lru)) { |
151 | struct xfs_buftarg *btp = bp->b_target; | 154 | struct xfs_buftarg *btp = bp->b_target; |
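The lazy dequeue above pairs with the delwri list walk later in this file: staling only clears _XBF_DELWRI_Q, and the walker drops the list's reference when it notices the flag is gone. Condensed from the two hunks (a restatement, not new code):

	/* staler, holding the buffer lock: */
	bp->b_flags &= ~_XBF_DELWRI_Q;		/* bp stays referenced and listed */

	/* list walker, later: */
	if (!(bp->b_flags & _XBF_DELWRI_Q)) {
		list_del_init(&bp->b_list);
		xfs_buf_relse(bp);		/* drop the list's reference */
	}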
@@ -164,22 +167,22 @@ xfs_buf_stale( | |||
164 | struct xfs_buf * | 167 | struct xfs_buf * |
165 | xfs_buf_alloc( | 168 | xfs_buf_alloc( |
166 | struct xfs_buftarg *target, | 169 | struct xfs_buftarg *target, |
167 | xfs_off_t range_base, | 170 | xfs_daddr_t blkno, |
168 | size_t range_length, | 171 | size_t numblks, |
169 | xfs_buf_flags_t flags) | 172 | xfs_buf_flags_t flags) |
170 | { | 173 | { |
171 | struct xfs_buf *bp; | 174 | struct xfs_buf *bp; |
172 | 175 | ||
173 | bp = kmem_zone_alloc(xfs_buf_zone, xb_to_km(flags)); | 176 | bp = kmem_zone_zalloc(xfs_buf_zone, KM_NOFS); |
174 | if (unlikely(!bp)) | 177 | if (unlikely(!bp)) |
175 | return NULL; | 178 | return NULL; |
176 | 179 | ||
177 | /* | 180 | /* |
178 | * We don't want certain flags to appear in b_flags. | 181 | * We don't want certain flags to appear in b_flags unless they are |
182 | * specifically set by later operations on the buffer. | ||
179 | */ | 183 | */ |
180 | flags &= ~(XBF_LOCK|XBF_MAPPED|XBF_DONT_BLOCK|XBF_READ_AHEAD); | 184 | flags &= ~(XBF_UNMAPPED | XBF_TRYLOCK | XBF_ASYNC | XBF_READ_AHEAD); |
181 | 185 | ||
182 | memset(bp, 0, sizeof(xfs_buf_t)); | ||
183 | atomic_set(&bp->b_hold, 1); | 186 | atomic_set(&bp->b_hold, 1); |
184 | atomic_set(&bp->b_lru_ref, 1); | 187 | atomic_set(&bp->b_lru_ref, 1); |
185 | init_completion(&bp->b_iowait); | 188 | init_completion(&bp->b_iowait); |
@@ -189,14 +192,22 @@ xfs_buf_alloc( | |||
189 | sema_init(&bp->b_sema, 0); /* held, no waiters */ | 192 | sema_init(&bp->b_sema, 0); /* held, no waiters */ |
190 | XB_SET_OWNER(bp); | 193 | XB_SET_OWNER(bp); |
191 | bp->b_target = target; | 194 | bp->b_target = target; |
192 | bp->b_file_offset = range_base; | 195 | |
193 | /* | 196 | /* |
194 | * Set buffer_length and count_desired to the same value initially. | 197 | * Set length and io_length to the same value initially. |
195 | * I/O routines should use count_desired, which will be the same in | 198 | * I/O routines should use io_length, which will be the same in |
196 | * most cases but may be reset (e.g. XFS recovery). | 199 | * most cases but may be reset (e.g. XFS recovery). |
197 | */ | 200 | */ |
198 | bp->b_buffer_length = bp->b_count_desired = range_length; | 201 | bp->b_length = numblks; |
202 | bp->b_io_length = numblks; | ||
199 | bp->b_flags = flags; | 203 | bp->b_flags = flags; |
204 | |||
205 | /* | ||
206 | * We do not set the block number here in the buffer because we have not | ||
207 | * finished initialising the buffer. We insert the buffer into the cache | ||
208 | * in this state, so this ensures that we are unable to do IO on a | ||
209 | * buffer that hasn't been fully initialised. | ||
210 | */ | ||
200 | bp->b_bn = XFS_BUF_DADDR_NULL; | 211 | bp->b_bn = XFS_BUF_DADDR_NULL; |
201 | atomic_set(&bp->b_pin_count, 0); | 212 | atomic_set(&bp->b_pin_count, 0); |
202 | init_waitqueue_head(&bp->b_waiters); | 213 | init_waitqueue_head(&bp->b_waiters); |
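Note the unit change here: b_length and b_io_length are now carried in 512-byte basic blocks rather than bytes, so callers convert at the byte boundary with the standard helpers. Their usual definitions, for reference (BBSHIFT is 9; a sketch, not part of this patch):

	#define BBSIZE		(1 << BBSHIFT)		/* 512-byte basic block */
	#define BBTOB(bbs)	((bbs) << BBSHIFT)	/* basic blocks to bytes */
	#define BTOBB(bytes)	(((__u64)(bytes) + BBSIZE - 1) >> BBSHIFT) /* bytes to BBs, rounding up */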
@@ -219,13 +230,12 @@ _xfs_buf_get_pages( | |||
219 | { | 230 | { |
220 | /* Make sure that we have a page list */ | 231 | /* Make sure that we have a page list */ |
221 | if (bp->b_pages == NULL) { | 232 | if (bp->b_pages == NULL) { |
222 | bp->b_offset = xfs_buf_poff(bp->b_file_offset); | ||
223 | bp->b_page_count = page_count; | 233 | bp->b_page_count = page_count; |
224 | if (page_count <= XB_PAGES) { | 234 | if (page_count <= XB_PAGES) { |
225 | bp->b_pages = bp->b_page_array; | 235 | bp->b_pages = bp->b_page_array; |
226 | } else { | 236 | } else { |
227 | bp->b_pages = kmem_alloc(sizeof(struct page *) * | 237 | bp->b_pages = kmem_alloc(sizeof(struct page *) * |
228 | page_count, xb_to_km(flags)); | 238 | page_count, KM_NOFS); |
229 | if (bp->b_pages == NULL) | 239 | if (bp->b_pages == NULL) |
230 | return -ENOMEM; | 240 | return -ENOMEM; |
231 | } | 241 | } |
@@ -288,11 +298,11 @@ xfs_buf_allocate_memory( | |||
288 | xfs_buf_t *bp, | 298 | xfs_buf_t *bp, |
289 | uint flags) | 299 | uint flags) |
290 | { | 300 | { |
291 | size_t size = bp->b_count_desired; | 301 | size_t size; |
292 | size_t nbytes, offset; | 302 | size_t nbytes, offset; |
293 | gfp_t gfp_mask = xb_to_gfp(flags); | 303 | gfp_t gfp_mask = xb_to_gfp(flags); |
294 | unsigned short page_count, i; | 304 | unsigned short page_count, i; |
295 | xfs_off_t end; | 305 | xfs_off_t start, end; |
296 | int error; | 306 | int error; |
297 | 307 | ||
298 | /* | 308 | /* |
@@ -300,15 +310,15 @@ xfs_buf_allocate_memory( | |||
300 | * the memory from the heap - there's no need for the complexity of | 310 | * the memory from the heap - there's no need for the complexity of |
301 | * page arrays to keep allocation down to order 0. | 311 | * page arrays to keep allocation down to order 0. |
302 | */ | 312 | */ |
303 | if (bp->b_buffer_length < PAGE_SIZE) { | 313 | size = BBTOB(bp->b_length); |
304 | bp->b_addr = kmem_alloc(bp->b_buffer_length, xb_to_km(flags)); | 314 | if (size < PAGE_SIZE) { |
315 | bp->b_addr = kmem_alloc(size, KM_NOFS); | ||
305 | if (!bp->b_addr) { | 316 | if (!bp->b_addr) { |
306 | /* low memory - use alloc_page loop instead */ | 317 | /* low memory - use alloc_page loop instead */ |
307 | goto use_alloc_page; | 318 | goto use_alloc_page; |
308 | } | 319 | } |
309 | 320 | ||
310 | if (((unsigned long)(bp->b_addr + bp->b_buffer_length - 1) & | 321 | if (((unsigned long)(bp->b_addr + size - 1) & PAGE_MASK) != |
311 | PAGE_MASK) != | ||
312 | ((unsigned long)bp->b_addr & PAGE_MASK)) { | 322 | ((unsigned long)bp->b_addr & PAGE_MASK)) { |
313 | /* b_addr spans two pages - use alloc_page instead */ | 323 | /* b_addr spans two pages - use alloc_page instead */ |
314 | kmem_free(bp->b_addr); | 324 | kmem_free(bp->b_addr); |
@@ -319,13 +329,14 @@ xfs_buf_allocate_memory( | |||
319 | bp->b_pages = bp->b_page_array; | 329 | bp->b_pages = bp->b_page_array; |
320 | bp->b_pages[0] = virt_to_page(bp->b_addr); | 330 | bp->b_pages[0] = virt_to_page(bp->b_addr); |
321 | bp->b_page_count = 1; | 331 | bp->b_page_count = 1; |
322 | bp->b_flags |= XBF_MAPPED | _XBF_KMEM; | 332 | bp->b_flags |= _XBF_KMEM; |
323 | return 0; | 333 | return 0; |
324 | } | 334 | } |
325 | 335 | ||
326 | use_alloc_page: | 336 | use_alloc_page: |
327 | end = bp->b_file_offset + bp->b_buffer_length; | 337 | start = BBTOB(bp->b_bn) >> PAGE_SHIFT; |
328 | page_count = xfs_buf_btoc(end) - xfs_buf_btoct(bp->b_file_offset); | 338 | end = (BBTOB(bp->b_bn + bp->b_length) + PAGE_SIZE - 1) >> PAGE_SHIFT; |
339 | page_count = end - start; | ||
329 | error = _xfs_buf_get_pages(bp, page_count, flags); | 340 | error = _xfs_buf_get_pages(bp, page_count, flags); |
330 | if (unlikely(error)) | 341 | if (unlikely(error)) |
331 | return error; | 342 | return error; |
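A quick worked example of the new page-count arithmetic, assuming 4096-byte pages (eight basic blocks per page): a buffer with b_bn = 7 and b_length = 3 covers bytes 3584 through 5119, so

	start      = BBTOB(7) >> PAGE_SHIFT;			/* 3584 >> 12 = 0 */
	end        = (BBTOB(10) + PAGE_SIZE - 1) >> PAGE_SHIFT;	/* 9215 >> 12 = 2 */
	page_count = end - start;				/* 2 pages, spans the page crossing */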
@@ -388,8 +399,9 @@ _xfs_buf_map_pages( | |||
388 | if (bp->b_page_count == 1) { | 399 | if (bp->b_page_count == 1) { |
389 | /* A single page buffer is always mappable */ | 400 | /* A single page buffer is always mappable */ |
390 | bp->b_addr = page_address(bp->b_pages[0]) + bp->b_offset; | 401 | bp->b_addr = page_address(bp->b_pages[0]) + bp->b_offset; |
391 | bp->b_flags |= XBF_MAPPED; | 402 | } else if (flags & XBF_UNMAPPED) { |
392 | } else if (flags & XBF_MAPPED) { | 403 | bp->b_addr = NULL; |
404 | } else { | ||
393 | int retried = 0; | 405 | int retried = 0; |
394 | 406 | ||
395 | do { | 407 | do { |
@@ -403,7 +415,6 @@ _xfs_buf_map_pages( | |||
403 | if (!bp->b_addr) | 415 | if (!bp->b_addr) |
404 | return -ENOMEM; | 416 | return -ENOMEM; |
405 | bp->b_addr += bp->b_offset; | 417 | bp->b_addr += bp->b_offset; |
406 | bp->b_flags |= XBF_MAPPED; | ||
407 | } | 418 | } |
408 | 419 | ||
409 | return 0; | 420 | return 0; |
@@ -420,29 +431,27 @@ _xfs_buf_map_pages( | |||
420 | */ | 431 | */ |
421 | xfs_buf_t * | 432 | xfs_buf_t * |
422 | _xfs_buf_find( | 433 | _xfs_buf_find( |
423 | xfs_buftarg_t *btp, /* block device target */ | 434 | struct xfs_buftarg *btp, |
424 | xfs_off_t ioff, /* starting offset of range */ | 435 | xfs_daddr_t blkno, |
425 | size_t isize, /* length of range */ | 436 | size_t numblks, |
426 | xfs_buf_flags_t flags, | 437 | xfs_buf_flags_t flags, |
427 | xfs_buf_t *new_bp) | 438 | xfs_buf_t *new_bp) |
428 | { | 439 | { |
429 | xfs_off_t range_base; | 440 | size_t numbytes; |
430 | size_t range_length; | ||
431 | struct xfs_perag *pag; | 441 | struct xfs_perag *pag; |
432 | struct rb_node **rbp; | 442 | struct rb_node **rbp; |
433 | struct rb_node *parent; | 443 | struct rb_node *parent; |
434 | xfs_buf_t *bp; | 444 | xfs_buf_t *bp; |
435 | 445 | ||
436 | range_base = (ioff << BBSHIFT); | 446 | numbytes = BBTOB(numblks); |
437 | range_length = (isize << BBSHIFT); | ||
438 | 447 | ||
439 | /* Check for IOs smaller than the sector size / not sector aligned */ | 448 | /* Check for IOs smaller than the sector size / not sector aligned */ |
440 | ASSERT(!(range_length < (1 << btp->bt_sshift))); | 449 | ASSERT(!(numbytes < (1 << btp->bt_sshift))); |
441 | ASSERT(!(range_base & (xfs_off_t)btp->bt_smask)); | 450 | ASSERT(!(BBTOB(blkno) & (xfs_off_t)btp->bt_smask)); |
442 | 451 | ||
443 | /* get tree root */ | 452 | /* get tree root */ |
444 | pag = xfs_perag_get(btp->bt_mount, | 453 | pag = xfs_perag_get(btp->bt_mount, |
445 | xfs_daddr_to_agno(btp->bt_mount, ioff)); | 454 | xfs_daddr_to_agno(btp->bt_mount, blkno)); |
446 | 455 | ||
447 | /* walk tree */ | 456 | /* walk tree */ |
448 | spin_lock(&pag->pag_buf_lock); | 457 | spin_lock(&pag->pag_buf_lock); |
@@ -453,20 +462,20 @@ _xfs_buf_find( | |||
453 | parent = *rbp; | 462 | parent = *rbp; |
454 | bp = rb_entry(parent, struct xfs_buf, b_rbnode); | 463 | bp = rb_entry(parent, struct xfs_buf, b_rbnode); |
455 | 464 | ||
456 | if (range_base < bp->b_file_offset) | 465 | if (blkno < bp->b_bn) |
457 | rbp = &(*rbp)->rb_left; | 466 | rbp = &(*rbp)->rb_left; |
458 | else if (range_base > bp->b_file_offset) | 467 | else if (blkno > bp->b_bn) |
459 | rbp = &(*rbp)->rb_right; | 468 | rbp = &(*rbp)->rb_right; |
460 | else { | 469 | else { |
461 | /* | 470 | /* |
462 | * found a block offset match. If the range doesn't | 471 | * found a block number match. If the range doesn't |
463 | * match, the only way this is allowed is if the buffer | 472 | * match, the only way this is allowed is if the buffer |
464 | * in the cache is stale and the transaction that made | 473 | * in the cache is stale and the transaction that made |
465 | * it stale has not yet committed. i.e. we are | 474 | * it stale has not yet committed. i.e. we are |
466 | * reallocating a busy extent. Skip this buffer and | 475 | * reallocating a busy extent. Skip this buffer and |
467 | * continue searching to the right for an exact match. | 476 | * continue searching to the right for an exact match. |
468 | */ | 477 | */ |
469 | if (bp->b_buffer_length != range_length) { | 478 | if (bp->b_length != numblks) { |
470 | ASSERT(bp->b_flags & XBF_STALE); | 479 | ASSERT(bp->b_flags & XBF_STALE); |
471 | rbp = &(*rbp)->rb_right; | 480 | rbp = &(*rbp)->rb_right; |
472 | continue; | 481 | continue; |
@@ -511,7 +520,7 @@ found: | |||
511 | */ | 520 | */ |
512 | if (bp->b_flags & XBF_STALE) { | 521 | if (bp->b_flags & XBF_STALE) { |
513 | ASSERT((bp->b_flags & _XBF_DELWRI_Q) == 0); | 522 | ASSERT((bp->b_flags & _XBF_DELWRI_Q) == 0); |
514 | bp->b_flags &= XBF_MAPPED | _XBF_KMEM | _XBF_PAGES; | 523 | bp->b_flags &= _XBF_KMEM | _XBF_PAGES; |
515 | } | 524 | } |
516 | 525 | ||
517 | trace_xfs_buf_find(bp, flags, _RET_IP_); | 526 | trace_xfs_buf_find(bp, flags, _RET_IP_); |
@@ -526,63 +535,59 @@ found: | |||
526 | */ | 535 | */ |
527 | struct xfs_buf * | 536 | struct xfs_buf * |
528 | xfs_buf_get( | 537 | xfs_buf_get( |
529 | xfs_buftarg_t *target,/* target for buffer */ | 538 | xfs_buftarg_t *target, |
530 | xfs_off_t ioff, /* starting offset of range */ | 539 | xfs_daddr_t blkno, |
531 | size_t isize, /* length of range */ | 540 | size_t numblks, |
532 | xfs_buf_flags_t flags) | 541 | xfs_buf_flags_t flags) |
533 | { | 542 | { |
534 | struct xfs_buf *bp; | 543 | struct xfs_buf *bp; |
535 | struct xfs_buf *new_bp; | 544 | struct xfs_buf *new_bp; |
536 | int error = 0; | 545 | int error = 0; |
537 | 546 | ||
538 | bp = _xfs_buf_find(target, ioff, isize, flags, NULL); | 547 | bp = _xfs_buf_find(target, blkno, numblks, flags, NULL); |
539 | if (likely(bp)) | 548 | if (likely(bp)) |
540 | goto found; | 549 | goto found; |
541 | 550 | ||
542 | new_bp = xfs_buf_alloc(target, ioff << BBSHIFT, isize << BBSHIFT, | 551 | new_bp = xfs_buf_alloc(target, blkno, numblks, flags); |
543 | flags); | ||
544 | if (unlikely(!new_bp)) | 552 | if (unlikely(!new_bp)) |
545 | return NULL; | 553 | return NULL; |
546 | 554 | ||
547 | bp = _xfs_buf_find(target, ioff, isize, flags, new_bp); | 555 | error = xfs_buf_allocate_memory(new_bp, flags); |
548 | if (!bp) { | 556 | if (error) { |
549 | kmem_zone_free(xfs_buf_zone, new_bp); | 557 | kmem_zone_free(xfs_buf_zone, new_bp); |
550 | return NULL; | 558 | return NULL; |
551 | } | 559 | } |
552 | 560 | ||
553 | if (bp == new_bp) { | 561 | bp = _xfs_buf_find(target, blkno, numblks, flags, new_bp); |
554 | error = xfs_buf_allocate_memory(bp, flags); | 562 | if (!bp) { |
555 | if (error) | 563 | xfs_buf_free(new_bp); |
556 | goto no_buffer; | 564 | return NULL; |
557 | } else | 565 | } |
558 | kmem_zone_free(xfs_buf_zone, new_bp); | 566 | |
567 | if (bp != new_bp) | ||
568 | xfs_buf_free(new_bp); | ||
559 | 569 | ||
560 | /* | 570 | /* |
561 | * Now we have a workable buffer, fill in the block number so | 571 | * Now we have a workable buffer, fill in the block number so |
562 | * that we can do IO on it. | 572 | * that we can do IO on it. |
563 | */ | 573 | */ |
564 | bp->b_bn = ioff; | 574 | bp->b_bn = blkno; |
565 | bp->b_count_desired = bp->b_buffer_length; | 575 | bp->b_io_length = bp->b_length; |
566 | 576 | ||
567 | found: | 577 | found: |
568 | if (!(bp->b_flags & XBF_MAPPED)) { | 578 | if (!bp->b_addr) { |
569 | error = _xfs_buf_map_pages(bp, flags); | 579 | error = _xfs_buf_map_pages(bp, flags); |
570 | if (unlikely(error)) { | 580 | if (unlikely(error)) { |
571 | xfs_warn(target->bt_mount, | 581 | xfs_warn(target->bt_mount, |
572 | "%s: failed to map pages\n", __func__); | 582 | "%s: failed to map pages\n", __func__); |
573 | goto no_buffer; | 583 | xfs_buf_relse(bp); |
584 | return NULL; | ||
574 | } | 585 | } |
575 | } | 586 | } |
576 | 587 | ||
577 | XFS_STATS_INC(xb_get); | 588 | XFS_STATS_INC(xb_get); |
578 | trace_xfs_buf_get(bp, flags, _RET_IP_); | 589 | trace_xfs_buf_get(bp, flags, _RET_IP_); |
579 | return bp; | 590 | return bp; |
580 | |||
581 | no_buffer: | ||
582 | if (flags & (XBF_LOCK | XBF_TRYLOCK)) | ||
583 | xfs_buf_unlock(bp); | ||
584 | xfs_buf_rele(bp); | ||
585 | return NULL; | ||
586 | } | 591 | } |
587 | 592 | ||
588 | STATIC int | 593 | STATIC int |
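The reordering in xfs_buf_get() above is the point of the hunk: backing memory is now allocated before the buffer is inserted into the per-AG rbtree, so a half-initialised buffer can never be found by another thread. In outline, with error handling elided (a paraphrase of the code above, not new logic):

	bp = _xfs_buf_find(target, blkno, numblks, flags, NULL);	/* fast path: cache hit */
	if (!bp) {
		new_bp = xfs_buf_alloc(target, blkno, numblks, flags);
		xfs_buf_allocate_memory(new_bp, flags);		/* before insertion */
		bp = _xfs_buf_find(target, blkno, numblks, flags, new_bp);
		if (bp != new_bp)
			xfs_buf_free(new_bp);			/* lost the insert race */
	}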
@@ -590,32 +595,30 @@ _xfs_buf_read( | |||
590 | xfs_buf_t *bp, | 595 | xfs_buf_t *bp, |
591 | xfs_buf_flags_t flags) | 596 | xfs_buf_flags_t flags) |
592 | { | 597 | { |
593 | int status; | 598 | ASSERT(!(flags & XBF_WRITE)); |
594 | |||
595 | ASSERT(!(flags & (XBF_DELWRI|XBF_WRITE))); | ||
596 | ASSERT(bp->b_bn != XFS_BUF_DADDR_NULL); | 599 | ASSERT(bp->b_bn != XFS_BUF_DADDR_NULL); |
597 | 600 | ||
598 | bp->b_flags &= ~(XBF_WRITE | XBF_ASYNC | XBF_DELWRI | XBF_READ_AHEAD); | 601 | bp->b_flags &= ~(XBF_WRITE | XBF_ASYNC | XBF_READ_AHEAD); |
599 | bp->b_flags |= flags & (XBF_READ | XBF_ASYNC | XBF_READ_AHEAD); | 602 | bp->b_flags |= flags & (XBF_READ | XBF_ASYNC | XBF_READ_AHEAD); |
600 | 603 | ||
601 | status = xfs_buf_iorequest(bp); | 604 | xfs_buf_iorequest(bp); |
602 | if (status || bp->b_error || (flags & XBF_ASYNC)) | 605 | if (flags & XBF_ASYNC) |
603 | return status; | 606 | return 0; |
604 | return xfs_buf_iowait(bp); | 607 | return xfs_buf_iowait(bp); |
605 | } | 608 | } |
606 | 609 | ||
607 | xfs_buf_t * | 610 | xfs_buf_t * |
608 | xfs_buf_read( | 611 | xfs_buf_read( |
609 | xfs_buftarg_t *target, | 612 | xfs_buftarg_t *target, |
610 | xfs_off_t ioff, | 613 | xfs_daddr_t blkno, |
611 | size_t isize, | 614 | size_t numblks, |
612 | xfs_buf_flags_t flags) | 615 | xfs_buf_flags_t flags) |
613 | { | 616 | { |
614 | xfs_buf_t *bp; | 617 | xfs_buf_t *bp; |
615 | 618 | ||
616 | flags |= XBF_READ; | 619 | flags |= XBF_READ; |
617 | 620 | ||
618 | bp = xfs_buf_get(target, ioff, isize, flags); | 621 | bp = xfs_buf_get(target, blkno, numblks, flags); |
619 | if (bp) { | 622 | if (bp) { |
620 | trace_xfs_buf_read(bp, flags, _RET_IP_); | 623 | trace_xfs_buf_read(bp, flags, _RET_IP_); |
621 | 624 | ||
@@ -627,7 +630,8 @@ xfs_buf_read( | |||
627 | * Read ahead call which is already satisfied, | 630 | * Read ahead call which is already satisfied, |
628 | * drop the buffer | 631 | * drop the buffer |
629 | */ | 632 | */ |
630 | goto no_buffer; | 633 | xfs_buf_relse(bp); |
634 | return NULL; | ||
631 | } else { | 635 | } else { |
632 | /* We do not want read in the flags */ | 636 | /* We do not want read in the flags */ |
633 | bp->b_flags &= ~XBF_READ; | 637 | bp->b_flags &= ~XBF_READ; |
@@ -635,12 +639,6 @@ xfs_buf_read( | |||
635 | } | 639 | } |
636 | 640 | ||
637 | return bp; | 641 | return bp; |
638 | |||
639 | no_buffer: | ||
640 | if (flags & (XBF_LOCK | XBF_TRYLOCK)) | ||
641 | xfs_buf_unlock(bp); | ||
642 | xfs_buf_rele(bp); | ||
643 | return NULL; | ||
644 | } | 642 | } |
645 | 643 | ||
646 | /* | 644 | /* |
@@ -650,14 +648,14 @@ xfs_buf_read( | |||
650 | void | 648 | void |
651 | xfs_buf_readahead( | 649 | xfs_buf_readahead( |
652 | xfs_buftarg_t *target, | 650 | xfs_buftarg_t *target, |
653 | xfs_off_t ioff, | 651 | xfs_daddr_t blkno, |
654 | size_t isize) | 652 | size_t numblks) |
655 | { | 653 | { |
656 | if (bdi_read_congested(target->bt_bdi)) | 654 | if (bdi_read_congested(target->bt_bdi)) |
657 | return; | 655 | return; |
658 | 656 | ||
659 | xfs_buf_read(target, ioff, isize, | 657 | xfs_buf_read(target, blkno, numblks, |
660 | XBF_TRYLOCK|XBF_ASYNC|XBF_READ_AHEAD|XBF_DONT_BLOCK); | 658 | XBF_TRYLOCK|XBF_ASYNC|XBF_READ_AHEAD); |
661 | } | 659 | } |
662 | 660 | ||
663 | /* | 661 | /* |
@@ -666,16 +664,15 @@ xfs_buf_readahead( | |||
666 | */ | 664 | */ |
667 | struct xfs_buf * | 665 | struct xfs_buf * |
668 | xfs_buf_read_uncached( | 666 | xfs_buf_read_uncached( |
669 | struct xfs_mount *mp, | ||
670 | struct xfs_buftarg *target, | 667 | struct xfs_buftarg *target, |
671 | xfs_daddr_t daddr, | 668 | xfs_daddr_t daddr, |
672 | size_t length, | 669 | size_t numblks, |
673 | int flags) | 670 | int flags) |
674 | { | 671 | { |
675 | xfs_buf_t *bp; | 672 | xfs_buf_t *bp; |
676 | int error; | 673 | int error; |
677 | 674 | ||
678 | bp = xfs_buf_get_uncached(target, length, flags); | 675 | bp = xfs_buf_get_uncached(target, numblks, flags); |
679 | if (!bp) | 676 | if (!bp) |
680 | return NULL; | 677 | return NULL; |
681 | 678 | ||
@@ -683,9 +680,9 @@ xfs_buf_read_uncached( | |||
683 | XFS_BUF_SET_ADDR(bp, daddr); | 680 | XFS_BUF_SET_ADDR(bp, daddr); |
684 | XFS_BUF_READ(bp); | 681 | XFS_BUF_READ(bp); |
685 | 682 | ||
686 | xfsbdstrat(mp, bp); | 683 | xfsbdstrat(target->bt_mount, bp); |
687 | error = xfs_buf_iowait(bp); | 684 | error = xfs_buf_iowait(bp); |
688 | if (error || bp->b_error) { | 685 | if (error) { |
689 | xfs_buf_relse(bp); | 686 | xfs_buf_relse(bp); |
690 | return NULL; | 687 | return NULL; |
691 | } | 688 | } |
@@ -699,7 +696,7 @@ xfs_buf_read_uncached( | |||
699 | void | 696 | void |
700 | xfs_buf_set_empty( | 697 | xfs_buf_set_empty( |
701 | struct xfs_buf *bp, | 698 | struct xfs_buf *bp, |
702 | size_t len) | 699 | size_t numblks) |
703 | { | 700 | { |
704 | if (bp->b_pages) | 701 | if (bp->b_pages) |
705 | _xfs_buf_free_pages(bp); | 702 | _xfs_buf_free_pages(bp); |
@@ -707,10 +704,9 @@ xfs_buf_set_empty( | |||
707 | bp->b_pages = NULL; | 704 | bp->b_pages = NULL; |
708 | bp->b_page_count = 0; | 705 | bp->b_page_count = 0; |
709 | bp->b_addr = NULL; | 706 | bp->b_addr = NULL; |
710 | bp->b_file_offset = 0; | 707 | bp->b_length = numblks; |
711 | bp->b_buffer_length = bp->b_count_desired = len; | 708 | bp->b_io_length = numblks; |
712 | bp->b_bn = XFS_BUF_DADDR_NULL; | 709 | bp->b_bn = XFS_BUF_DADDR_NULL; |
713 | bp->b_flags &= ~XBF_MAPPED; | ||
714 | } | 710 | } |
715 | 711 | ||
716 | static inline struct page * | 712 | static inline struct page * |
@@ -749,7 +745,7 @@ xfs_buf_associate_memory( | |||
749 | bp->b_pages = NULL; | 745 | bp->b_pages = NULL; |
750 | bp->b_addr = mem; | 746 | bp->b_addr = mem; |
751 | 747 | ||
752 | rval = _xfs_buf_get_pages(bp, page_count, XBF_DONT_BLOCK); | 748 | rval = _xfs_buf_get_pages(bp, page_count, 0); |
753 | if (rval) | 749 | if (rval) |
754 | return rval; | 750 | return rval; |
755 | 751 | ||
@@ -760,9 +756,8 @@ xfs_buf_associate_memory( | |||
760 | pageaddr += PAGE_SIZE; | 756 | pageaddr += PAGE_SIZE; |
761 | } | 757 | } |
762 | 758 | ||
763 | bp->b_count_desired = len; | 759 | bp->b_io_length = BTOBB(len); |
764 | bp->b_buffer_length = buflen; | 760 | bp->b_length = BTOBB(buflen); |
765 | bp->b_flags |= XBF_MAPPED; | ||
766 | 761 | ||
767 | return 0; | 762 | return 0; |
768 | } | 763 | } |
@@ -770,17 +765,18 @@ xfs_buf_associate_memory( | |||
770 | xfs_buf_t * | 765 | xfs_buf_t * |
771 | xfs_buf_get_uncached( | 766 | xfs_buf_get_uncached( |
772 | struct xfs_buftarg *target, | 767 | struct xfs_buftarg *target, |
773 | size_t len, | 768 | size_t numblks, |
774 | int flags) | 769 | int flags) |
775 | { | 770 | { |
776 | unsigned long page_count = PAGE_ALIGN(len) >> PAGE_SHIFT; | 771 | unsigned long page_count; |
777 | int error, i; | 772 | int error, i; |
778 | xfs_buf_t *bp; | 773 | xfs_buf_t *bp; |
779 | 774 | ||
780 | bp = xfs_buf_alloc(target, 0, len, 0); | 775 | bp = xfs_buf_alloc(target, 0, numblks, 0); |
781 | if (unlikely(bp == NULL)) | 776 | if (unlikely(bp == NULL)) |
782 | goto fail; | 777 | goto fail; |
783 | 778 | ||
779 | page_count = PAGE_ALIGN(numblks << BBSHIFT) >> PAGE_SHIFT; | ||
784 | error = _xfs_buf_get_pages(bp, page_count, 0); | 780 | error = _xfs_buf_get_pages(bp, page_count, 0); |
785 | if (error) | 781 | if (error) |
786 | goto fail_free_buf; | 782 | goto fail_free_buf; |
@@ -792,7 +788,7 @@ xfs_buf_get_uncached( | |||
792 | } | 788 | } |
793 | bp->b_flags |= _XBF_PAGES; | 789 | bp->b_flags |= _XBF_PAGES; |
794 | 790 | ||
795 | error = _xfs_buf_map_pages(bp, XBF_MAPPED); | 791 | error = _xfs_buf_map_pages(bp, 0); |
796 | if (unlikely(error)) { | 792 | if (unlikely(error)) { |
797 | xfs_warn(target->bt_mount, | 793 | xfs_warn(target->bt_mount, |
798 | "%s: failed to map pages\n", __func__); | 794 | "%s: failed to map pages\n", __func__); |
@@ -855,7 +851,7 @@ xfs_buf_rele( | |||
855 | spin_unlock(&pag->pag_buf_lock); | 851 | spin_unlock(&pag->pag_buf_lock); |
856 | } else { | 852 | } else { |
857 | xfs_buf_lru_del(bp); | 853 | xfs_buf_lru_del(bp); |
858 | ASSERT(!(bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q))); | 854 | ASSERT(!(bp->b_flags & _XBF_DELWRI_Q)); |
859 | rb_erase(&bp->b_rbnode, &pag->pag_buf_tree); | 855 | rb_erase(&bp->b_rbnode, &pag->pag_buf_tree); |
860 | spin_unlock(&pag->pag_buf_lock); | 856 | spin_unlock(&pag->pag_buf_lock); |
861 | xfs_perag_put(pag); | 857 | xfs_perag_put(pag); |
@@ -915,13 +911,6 @@ xfs_buf_lock( | |||
915 | trace_xfs_buf_lock_done(bp, _RET_IP_); | 911 | trace_xfs_buf_lock_done(bp, _RET_IP_); |
916 | } | 912 | } |
917 | 913 | ||
918 | /* | ||
919 | * Releases the lock on the buffer object. | ||
920 | * If the buffer is marked delwri but is not queued, do so before we | ||
921 | * unlock the buffer as we need to set flags correctly. We also need to | ||
922 | * take a reference for the delwri queue because the unlocker is going to | ||
923 | * drop their's and they don't know we just queued it. | ||
924 | */ | ||
925 | void | 914 | void |
926 | xfs_buf_unlock( | 915 | xfs_buf_unlock( |
927 | struct xfs_buf *bp) | 916 | struct xfs_buf *bp) |
@@ -1008,9 +997,8 @@ xfs_buf_ioerror_alert( | |||
1008 | const char *func) | 997 | const char *func) |
1009 | { | 998 | { |
1010 | xfs_alert(bp->b_target->bt_mount, | 999 | xfs_alert(bp->b_target->bt_mount, |
1011 | "metadata I/O error: block 0x%llx (\"%s\") error %d buf count %zd", | 1000 | "metadata I/O error: block 0x%llx (\"%s\") error %d numblks %d", |
1012 | (__uint64_t)XFS_BUF_ADDR(bp), func, | 1001 | (__uint64_t)XFS_BUF_ADDR(bp), func, bp->b_error, bp->b_length); |
1013 | bp->b_error, XFS_BUF_COUNT(bp)); | ||
1014 | } | 1002 | } |
1015 | 1003 | ||
1016 | int | 1004 | int |
@@ -1019,10 +1007,11 @@ xfs_bwrite( | |||
1019 | { | 1007 | { |
1020 | int error; | 1008 | int error; |
1021 | 1009 | ||
1010 | ASSERT(xfs_buf_islocked(bp)); | ||
1011 | |||
1022 | bp->b_flags |= XBF_WRITE; | 1012 | bp->b_flags |= XBF_WRITE; |
1023 | bp->b_flags &= ~(XBF_ASYNC | XBF_READ); | 1013 | bp->b_flags &= ~(XBF_ASYNC | XBF_READ | _XBF_DELWRI_Q); |
1024 | 1014 | ||
1025 | xfs_buf_delwri_dequeue(bp); | ||
1026 | xfs_bdstrat_cb(bp); | 1015 | xfs_bdstrat_cb(bp); |
1027 | 1016 | ||
1028 | error = xfs_buf_iowait(bp); | 1017 | error = xfs_buf_iowait(bp); |
@@ -1181,7 +1170,7 @@ _xfs_buf_ioapply( | |||
1181 | int rw, map_i, total_nr_pages, nr_pages; | 1170 | int rw, map_i, total_nr_pages, nr_pages; |
1182 | struct bio *bio; | 1171 | struct bio *bio; |
1183 | int offset = bp->b_offset; | 1172 | int offset = bp->b_offset; |
1184 | int size = bp->b_count_desired; | 1173 | int size = BBTOB(bp->b_io_length); |
1185 | sector_t sector = bp->b_bn; | 1174 | sector_t sector = bp->b_bn; |
1186 | 1175 | ||
1187 | total_nr_pages = bp->b_page_count; | 1176 | total_nr_pages = bp->b_page_count; |
@@ -1229,7 +1218,7 @@ next_chunk: | |||
1229 | break; | 1218 | break; |
1230 | 1219 | ||
1231 | offset = 0; | 1220 | offset = 0; |
1232 | sector += nbytes >> BBSHIFT; | 1221 | sector += BTOBB(nbytes); |
1233 | size -= nbytes; | 1222 | size -= nbytes; |
1234 | total_nr_pages--; | 1223 | total_nr_pages--; |
1235 | } | 1224 | } |
@@ -1248,13 +1237,13 @@ next_chunk: | |||
1248 | } | 1237 | } |
1249 | } | 1238 | } |
1250 | 1239 | ||
1251 | int | 1240 | void |
1252 | xfs_buf_iorequest( | 1241 | xfs_buf_iorequest( |
1253 | xfs_buf_t *bp) | 1242 | xfs_buf_t *bp) |
1254 | { | 1243 | { |
1255 | trace_xfs_buf_iorequest(bp, _RET_IP_); | 1244 | trace_xfs_buf_iorequest(bp, _RET_IP_); |
1256 | 1245 | ||
1257 | ASSERT(!(bp->b_flags & XBF_DELWRI)); | 1246 | ASSERT(!(bp->b_flags & _XBF_DELWRI_Q)); |
1258 | 1247 | ||
1259 | if (bp->b_flags & XBF_WRITE) | 1248 | if (bp->b_flags & XBF_WRITE) |
1260 | xfs_buf_wait_unpin(bp); | 1249 | xfs_buf_wait_unpin(bp); |
@@ -1269,13 +1258,12 @@ xfs_buf_iorequest( | |||
1269 | _xfs_buf_ioend(bp, 0); | 1258 | _xfs_buf_ioend(bp, 0); |
1270 | 1259 | ||
1271 | xfs_buf_rele(bp); | 1260 | xfs_buf_rele(bp); |
1272 | return 0; | ||
1273 | } | 1261 | } |
1274 | 1262 | ||
1275 | /* | 1263 | /* |
1276 | * Waits for I/O to complete on the buffer supplied. | 1264 | * Waits for I/O to complete on the buffer supplied. It returns immediately if |
1277 | * It returns immediately if no I/O is pending. | 1265 | * no I/O is pending or there is already a pending error on the buffer. It |
1278 | * It returns the I/O error code, if any, or 0 if there was no error. | 1266 | * returns the I/O error code, if any, or 0 if there was no error. |
1279 | */ | 1267 | */ |
1280 | int | 1268 | int |
1281 | xfs_buf_iowait( | 1269 | xfs_buf_iowait( |
@@ -1283,7 +1271,8 @@ xfs_buf_iowait( | |||
1283 | { | 1271 | { |
1284 | trace_xfs_buf_iowait(bp, _RET_IP_); | 1272 | trace_xfs_buf_iowait(bp, _RET_IP_); |
1285 | 1273 | ||
1286 | wait_for_completion(&bp->b_iowait); | 1274 | if (!bp->b_error) |
1275 | wait_for_completion(&bp->b_iowait); | ||
1287 | 1276 | ||
1288 | trace_xfs_buf_iowait_done(bp, _RET_IP_); | 1277 | trace_xfs_buf_iowait_done(bp, _RET_IP_); |
1289 | return bp->b_error; | 1278 | return bp->b_error; |
@@ -1296,7 +1285,7 @@ xfs_buf_offset( | |||
1296 | { | 1285 | { |
1297 | struct page *page; | 1286 | struct page *page; |
1298 | 1287 | ||
1299 | if (bp->b_flags & XBF_MAPPED) | 1288 | if (bp->b_addr) |
1300 | return bp->b_addr + offset; | 1289 | return bp->b_addr + offset; |
1301 | 1290 | ||
1302 | offset += bp->b_offset; | 1291 | offset += bp->b_offset; |
@@ -1315,27 +1304,30 @@ xfs_buf_iomove( | |||
1315 | void *data, /* data address */ | 1304 | void *data, /* data address */ |
1316 | xfs_buf_rw_t mode) /* read/write/zero flag */ | 1305 | xfs_buf_rw_t mode) /* read/write/zero flag */ |
1317 | { | 1306 | { |
1318 | size_t bend, cpoff, csize; | 1307 | size_t bend; |
1319 | struct page *page; | ||
1320 | 1308 | ||
1321 | bend = boff + bsize; | 1309 | bend = boff + bsize; |
1322 | while (boff < bend) { | 1310 | while (boff < bend) { |
1323 | page = bp->b_pages[xfs_buf_btoct(boff + bp->b_offset)]; | 1311 | struct page *page; |
1324 | cpoff = xfs_buf_poff(boff + bp->b_offset); | 1312 | int page_index, page_offset, csize; |
1325 | csize = min_t(size_t, | 1313 | |
1326 | PAGE_SIZE-cpoff, bp->b_count_desired-boff); | 1314 | page_index = (boff + bp->b_offset) >> PAGE_SHIFT; |
1315 | page_offset = (boff + bp->b_offset) & ~PAGE_MASK; | ||
1316 | page = bp->b_pages[page_index]; | ||
1317 | csize = min_t(size_t, PAGE_SIZE - page_offset, | ||
1318 | BBTOB(bp->b_io_length) - boff); | ||
1327 | 1319 | ||
1328 | ASSERT(((csize + cpoff) <= PAGE_SIZE)); | 1320 | ASSERT((csize + page_offset) <= PAGE_SIZE); |
1329 | 1321 | ||
1330 | switch (mode) { | 1322 | switch (mode) { |
1331 | case XBRW_ZERO: | 1323 | case XBRW_ZERO: |
1332 | memset(page_address(page) + cpoff, 0, csize); | 1324 | memset(page_address(page) + page_offset, 0, csize); |
1333 | break; | 1325 | break; |
1334 | case XBRW_READ: | 1326 | case XBRW_READ: |
1335 | memcpy(data, page_address(page) + cpoff, csize); | 1327 | memcpy(data, page_address(page) + page_offset, csize); |
1336 | break; | 1328 | break; |
1337 | case XBRW_WRITE: | 1329 | case XBRW_WRITE: |
1338 | memcpy(page_address(page) + cpoff, data, csize); | 1330 | memcpy(page_address(page) + page_offset, data, csize); |
1339 | } | 1331 | } |
1340 | 1332 | ||
1341 | boff += csize; | 1333 | boff += csize; |
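The rewritten cursor in xfs_buf_iomove() is the standard split of a byte offset into page index and intra-page offset. As a worked example with 4096-byte pages: an offset of 9300 gives page_index = 9300 >> 12 = 2 and page_offset = 9300 & 4095 = 1108, and csize is then clamped to both the page remainder and the remaining I/O length.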
@@ -1435,11 +1427,9 @@ xfs_free_buftarg( | |||
1435 | { | 1427 | { |
1436 | unregister_shrinker(&btp->bt_shrinker); | 1428 | unregister_shrinker(&btp->bt_shrinker); |
1437 | 1429 | ||
1438 | xfs_flush_buftarg(btp, 1); | ||
1439 | if (mp->m_flags & XFS_MOUNT_BARRIER) | 1430 | if (mp->m_flags & XFS_MOUNT_BARRIER) |
1440 | xfs_blkdev_issue_flush(btp); | 1431 | xfs_blkdev_issue_flush(btp); |
1441 | 1432 | ||
1442 | kthread_stop(btp->bt_task); | ||
1443 | kmem_free(btp); | 1433 | kmem_free(btp); |
1444 | } | 1434 | } |
1445 | 1435 | ||
@@ -1491,20 +1481,6 @@ xfs_setsize_buftarg( | |||
1491 | return xfs_setsize_buftarg_flags(btp, blocksize, sectorsize, 1); | 1481 | return xfs_setsize_buftarg_flags(btp, blocksize, sectorsize, 1); |
1492 | } | 1482 | } |
1493 | 1483 | ||
1494 | STATIC int | ||
1495 | xfs_alloc_delwri_queue( | ||
1496 | xfs_buftarg_t *btp, | ||
1497 | const char *fsname) | ||
1498 | { | ||
1499 | INIT_LIST_HEAD(&btp->bt_delwri_queue); | ||
1500 | spin_lock_init(&btp->bt_delwri_lock); | ||
1501 | btp->bt_flags = 0; | ||
1502 | btp->bt_task = kthread_run(xfsbufd, btp, "xfsbufd/%s", fsname); | ||
1503 | if (IS_ERR(btp->bt_task)) | ||
1504 | return PTR_ERR(btp->bt_task); | ||
1505 | return 0; | ||
1506 | } | ||
1507 | |||
1508 | xfs_buftarg_t * | 1484 | xfs_buftarg_t * |
1509 | xfs_alloc_buftarg( | 1485 | xfs_alloc_buftarg( |
1510 | struct xfs_mount *mp, | 1486 | struct xfs_mount *mp, |
@@ -1527,8 +1503,6 @@ xfs_alloc_buftarg( | |||
1527 | spin_lock_init(&btp->bt_lru_lock); | 1503 | spin_lock_init(&btp->bt_lru_lock); |
1528 | if (xfs_setsize_buftarg_early(btp, bdev)) | 1504 | if (xfs_setsize_buftarg_early(btp, bdev)) |
1529 | goto error; | 1505 | goto error; |
1530 | if (xfs_alloc_delwri_queue(btp, fsname)) | ||
1531 | goto error; | ||
1532 | btp->bt_shrinker.shrink = xfs_buftarg_shrink; | 1506 | btp->bt_shrinker.shrink = xfs_buftarg_shrink; |
1533 | btp->bt_shrinker.seeks = DEFAULT_SEEKS; | 1507 | btp->bt_shrinker.seeks = DEFAULT_SEEKS; |
1534 | register_shrinker(&btp->bt_shrinker); | 1508 | register_shrinker(&btp->bt_shrinker); |
@@ -1539,125 +1513,52 @@ error: | |||
1539 | return NULL; | 1513 | return NULL; |
1540 | } | 1514 | } |
1541 | 1515 | ||
1542 | |||
1543 | /* | 1516 | /* |
1544 | * Delayed write buffer handling | 1517 | * Add a buffer to the delayed write list. |
1518 | * | ||
1519 | * This queues a buffer for writeout if it hasn't been queued already. Note that | ||
1520 | * neither this routine nor the buffer list submission functions perform | ||
1521 | * any internal synchronization. It is expected that the lists are thread-local | ||
1522 | * to the callers. | ||
1523 | * | ||
1524 | * Returns true if we queued up the buffer, or false if it had already | ||
1525 | * been on the buffer list. | ||
1545 | */ | 1526 | */ |
1546 | void | 1527 | bool |
1547 | xfs_buf_delwri_queue( | 1528 | xfs_buf_delwri_queue( |
1548 | xfs_buf_t *bp) | 1529 | struct xfs_buf *bp, |
1530 | struct list_head *list) | ||
1549 | { | 1531 | { |
1550 | struct xfs_buftarg *btp = bp->b_target; | 1532 | ASSERT(xfs_buf_islocked(bp)); |
1551 | |||
1552 | trace_xfs_buf_delwri_queue(bp, _RET_IP_); | ||
1553 | |||
1554 | ASSERT(!(bp->b_flags & XBF_READ)); | 1533 | ASSERT(!(bp->b_flags & XBF_READ)); |
1555 | 1534 | ||
1556 | spin_lock(&btp->bt_delwri_lock); | 1535 | /* |
1557 | if (!list_empty(&bp->b_list)) { | 1536 | * If the buffer is already marked delwri it is already queued up |
1558 | /* if already in the queue, move it to the tail */ | 1537 | * by someone else for immediate writeout. Just ignore it in that |
1559 | ASSERT(bp->b_flags & _XBF_DELWRI_Q); | 1538 | * case. |
1560 | list_move_tail(&bp->b_list, &btp->bt_delwri_queue); | 1539 | */ |
1561 | } else { | 1540 | if (bp->b_flags & _XBF_DELWRI_Q) { |
1562 | /* start xfsbufd as it is about to have something to do */ | 1541 | trace_xfs_buf_delwri_queued(bp, _RET_IP_); |
1563 | if (list_empty(&btp->bt_delwri_queue)) | 1542 | return false; |
1564 | wake_up_process(bp->b_target->bt_task); | ||
1565 | |||
1566 | atomic_inc(&bp->b_hold); | ||
1567 | bp->b_flags |= XBF_DELWRI | _XBF_DELWRI_Q | XBF_ASYNC; | ||
1568 | list_add_tail(&bp->b_list, &btp->bt_delwri_queue); | ||
1569 | } | ||
1570 | bp->b_queuetime = jiffies; | ||
1571 | spin_unlock(&btp->bt_delwri_lock); | ||
1572 | } | ||
1573 | |||
1574 | void | ||
1575 | xfs_buf_delwri_dequeue( | ||
1576 | xfs_buf_t *bp) | ||
1577 | { | ||
1578 | int dequeued = 0; | ||
1579 | |||
1580 | spin_lock(&bp->b_target->bt_delwri_lock); | ||
1581 | if ((bp->b_flags & XBF_DELWRI) && !list_empty(&bp->b_list)) { | ||
1582 | ASSERT(bp->b_flags & _XBF_DELWRI_Q); | ||
1583 | list_del_init(&bp->b_list); | ||
1584 | dequeued = 1; | ||
1585 | } | 1543 | } |
1586 | bp->b_flags &= ~(XBF_DELWRI|_XBF_DELWRI_Q); | ||
1587 | spin_unlock(&bp->b_target->bt_delwri_lock); | ||
1588 | |||
1589 | if (dequeued) | ||
1590 | xfs_buf_rele(bp); | ||
1591 | |||
1592 | trace_xfs_buf_delwri_dequeue(bp, _RET_IP_); | ||
1593 | } | ||
1594 | |||
1595 | /* | ||
1596 | * If a delwri buffer needs to be pushed before it has aged out, then promote | ||
1597 | * it to the head of the delwri queue so that it will be flushed on the next | ||
1598 | * xfsbufd run. We do this by resetting the queuetime of the buffer to be older | ||
1599 | * than the age currently needed to flush the buffer. Hence the next time the | ||
1600 | * xfsbufd sees it is guaranteed to be considered old enough to flush. | ||
1601 | */ | ||
1602 | void | ||
1603 | xfs_buf_delwri_promote( | ||
1604 | struct xfs_buf *bp) | ||
1605 | { | ||
1606 | struct xfs_buftarg *btp = bp->b_target; | ||
1607 | long age = xfs_buf_age_centisecs * msecs_to_jiffies(10) + 1; | ||
1608 | 1544 | ||
1609 | ASSERT(bp->b_flags & XBF_DELWRI); | 1545 | trace_xfs_buf_delwri_queue(bp, _RET_IP_); |
1610 | ASSERT(bp->b_flags & _XBF_DELWRI_Q); | ||
1611 | 1546 | ||
1612 | /* | 1547 | /* |
1613 | * Check the buffer age before locking the delayed write queue as we | 1548 | * If a buffer gets written out synchronously or marked stale while it |
1614 | * don't need to promote buffers that are already past the flush age. | 1549 | * is on a delwri list we lazily remove it. To do this, the other party |
1550 | * clears the _XBF_DELWRI_Q flag but otherwise leaves the buffer alone. | ||
1551 | * It remains referenced and on the list. In a rare corner case it | ||
1552 | * might get re-added to a delwri list after the synchronous writeout, in | ||
1553 | * which case we just need to re-add the flag here. | ||
1615 | */ | 1554 | */ |
1616 | if (bp->b_queuetime < jiffies - age) | 1555 | bp->b_flags |= _XBF_DELWRI_Q; |
1617 | return; | 1556 | if (list_empty(&bp->b_list)) { |
1618 | bp->b_queuetime = jiffies - age; | 1557 | atomic_inc(&bp->b_hold); |
1619 | spin_lock(&btp->bt_delwri_lock); | 1558 | list_add_tail(&bp->b_list, list); |
1620 | list_move(&bp->b_list, &btp->bt_delwri_queue); | ||
1621 | spin_unlock(&btp->bt_delwri_lock); | ||
1622 | } | ||
1623 | |||
1624 | /* | ||
1625 | * Move as many buffers as specified to the supplied list | ||
1626 | * indicating if we skipped any buffers to prevent deadlocks. | ||
1627 | */ | ||
1628 | STATIC int | ||
1629 | xfs_buf_delwri_split( | ||
1630 | xfs_buftarg_t *target, | ||
1631 | struct list_head *list, | ||
1632 | unsigned long age) | ||
1633 | { | ||
1634 | xfs_buf_t *bp, *n; | ||
1635 | int skipped = 0; | ||
1636 | int force; | ||
1637 | |||
1638 | force = test_and_clear_bit(XBT_FORCE_FLUSH, &target->bt_flags); | ||
1639 | INIT_LIST_HEAD(list); | ||
1640 | spin_lock(&target->bt_delwri_lock); | ||
1641 | list_for_each_entry_safe(bp, n, &target->bt_delwri_queue, b_list) { | ||
1642 | ASSERT(bp->b_flags & XBF_DELWRI); | ||
1643 | |||
1644 | if (!xfs_buf_ispinned(bp) && xfs_buf_trylock(bp)) { | ||
1645 | if (!force && | ||
1646 | time_before(jiffies, bp->b_queuetime + age)) { | ||
1647 | xfs_buf_unlock(bp); | ||
1648 | break; | ||
1649 | } | ||
1650 | |||
1651 | bp->b_flags &= ~(XBF_DELWRI | _XBF_DELWRI_Q); | ||
1652 | bp->b_flags |= XBF_WRITE; | ||
1653 | list_move_tail(&bp->b_list, list); | ||
1654 | trace_xfs_buf_delwri_split(bp, _RET_IP_); | ||
1655 | } else | ||
1656 | skipped++; | ||
1657 | } | 1559 | } |
1658 | 1560 | ||
1659 | spin_unlock(&target->bt_delwri_lock); | 1561 | return true; |
1660 | return skipped; | ||
1661 | } | 1562 | } |
1662 | 1563 | ||
1663 | /* | 1564 | /* |
@@ -1683,99 +1584,109 @@ xfs_buf_cmp( | |||
1683 | return 0; | 1584 | return 0; |
1684 | } | 1585 | } |
1685 | 1586 | ||
1686 | STATIC int | 1587 | static int |
1687 | xfsbufd( | 1588 | __xfs_buf_delwri_submit( |
1688 | void *data) | 1589 | struct list_head *buffer_list, |
1590 | struct list_head *io_list, | ||
1591 | bool wait) | ||
1689 | { | 1592 | { |
1690 | xfs_buftarg_t *target = (xfs_buftarg_t *)data; | 1593 | struct blk_plug plug; |
1691 | 1594 | struct xfs_buf *bp, *n; | |
1692 | current->flags |= PF_MEMALLOC; | 1595 | int pinned = 0; |
1693 | 1596 | ||
1694 | set_freezable(); | 1597 | list_for_each_entry_safe(bp, n, buffer_list, b_list) { |
1598 | if (!wait) { | ||
1599 | if (xfs_buf_ispinned(bp)) { | ||
1600 | pinned++; | ||
1601 | continue; | ||
1602 | } | ||
1603 | if (!xfs_buf_trylock(bp)) | ||
1604 | continue; | ||
1605 | } else { | ||
1606 | xfs_buf_lock(bp); | ||
1607 | } | ||
1695 | 1608 | ||
1696 | do { | 1609 | /* |
1697 | long age = xfs_buf_age_centisecs * msecs_to_jiffies(10); | 1610 | * Someone else might have written the buffer synchronously or |
1698 | long tout = xfs_buf_timer_centisecs * msecs_to_jiffies(10); | 1611 | * marked it stale in the meantime. In that case only the |
1699 | struct list_head tmp; | 1612 | * _XBF_DELWRI_Q flag got cleared, and we have to drop the |
1700 | struct blk_plug plug; | 1613 | * reference and remove it from the list here. |
1614 | */ | ||
1615 | if (!(bp->b_flags & _XBF_DELWRI_Q)) { | ||
1616 | list_del_init(&bp->b_list); | ||
1617 | xfs_buf_relse(bp); | ||
1618 | continue; | ||
1619 | } | ||
1701 | 1620 | ||
1702 | if (unlikely(freezing(current))) | 1621 | list_move_tail(&bp->b_list, io_list); |
1703 | try_to_freeze(); | 1622 | trace_xfs_buf_delwri_split(bp, _RET_IP_); |
1623 | } | ||
1704 | 1624 | ||
1705 | /* sleep for a long time if there is nothing to do. */ | 1625 | list_sort(NULL, io_list, xfs_buf_cmp); |
1706 | if (list_empty(&target->bt_delwri_queue)) | ||
1707 | tout = MAX_SCHEDULE_TIMEOUT; | ||
1708 | schedule_timeout_interruptible(tout); | ||
1709 | 1626 | ||
1710 | xfs_buf_delwri_split(target, &tmp, age); | 1627 | blk_start_plug(&plug); |
1711 | list_sort(NULL, &tmp, xfs_buf_cmp); | 1628 | list_for_each_entry_safe(bp, n, io_list, b_list) { |
1629 | bp->b_flags &= ~(_XBF_DELWRI_Q | XBF_ASYNC); | ||
1630 | bp->b_flags |= XBF_WRITE; | ||
1712 | 1631 | ||
1713 | blk_start_plug(&plug); | 1632 | if (!wait) { |
1714 | while (!list_empty(&tmp)) { | 1633 | bp->b_flags |= XBF_ASYNC; |
1715 | struct xfs_buf *bp; | ||
1716 | bp = list_first_entry(&tmp, struct xfs_buf, b_list); | ||
1717 | list_del_init(&bp->b_list); | 1634 | list_del_init(&bp->b_list); |
1718 | xfs_bdstrat_cb(bp); | ||
1719 | } | 1635 | } |
1720 | blk_finish_plug(&plug); | 1636 | xfs_bdstrat_cb(bp); |
1721 | } while (!kthread_should_stop()); | 1637 | } |
1638 | blk_finish_plug(&plug); | ||
1722 | 1639 | ||
1723 | return 0; | 1640 | return pinned; |
1724 | } | 1641 | } |
1725 | 1642 | ||
1726 | /* | 1643 | /* |
1727 | * Go through all incore buffers, and release buffers if they belong to | 1644 | * Write out a buffer list asynchronously. |
1728 | * the given device. This is used in filesystem error handling to | 1645 | * |
1729 | * preserve the consistency of its metadata. | 1646 | * This will take the @buffer_list, write all non-locked and non-pinned buffers |
1647 | * out and not wait for I/O completion on any of the buffers. This interface | ||
1648 | * is only safely usable for callers that can track I/O completion by higher | ||
1649 | * level means, e.g. AIL pushing as the @buffer_list is consumed in this | ||
1650 | * function. | ||
1730 | */ | 1651 | */ |
1731 | int | 1652 | int |
1732 | xfs_flush_buftarg( | 1653 | xfs_buf_delwri_submit_nowait( |
1733 | xfs_buftarg_t *target, | 1654 | struct list_head *buffer_list) |
1734 | int wait) | ||
1735 | { | 1655 | { |
1736 | xfs_buf_t *bp; | 1656 | LIST_HEAD (io_list); |
1737 | int pincount = 0; | 1657 | return __xfs_buf_delwri_submit(buffer_list, &io_list, false); |
1738 | LIST_HEAD(tmp_list); | 1658 | } |
1739 | LIST_HEAD(wait_list); | ||
1740 | struct blk_plug plug; | ||
1741 | 1659 | ||
1742 | flush_workqueue(xfslogd_workqueue); | 1660 | /* |
1661 | * Write out a buffer list synchronously. | ||
1662 | * | ||
1663 | * This will take the @buffer_list, write all buffers out and wait for I/O | ||
1664 | * completion on all of the buffers. @buffer_list is consumed by the function, | ||
1665 | * so callers must have some other way of tracking buffers if they require such | ||
1666 | * functionality. | ||
1667 | */ | ||
1668 | int | ||
1669 | xfs_buf_delwri_submit( | ||
1670 | struct list_head *buffer_list) | ||
1671 | { | ||
1672 | LIST_HEAD (io_list); | ||
1673 | int error = 0, error2; | ||
1674 | struct xfs_buf *bp; | ||
1743 | 1675 | ||
1744 | set_bit(XBT_FORCE_FLUSH, &target->bt_flags); | 1676 | __xfs_buf_delwri_submit(buffer_list, &io_list, true); |
1745 | pincount = xfs_buf_delwri_split(target, &tmp_list, 0); | ||
1746 | 1677 | ||
1747 | /* | 1678 | /* Wait for IO to complete. */ |
1748 | * Dropped the delayed write list lock, now walk the temporary list. | 1679 | while (!list_empty(&io_list)) { |
1749 | * All I/O is issued async and then if we need to wait for completion | 1680 | bp = list_first_entry(&io_list, struct xfs_buf, b_list); |
1750 | * we do that after issuing all the IO. | ||
1751 | */ | ||
1752 | list_sort(NULL, &tmp_list, xfs_buf_cmp); | ||
1753 | 1681 | ||
1754 | blk_start_plug(&plug); | ||
1755 | while (!list_empty(&tmp_list)) { | ||
1756 | bp = list_first_entry(&tmp_list, struct xfs_buf, b_list); | ||
1757 | ASSERT(target == bp->b_target); | ||
1758 | list_del_init(&bp->b_list); | 1682 | list_del_init(&bp->b_list); |
1759 | if (wait) { | 1683 | error2 = xfs_buf_iowait(bp); |
1760 | bp->b_flags &= ~XBF_ASYNC; | 1684 | xfs_buf_relse(bp); |
1761 | list_add(&bp->b_list, &wait_list); | 1685 | if (!error) |
1762 | } | 1686 | error = error2; |
1763 | xfs_bdstrat_cb(bp); | ||
1764 | } | ||
1765 | blk_finish_plug(&plug); | ||
1766 | |||
1767 | if (wait) { | ||
1768 | /* Wait for IO to complete. */ | ||
1769 | while (!list_empty(&wait_list)) { | ||
1770 | bp = list_first_entry(&wait_list, struct xfs_buf, b_list); | ||
1771 | |||
1772 | list_del_init(&bp->b_list); | ||
1773 | xfs_buf_iowait(bp); | ||
1774 | xfs_buf_relse(bp); | ||
1775 | } | ||
1776 | } | 1687 | } |
1777 | 1688 | ||
1778 | return pincount; | 1689 | return error; |
1779 | } | 1690 | } |
1780 | 1691 | ||
1781 | int __init | 1692 | int __init |
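Taken together, the per-target xfsbufd and its delwri queue are gone; callers now own the list. A minimal sketch of the resulting usage pattern, assuming a locked buffer bp (names illustrative, not code from this patch):

	LIST_HEAD(buffer_list);

	/* under the buffer lock; takes a hold, returns false if already queued */
	xfs_buf_delwri_queue(bp, &buffer_list);
	xfs_buf_unlock(bp);

	/* later, from the owning thread; the list is consumed */
	error = xfs_buf_delwri_submit(&buffer_list);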
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index 5bf3be45f543..7f1d1392ce37 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h | |||
@@ -32,11 +32,6 @@ | |||
32 | 32 | ||
33 | #define XFS_BUF_DADDR_NULL ((xfs_daddr_t) (-1LL)) | 33 | #define XFS_BUF_DADDR_NULL ((xfs_daddr_t) (-1LL)) |
34 | 34 | ||
35 | #define xfs_buf_ctob(pp) ((pp) * PAGE_CACHE_SIZE) | ||
36 | #define xfs_buf_btoc(dd) (((dd) + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT) | ||
37 | #define xfs_buf_btoct(dd) ((dd) >> PAGE_CACHE_SHIFT) | ||
38 | #define xfs_buf_poff(aa) ((aa) & ~PAGE_CACHE_MASK) | ||
39 | |||
40 | typedef enum { | 35 | typedef enum { |
41 | XBRW_READ = 1, /* transfer into target memory */ | 36 | XBRW_READ = 1, /* transfer into target memory */ |
42 | XBRW_WRITE = 2, /* transfer from target memory */ | 37 | XBRW_WRITE = 2, /* transfer from target memory */ |
@@ -46,11 +41,9 @@ typedef enum { | |||
46 | #define XBF_READ (1 << 0) /* buffer intended for reading from device */ | 41 | #define XBF_READ (1 << 0) /* buffer intended for reading from device */ |
47 | #define XBF_WRITE (1 << 1) /* buffer intended for writing to device */ | 42 | #define XBF_WRITE (1 << 1) /* buffer intended for writing to device */ |
48 | #define XBF_READ_AHEAD (1 << 2) /* asynchronous read-ahead */ | 43 | #define XBF_READ_AHEAD (1 << 2) /* asynchronous read-ahead */ |
49 | #define XBF_MAPPED (1 << 3) /* buffer mapped (b_addr valid) */ | ||
50 | #define XBF_ASYNC (1 << 4) /* initiator will not wait for completion */ | 44 | #define XBF_ASYNC (1 << 4) /* initiator will not wait for completion */ |
51 | #define XBF_DONE (1 << 5) /* all pages in the buffer uptodate */ | 45 | #define XBF_DONE (1 << 5) /* all pages in the buffer uptodate */ |
52 | #define XBF_DELWRI (1 << 6) /* buffer has dirty pages */ | 46 | #define XBF_STALE (1 << 6) /* buffer has been staled, do not find it */ |
53 | #define XBF_STALE (1 << 7) /* buffer has been staled, do not find it */ | ||
54 | 47 | ||
55 | /* I/O hints for the BIO layer */ | 48 | /* I/O hints for the BIO layer */ |
56 | #define XBF_SYNCIO (1 << 10)/* treat this buffer as synchronous I/O */ | 49 | #define XBF_SYNCIO (1 << 10)/* treat this buffer as synchronous I/O */ |
@@ -58,14 +51,13 @@ typedef enum { | |||
58 | #define XBF_FLUSH (1 << 12)/* flush the disk cache before a write */ | 51 | #define XBF_FLUSH (1 << 12)/* flush the disk cache before a write */ |
59 | 52 | ||
60 | /* flags used only as arguments to access routines */ | 53 | /* flags used only as arguments to access routines */ |
61 | #define XBF_LOCK (1 << 15)/* lock requested */ | ||
62 | #define XBF_TRYLOCK (1 << 16)/* lock requested, but do not wait */ | 54 | #define XBF_TRYLOCK (1 << 16)/* lock requested, but do not wait */ |
63 | #define XBF_DONT_BLOCK (1 << 17)/* do not block in current thread */ | 55 | #define XBF_UNMAPPED (1 << 17)/* do not map the buffer */ |
64 | 56 | ||
65 | /* flags used only internally */ | 57 | /* flags used only internally */ |
66 | #define _XBF_PAGES (1 << 20)/* backed by refcounted pages */ | 58 | #define _XBF_PAGES (1 << 20)/* backed by refcounted pages */ |
67 | #define _XBF_KMEM (1 << 21)/* backed by heap memory */ | 59 | #define _XBF_KMEM (1 << 21)/* backed by heap memory */ |
68 | #define _XBF_DELWRI_Q (1 << 22)/* buffer on delwri queue */ | 60 | #define _XBF_DELWRI_Q (1 << 22)/* buffer on a delwri queue */ |
69 | 61 | ||
70 | typedef unsigned int xfs_buf_flags_t; | 62 | typedef unsigned int xfs_buf_flags_t; |
71 | 63 | ||
@@ -73,25 +65,18 @@ typedef unsigned int xfs_buf_flags_t; | |||
73 | { XBF_READ, "READ" }, \ | 65 | { XBF_READ, "READ" }, \ |
74 | { XBF_WRITE, "WRITE" }, \ | 66 | { XBF_WRITE, "WRITE" }, \ |
75 | { XBF_READ_AHEAD, "READ_AHEAD" }, \ | 67 | { XBF_READ_AHEAD, "READ_AHEAD" }, \ |
76 | { XBF_MAPPED, "MAPPED" }, \ | ||
77 | { XBF_ASYNC, "ASYNC" }, \ | 68 | { XBF_ASYNC, "ASYNC" }, \ |
78 | { XBF_DONE, "DONE" }, \ | 69 | { XBF_DONE, "DONE" }, \ |
79 | { XBF_DELWRI, "DELWRI" }, \ | ||
80 | { XBF_STALE, "STALE" }, \ | 70 | { XBF_STALE, "STALE" }, \ |
81 | { XBF_SYNCIO, "SYNCIO" }, \ | 71 | { XBF_SYNCIO, "SYNCIO" }, \ |
82 | { XBF_FUA, "FUA" }, \ | 72 | { XBF_FUA, "FUA" }, \ |
83 | { XBF_FLUSH, "FLUSH" }, \ | 73 | { XBF_FLUSH, "FLUSH" }, \ |
84 | { XBF_LOCK, "LOCK" }, /* should never be set */\ | 74 | { XBF_TRYLOCK, "TRYLOCK" }, /* should never be set */\ |
85 | { XBF_TRYLOCK, "TRYLOCK" }, /* ditto */\ | 75 | { XBF_UNMAPPED, "UNMAPPED" }, /* ditto */\ |
86 | { XBF_DONT_BLOCK, "DONT_BLOCK" }, /* ditto */\ | ||
87 | { _XBF_PAGES, "PAGES" }, \ | 76 | { _XBF_PAGES, "PAGES" }, \ |
88 | { _XBF_KMEM, "KMEM" }, \ | 77 | { _XBF_KMEM, "KMEM" }, \ |
89 | { _XBF_DELWRI_Q, "DELWRI_Q" } | 78 | { _XBF_DELWRI_Q, "DELWRI_Q" } |
90 | 79 | ||
91 | typedef enum { | ||
92 | XBT_FORCE_FLUSH = 0, | ||
93 | } xfs_buftarg_flags_t; | ||
94 | |||
95 | typedef struct xfs_buftarg { | 80 | typedef struct xfs_buftarg { |
96 | dev_t bt_dev; | 81 | dev_t bt_dev; |
97 | struct block_device *bt_bdev; | 82 | struct block_device *bt_bdev; |
@@ -101,12 +86,6 @@ typedef struct xfs_buftarg { | |||
101 | unsigned int bt_sshift; | 86 | unsigned int bt_sshift; |
102 | size_t bt_smask; | 87 | size_t bt_smask; |
103 | 88 | ||
104 | /* per device delwri queue */ | ||
105 | struct task_struct *bt_task; | ||
106 | struct list_head bt_delwri_queue; | ||
107 | spinlock_t bt_delwri_lock; | ||
108 | unsigned long bt_flags; | ||
109 | |||
110 | /* LRU control structures */ | 89 | /* LRU control structures */ |
111 | struct shrinker bt_shrinker; | 90 | struct shrinker bt_shrinker; |
112 | struct list_head bt_lru; | 91 | struct list_head bt_lru; |
@@ -128,8 +107,8 @@ typedef struct xfs_buf { | |||
128 | * fast-path on locking. | 107 | * fast-path on locking. |
129 | */ | 108 | */ |
130 | struct rb_node b_rbnode; /* rbtree node */ | 109 | struct rb_node b_rbnode; /* rbtree node */ |
131 | xfs_off_t b_file_offset; /* offset in file */ | 110 | xfs_daddr_t b_bn; /* block number for I/O */ |
132 | size_t b_buffer_length;/* size of buffer in bytes */ | 111 | int b_length; /* size of buffer in BBs */ |
133 | atomic_t b_hold; /* reference count */ | 112 | atomic_t b_hold; /* reference count */ |
134 | atomic_t b_lru_ref; /* lru reclaim ref count */ | 113 | atomic_t b_lru_ref; /* lru reclaim ref count */ |
135 | xfs_buf_flags_t b_flags; /* status flags */ | 114 | xfs_buf_flags_t b_flags; /* status flags */ |
@@ -140,8 +119,6 @@ typedef struct xfs_buf { | |||
140 | struct list_head b_list; | 119 | struct list_head b_list; |
141 | struct xfs_perag *b_pag; /* contains rbtree root */ | 120 | struct xfs_perag *b_pag; /* contains rbtree root */ |
142 | xfs_buftarg_t *b_target; /* buffer target (device) */ | 121 | xfs_buftarg_t *b_target; /* buffer target (device) */ |
143 | xfs_daddr_t b_bn; /* block number for I/O */ | ||
144 | size_t b_count_desired;/* desired transfer size */ | ||
145 | void *b_addr; /* virtual address of buffer */ | 122 | void *b_addr; /* virtual address of buffer */ |
146 | struct work_struct b_iodone_work; | 123 | struct work_struct b_iodone_work; |
147 | xfs_buf_iodone_t b_iodone; /* I/O completion function */ | 124 | xfs_buf_iodone_t b_iodone; /* I/O completion function */ |
@@ -150,7 +127,7 @@ typedef struct xfs_buf { | |||
150 | struct xfs_trans *b_transp; | 127 | struct xfs_trans *b_transp; |
151 | struct page **b_pages; /* array of page pointers */ | 128 | struct page **b_pages; /* array of page pointers */ |
152 | struct page *b_page_array[XB_PAGES]; /* inline pages */ | 129 | struct page *b_page_array[XB_PAGES]; /* inline pages */ |
153 | unsigned long b_queuetime; /* time buffer was queued */ | 130 | int b_io_length; /* IO size in BBs */ |
154 | atomic_t b_pin_count; /* pin count */ | 131 | atomic_t b_pin_count; /* pin count */ |
155 | atomic_t b_io_remaining; /* #outstanding I/O requests */ | 132 | atomic_t b_io_remaining; /* #outstanding I/O requests */ |
156 | unsigned int b_page_count; /* size of page array */ | 133 | unsigned int b_page_count; /* size of page array */ |
@@ -163,26 +140,30 @@ typedef struct xfs_buf { | |||
163 | 140 | ||
164 | 141 | ||
165 | /* Finding and Reading Buffers */ | 142 | /* Finding and Reading Buffers */ |
166 | extern xfs_buf_t *_xfs_buf_find(xfs_buftarg_t *, xfs_off_t, size_t, | 143 | struct xfs_buf *_xfs_buf_find(struct xfs_buftarg *target, xfs_daddr_t blkno, |
167 | xfs_buf_flags_t, xfs_buf_t *); | 144 | size_t numblks, xfs_buf_flags_t flags, |
145 | struct xfs_buf *new_bp); | ||
168 | #define xfs_incore(buftarg,blkno,len,lockit) \ | 146 | #define xfs_incore(buftarg,blkno,len,lockit) \ |
169 | _xfs_buf_find(buftarg, blkno ,len, lockit, NULL) | 147 | _xfs_buf_find(buftarg, blkno ,len, lockit, NULL) |
170 | 148 | ||
171 | extern xfs_buf_t *xfs_buf_get(xfs_buftarg_t *, xfs_off_t, size_t, | 149 | struct xfs_buf *xfs_buf_get(struct xfs_buftarg *target, xfs_daddr_t blkno, |
172 | xfs_buf_flags_t); | 150 | size_t numblks, xfs_buf_flags_t flags); |
173 | extern xfs_buf_t *xfs_buf_read(xfs_buftarg_t *, xfs_off_t, size_t, | 151 | struct xfs_buf *xfs_buf_read(struct xfs_buftarg *target, xfs_daddr_t blkno, |
174 | xfs_buf_flags_t); | 152 | size_t numblks, xfs_buf_flags_t flags); |
175 | 153 | void xfs_buf_readahead(struct xfs_buftarg *target, xfs_daddr_t blkno, | |
176 | struct xfs_buf *xfs_buf_alloc(struct xfs_buftarg *, xfs_off_t, size_t, | 154 | size_t numblks); |
177 | xfs_buf_flags_t); | 155 | |
178 | extern void xfs_buf_set_empty(struct xfs_buf *bp, size_t len); | 156 | struct xfs_buf *xfs_buf_get_empty(struct xfs_buftarg *target, size_t numblks); |
179 | extern xfs_buf_t *xfs_buf_get_uncached(struct xfs_buftarg *, size_t, int); | 157 | struct xfs_buf *xfs_buf_alloc(struct xfs_buftarg *target, xfs_daddr_t blkno, |
180 | extern int xfs_buf_associate_memory(xfs_buf_t *, void *, size_t); | 158 | size_t numblks, xfs_buf_flags_t flags); |
181 | extern void xfs_buf_hold(xfs_buf_t *); | 159 | void xfs_buf_set_empty(struct xfs_buf *bp, size_t numblks); |
182 | extern void xfs_buf_readahead(xfs_buftarg_t *, xfs_off_t, size_t); | 160 | int xfs_buf_associate_memory(struct xfs_buf *bp, void *mem, size_t length); |
183 | struct xfs_buf *xfs_buf_read_uncached(struct xfs_mount *mp, | 161 | |
184 | struct xfs_buftarg *target, | 162 | struct xfs_buf *xfs_buf_get_uncached(struct xfs_buftarg *target, size_t numblks, |
185 | xfs_daddr_t daddr, size_t length, int flags); | 163 | int flags); |
164 | struct xfs_buf *xfs_buf_read_uncached(struct xfs_buftarg *target, | ||
165 | xfs_daddr_t daddr, size_t numblks, int flags); | ||
166 | void xfs_buf_hold(struct xfs_buf *bp); | ||
186 | 167 | ||
187 | /* Releasing Buffers */ | 168 | /* Releasing Buffers */ |
188 | extern void xfs_buf_free(xfs_buf_t *); | 169 | extern void xfs_buf_free(xfs_buf_t *); |
@@ -204,7 +185,7 @@ extern int xfs_bdstrat_cb(struct xfs_buf *); | |||
204 | extern void xfs_buf_ioend(xfs_buf_t *, int); | 185 | extern void xfs_buf_ioend(xfs_buf_t *, int); |
205 | extern void xfs_buf_ioerror(xfs_buf_t *, int); | 186 | extern void xfs_buf_ioerror(xfs_buf_t *, int); |
206 | extern void xfs_buf_ioerror_alert(struct xfs_buf *, const char *func); | 187 | extern void xfs_buf_ioerror_alert(struct xfs_buf *, const char *func); |
207 | extern int xfs_buf_iorequest(xfs_buf_t *); | 188 | extern void xfs_buf_iorequest(xfs_buf_t *); |
208 | extern int xfs_buf_iowait(xfs_buf_t *); | 189 | extern int xfs_buf_iowait(xfs_buf_t *); |
209 | extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, void *, | 190 | extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, void *, |
210 | xfs_buf_rw_t); | 191 | xfs_buf_rw_t); |
@@ -220,24 +201,22 @@ static inline int xfs_buf_geterror(xfs_buf_t *bp) | |||
220 | extern xfs_caddr_t xfs_buf_offset(xfs_buf_t *, size_t); | 201 | extern xfs_caddr_t xfs_buf_offset(xfs_buf_t *, size_t); |
221 | 202 | ||
222 | /* Delayed Write Buffer Routines */ | 203 | /* Delayed Write Buffer Routines */ |
223 | extern void xfs_buf_delwri_queue(struct xfs_buf *); | 204 | extern bool xfs_buf_delwri_queue(struct xfs_buf *, struct list_head *); |
224 | extern void xfs_buf_delwri_dequeue(struct xfs_buf *); | 205 | extern int xfs_buf_delwri_submit(struct list_head *); |
225 | extern void xfs_buf_delwri_promote(struct xfs_buf *); | 206 | extern int xfs_buf_delwri_submit_nowait(struct list_head *); |
226 | 207 | ||
227 | /* Buffer Daemon Setup Routines */ | 208 | /* Buffer Daemon Setup Routines */ |
228 | extern int xfs_buf_init(void); | 209 | extern int xfs_buf_init(void); |
229 | extern void xfs_buf_terminate(void); | 210 | extern void xfs_buf_terminate(void); |
230 | 211 | ||
231 | #define XFS_BUF_ZEROFLAGS(bp) \ | 212 | #define XFS_BUF_ZEROFLAGS(bp) \ |
232 | ((bp)->b_flags &= ~(XBF_READ|XBF_WRITE|XBF_ASYNC|XBF_DELWRI| \ | 213 | ((bp)->b_flags &= ~(XBF_READ|XBF_WRITE|XBF_ASYNC| \ |
233 | XBF_SYNCIO|XBF_FUA|XBF_FLUSH)) | 214 | XBF_SYNCIO|XBF_FUA|XBF_FLUSH)) |
234 | 215 | ||
235 | void xfs_buf_stale(struct xfs_buf *bp); | 216 | void xfs_buf_stale(struct xfs_buf *bp); |
236 | #define XFS_BUF_UNSTALE(bp) ((bp)->b_flags &= ~XBF_STALE) | 217 | #define XFS_BUF_UNSTALE(bp) ((bp)->b_flags &= ~XBF_STALE) |
237 | #define XFS_BUF_ISSTALE(bp) ((bp)->b_flags & XBF_STALE) | 218 | #define XFS_BUF_ISSTALE(bp) ((bp)->b_flags & XBF_STALE) |
238 | 219 | ||
239 | #define XFS_BUF_ISDELAYWRITE(bp) ((bp)->b_flags & XBF_DELWRI) | ||
240 | |||
241 | #define XFS_BUF_DONE(bp) ((bp)->b_flags |= XBF_DONE) | 220 | #define XFS_BUF_DONE(bp) ((bp)->b_flags |= XBF_DONE) |
242 | #define XFS_BUF_UNDONE(bp) ((bp)->b_flags &= ~XBF_DONE) | 221 | #define XFS_BUF_UNDONE(bp) ((bp)->b_flags &= ~XBF_DONE) |
243 | #define XFS_BUF_ISDONE(bp) ((bp)->b_flags & XBF_DONE) | 222 | #define XFS_BUF_ISDONE(bp) ((bp)->b_flags & XBF_DONE) |
@@ -256,12 +235,6 @@ void xfs_buf_stale(struct xfs_buf *bp); | |||
256 | 235 | ||
257 | #define XFS_BUF_ADDR(bp) ((bp)->b_bn) | 236 | #define XFS_BUF_ADDR(bp) ((bp)->b_bn) |
258 | #define XFS_BUF_SET_ADDR(bp, bno) ((bp)->b_bn = (xfs_daddr_t)(bno)) | 237 | #define XFS_BUF_SET_ADDR(bp, bno) ((bp)->b_bn = (xfs_daddr_t)(bno)) |
259 | #define XFS_BUF_OFFSET(bp) ((bp)->b_file_offset) | ||
260 | #define XFS_BUF_SET_OFFSET(bp, off) ((bp)->b_file_offset = (off)) | ||
261 | #define XFS_BUF_COUNT(bp) ((bp)->b_count_desired) | ||
262 | #define XFS_BUF_SET_COUNT(bp, cnt) ((bp)->b_count_desired = (cnt)) | ||
263 | #define XFS_BUF_SIZE(bp) ((bp)->b_buffer_length) | ||
264 | #define XFS_BUF_SET_SIZE(bp, cnt) ((bp)->b_buffer_length = (cnt)) | ||
265 | 238 | ||
266 | static inline void xfs_buf_set_ref(struct xfs_buf *bp, int lru_ref) | 239 | static inline void xfs_buf_set_ref(struct xfs_buf *bp, int lru_ref) |
267 | { | 240 | { |
@@ -287,7 +260,6 @@ extern xfs_buftarg_t *xfs_alloc_buftarg(struct xfs_mount *, | |||
287 | extern void xfs_free_buftarg(struct xfs_mount *, struct xfs_buftarg *); | 260 | extern void xfs_free_buftarg(struct xfs_mount *, struct xfs_buftarg *); |
288 | extern void xfs_wait_buftarg(xfs_buftarg_t *); | 261 | extern void xfs_wait_buftarg(xfs_buftarg_t *); |
289 | extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int); | 262 | extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int); |
290 | extern int xfs_flush_buftarg(xfs_buftarg_t *, int); | ||
291 | 263 | ||
292 | #define xfs_getsize_buftarg(buftarg) block_size((buftarg)->bt_bdev) | 264 | #define xfs_getsize_buftarg(buftarg) block_size((buftarg)->bt_bdev) |
293 | #define xfs_readonly_buftarg(buftarg) bdev_read_only((buftarg)->bt_bdev) | 265 | #define xfs_readonly_buftarg(buftarg) bdev_read_only((buftarg)->bt_bdev) |
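The delayed-write interface above no longer queues buffers to a per-target daemon; callers now supply their own list. A minimal sketch of the new calling pattern, inferred from the prototypes above and the xfs_buf_item_push() hunk below (the bps[]/nbuf buffer set is a placeholder, not code from the patch):

	LIST_HEAD(buffer_list);
	int i, error;

	for (i = 0; i < nbuf; i++) {
		xfs_buf_lock(bps[i]);
		/* returns false if someone else already queued the buffer */
		xfs_buf_delwri_queue(bps[i], &buffer_list);
		xfs_buf_unlock(bps[i]);
	}
	/* write out everything on the list; _submit() waits, _submit_nowait() doesn't */
	error = xfs_buf_delwri_submit(&buffer_list);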
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index eac97ef81e2a..45df2b857d48 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c | |||
@@ -20,7 +20,6 @@ | |||
20 | #include "xfs_types.h" | 20 | #include "xfs_types.h" |
21 | #include "xfs_bit.h" | 21 | #include "xfs_bit.h" |
22 | #include "xfs_log.h" | 22 | #include "xfs_log.h" |
23 | #include "xfs_inum.h" | ||
24 | #include "xfs_trans.h" | 23 | #include "xfs_trans.h" |
25 | #include "xfs_sb.h" | 24 | #include "xfs_sb.h" |
26 | #include "xfs_ag.h" | 25 | #include "xfs_ag.h" |
@@ -123,11 +122,11 @@ xfs_buf_item_log_check( | |||
123 | ASSERT(bip->bli_logged != NULL); | 122 | ASSERT(bip->bli_logged != NULL); |
124 | 123 | ||
125 | bp = bip->bli_buf; | 124 | bp = bip->bli_buf; |
126 | ASSERT(XFS_BUF_COUNT(bp) > 0); | 125 | ASSERT(bp->b_length > 0); |
127 | ASSERT(bp->b_addr != NULL); | 126 | ASSERT(bp->b_addr != NULL); |
128 | orig = bip->bli_orig; | 127 | orig = bip->bli_orig; |
129 | buffer = bp->b_addr; | 128 | buffer = bp->b_addr; |
130 | for (x = 0; x < XFS_BUF_COUNT(bp); x++) { | 129 | for (x = 0; x < BBTOB(bp->b_length); x++) { |
131 | if (orig[x] != buffer[x] && !btst(bip->bli_logged, x)) { | 130 | if (orig[x] != buffer[x] && !btst(bip->bli_logged, x)) { |
132 | xfs_emerg(bp->b_mount, | 131 | xfs_emerg(bp->b_mount, |
133 | "%s: bip %x buffer %x orig %x index %d", | 132 | "%s: bip %x buffer %x orig %x index %d", |
@@ -418,7 +417,6 @@ xfs_buf_item_unpin( | |||
418 | if (freed && stale) { | 417 | if (freed && stale) { |
419 | ASSERT(bip->bli_flags & XFS_BLI_STALE); | 418 | ASSERT(bip->bli_flags & XFS_BLI_STALE); |
420 | ASSERT(xfs_buf_islocked(bp)); | 419 | ASSERT(xfs_buf_islocked(bp)); |
421 | ASSERT(!(XFS_BUF_ISDELAYWRITE(bp))); | ||
422 | ASSERT(XFS_BUF_ISSTALE(bp)); | 420 | ASSERT(XFS_BUF_ISSTALE(bp)); |
423 | ASSERT(bip->bli_format.blf_flags & XFS_BLF_CANCEL); | 421 | ASSERT(bip->bli_format.blf_flags & XFS_BLF_CANCEL); |
424 | 422 | ||
@@ -455,42 +453,42 @@ xfs_buf_item_unpin( | |||
455 | bp->b_iodone = NULL; | 453 | bp->b_iodone = NULL; |
456 | } else { | 454 | } else { |
457 | spin_lock(&ailp->xa_lock); | 455 | spin_lock(&ailp->xa_lock); |
458 | xfs_trans_ail_delete(ailp, (xfs_log_item_t *)bip); | 456 | xfs_trans_ail_delete(ailp, lip, SHUTDOWN_LOG_IO_ERROR); |
459 | xfs_buf_item_relse(bp); | 457 | xfs_buf_item_relse(bp); |
460 | ASSERT(bp->b_fspriv == NULL); | 458 | ASSERT(bp->b_fspriv == NULL); |
461 | } | 459 | } |
462 | xfs_buf_relse(bp); | 460 | xfs_buf_relse(bp); |
461 | } else if (freed && remove) { | ||
462 | xfs_buf_lock(bp); | ||
463 | xfs_buf_ioerror(bp, EIO); | ||
464 | XFS_BUF_UNDONE(bp); | ||
465 | xfs_buf_stale(bp); | ||
466 | xfs_buf_ioend(bp, 0); | ||
463 | } | 467 | } |
464 | } | 468 | } |
465 | 469 | ||
466 | /* | ||
467 | * This is called to attempt to lock the buffer associated with this | ||
468 | * buf log item. Don't sleep on the buffer lock. If we can't get | ||
469 | * the lock right away, return 0. If we can get the lock, take a | ||
470 | * reference to the buffer. If this is a delayed write buffer that | ||
471 | * needs AIL help to be written back, invoke the pushbuf routine | ||
472 | * rather than the normal success path. | ||
473 | */ | ||
474 | STATIC uint | 470 | STATIC uint |
475 | xfs_buf_item_trylock( | 471 | xfs_buf_item_push( |
476 | struct xfs_log_item *lip) | 472 | struct xfs_log_item *lip, |
473 | struct list_head *buffer_list) | ||
477 | { | 474 | { |
478 | struct xfs_buf_log_item *bip = BUF_ITEM(lip); | 475 | struct xfs_buf_log_item *bip = BUF_ITEM(lip); |
479 | struct xfs_buf *bp = bip->bli_buf; | 476 | struct xfs_buf *bp = bip->bli_buf; |
477 | uint rval = XFS_ITEM_SUCCESS; | ||
480 | 478 | ||
481 | if (xfs_buf_ispinned(bp)) | 479 | if (xfs_buf_ispinned(bp)) |
482 | return XFS_ITEM_PINNED; | 480 | return XFS_ITEM_PINNED; |
483 | if (!xfs_buf_trylock(bp)) | 481 | if (!xfs_buf_trylock(bp)) |
484 | return XFS_ITEM_LOCKED; | 482 | return XFS_ITEM_LOCKED; |
485 | 483 | ||
486 | /* take a reference to the buffer. */ | ||
487 | xfs_buf_hold(bp); | ||
488 | |||
489 | ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); | 484 | ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); |
490 | trace_xfs_buf_item_trylock(bip); | 485 | |
491 | if (XFS_BUF_ISDELAYWRITE(bp)) | 486 | trace_xfs_buf_item_push(bip); |
492 | return XFS_ITEM_PUSHBUF; | 487 | |
493 | return XFS_ITEM_SUCCESS; | 488 | if (!xfs_buf_delwri_queue(bp, buffer_list)) |
489 | rval = XFS_ITEM_FLUSHING; | ||
490 | xfs_buf_unlock(bp); | ||
491 | return rval; | ||
494 | } | 492 | } |
495 | 493 | ||
496 | /* | 494 | /* |
@@ -603,49 +601,6 @@ xfs_buf_item_committed( | |||
603 | return lsn; | 601 | return lsn; |
604 | } | 602 | } |
605 | 603 | ||
606 | /* | ||
607 | * The buffer is locked, but is not a delayed write buffer. This happens | ||
608 | * if we race with IO completion and hence we don't want to try to write it | ||
609 | * again. Just release the buffer. | ||
610 | */ | ||
611 | STATIC void | ||
612 | xfs_buf_item_push( | ||
613 | struct xfs_log_item *lip) | ||
614 | { | ||
615 | struct xfs_buf_log_item *bip = BUF_ITEM(lip); | ||
616 | struct xfs_buf *bp = bip->bli_buf; | ||
617 | |||
618 | ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); | ||
619 | ASSERT(!XFS_BUF_ISDELAYWRITE(bp)); | ||
620 | |||
621 | trace_xfs_buf_item_push(bip); | ||
622 | |||
623 | xfs_buf_relse(bp); | ||
624 | } | ||
625 | |||
626 | /* | ||
627 | * The buffer is locked and is a delayed write buffer. Promote the buffer | ||
628 | * in the delayed write queue as the caller knows that they must invoke | ||
629 | * the xfsbufd to get this buffer written. We have to unlock the buffer | ||
630 | * to allow the xfsbufd to write it, too. | ||
631 | */ | ||
632 | STATIC bool | ||
633 | xfs_buf_item_pushbuf( | ||
634 | struct xfs_log_item *lip) | ||
635 | { | ||
636 | struct xfs_buf_log_item *bip = BUF_ITEM(lip); | ||
637 | struct xfs_buf *bp = bip->bli_buf; | ||
638 | |||
639 | ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); | ||
640 | ASSERT(XFS_BUF_ISDELAYWRITE(bp)); | ||
641 | |||
642 | trace_xfs_buf_item_pushbuf(bip); | ||
643 | |||
644 | xfs_buf_delwri_promote(bp); | ||
645 | xfs_buf_relse(bp); | ||
646 | return true; | ||
647 | } | ||
648 | |||
649 | STATIC void | 604 | STATIC void |
650 | xfs_buf_item_committing( | 605 | xfs_buf_item_committing( |
651 | struct xfs_log_item *lip, | 606 | struct xfs_log_item *lip, |
@@ -661,11 +616,9 @@ static const struct xfs_item_ops xfs_buf_item_ops = { | |||
661 | .iop_format = xfs_buf_item_format, | 616 | .iop_format = xfs_buf_item_format, |
662 | .iop_pin = xfs_buf_item_pin, | 617 | .iop_pin = xfs_buf_item_pin, |
663 | .iop_unpin = xfs_buf_item_unpin, | 618 | .iop_unpin = xfs_buf_item_unpin, |
664 | .iop_trylock = xfs_buf_item_trylock, | ||
665 | .iop_unlock = xfs_buf_item_unlock, | 619 | .iop_unlock = xfs_buf_item_unlock, |
666 | .iop_committed = xfs_buf_item_committed, | 620 | .iop_committed = xfs_buf_item_committed, |
667 | .iop_push = xfs_buf_item_push, | 621 | .iop_push = xfs_buf_item_push, |
668 | .iop_pushbuf = xfs_buf_item_pushbuf, | ||
669 | .iop_committing = xfs_buf_item_committing | 622 | .iop_committing = xfs_buf_item_committing |
670 | }; | 623 | }; |
671 | 624 | ||
@@ -703,7 +656,8 @@ xfs_buf_item_init( | |||
703 | * truncate any pieces. map_size is the size of the | 656 | * truncate any pieces. map_size is the size of the |
704 | * bitmap needed to describe the chunks of the buffer. | 657 | * bitmap needed to describe the chunks of the buffer. |
705 | */ | 658 | */ |
706 | chunks = (int)((XFS_BUF_COUNT(bp) + (XFS_BLF_CHUNK - 1)) >> XFS_BLF_SHIFT); | 659 | chunks = (int)((BBTOB(bp->b_length) + (XFS_BLF_CHUNK - 1)) >> |
660 | XFS_BLF_SHIFT); | ||
707 | map_size = (int)((chunks + NBWORD) >> BIT_TO_WORD_SHIFT); | 661 | map_size = (int)((chunks + NBWORD) >> BIT_TO_WORD_SHIFT); |
708 | 662 | ||
709 | bip = (xfs_buf_log_item_t*)kmem_zone_zalloc(xfs_buf_item_zone, | 663 | bip = (xfs_buf_log_item_t*)kmem_zone_zalloc(xfs_buf_item_zone, |
@@ -713,7 +667,7 @@ xfs_buf_item_init( | |||
713 | xfs_buf_hold(bp); | 667 | xfs_buf_hold(bp); |
714 | bip->bli_format.blf_type = XFS_LI_BUF; | 668 | bip->bli_format.blf_type = XFS_LI_BUF; |
715 | bip->bli_format.blf_blkno = (__int64_t)XFS_BUF_ADDR(bp); | 669 | bip->bli_format.blf_blkno = (__int64_t)XFS_BUF_ADDR(bp); |
716 | bip->bli_format.blf_len = (ushort)BTOBB(XFS_BUF_COUNT(bp)); | 670 | bip->bli_format.blf_len = (ushort)bp->b_length; |
717 | bip->bli_format.blf_map_size = map_size; | 671 | bip->bli_format.blf_map_size = map_size; |
718 | 672 | ||
719 | #ifdef XFS_TRANS_DEBUG | 673 | #ifdef XFS_TRANS_DEBUG |
@@ -725,9 +679,9 @@ xfs_buf_item_init( | |||
725 | * the buffer to indicate which bytes the callers have asked | 679 | * the buffer to indicate which bytes the callers have asked |
726 | * to have logged. | 680 | * to have logged. |
727 | */ | 681 | */ |
728 | bip->bli_orig = (char *)kmem_alloc(XFS_BUF_COUNT(bp), KM_SLEEP); | 682 | bip->bli_orig = kmem_alloc(BBTOB(bp->b_length), KM_SLEEP); |
729 | memcpy(bip->bli_orig, bp->b_addr, XFS_BUF_COUNT(bp)); | 683 | memcpy(bip->bli_orig, bp->b_addr, BBTOB(bp->b_length)); |
730 | bip->bli_logged = (char *)kmem_zalloc(XFS_BUF_COUNT(bp) / NBBY, KM_SLEEP); | 684 | bip->bli_logged = kmem_zalloc(BBTOB(bp->b_length) / NBBY, KM_SLEEP); |
731 | #endif | 685 | #endif |
732 | 686 | ||
733 | /* | 687 | /* |
@@ -984,20 +938,27 @@ xfs_buf_iodone_callbacks( | |||
984 | * If the write was asynchronous then no one will be looking for the | 938 | * If the write was asynchronous then no one will be looking for the |
985 | * error. Clear the error state and write the buffer out again. | 939 | * error. Clear the error state and write the buffer out again. |
986 | * | 940 | * |
987 | * During sync or umount we'll write all pending buffers again | 941 | * XXX: This helps against transient write errors, but we need to find |
988 | * synchronous, which will catch these errors if they keep hanging | 942 | * a way to shut the filesystem down if the writes keep failing. |
989 | * around. | 943 | * |
944 | In practice we'll shut the filesystem down soon, as non-transient | ||
945 | errors tend to affect the whole device and a failing log write | ||
946 | * will make us give up. But we really ought to do better here. | ||
990 | */ | 947 | */ |
991 | if (XFS_BUF_ISASYNC(bp)) { | 948 | if (XFS_BUF_ISASYNC(bp)) { |
949 | ASSERT(bp->b_iodone != NULL); | ||
950 | |||
951 | trace_xfs_buf_item_iodone_async(bp, _RET_IP_); | ||
952 | |||
992 | xfs_buf_ioerror(bp, 0); /* errno of 0 unsets the flag */ | 953 | xfs_buf_ioerror(bp, 0); /* errno of 0 unsets the flag */ |
993 | 954 | ||
994 | if (!XFS_BUF_ISSTALE(bp)) { | 955 | if (!XFS_BUF_ISSTALE(bp)) { |
995 | xfs_buf_delwri_queue(bp); | 956 | bp->b_flags |= XBF_WRITE | XBF_ASYNC | XBF_DONE; |
996 | XFS_BUF_DONE(bp); | 957 | xfs_bdstrat_cb(bp); |
958 | } else { | ||
959 | xfs_buf_relse(bp); | ||
997 | } | 960 | } |
998 | ASSERT(bp->b_iodone != NULL); | 961 | |
999 | trace_xfs_buf_item_iodone_async(bp, _RET_IP_); | ||
1000 | xfs_buf_relse(bp); | ||
1001 | return; | 962 | return; |
1002 | } | 963 | } |
1003 | 964 | ||
@@ -1045,6 +1006,6 @@ xfs_buf_iodone( | |||
1045 | * Either way, AIL is useless if we're forcing a shutdown. | 1006 | * Either way, AIL is useless if we're forcing a shutdown. |
1046 | */ | 1007 | */ |
1047 | spin_lock(&ailp->xa_lock); | 1008 | spin_lock(&ailp->xa_lock); |
1048 | xfs_trans_ail_delete(ailp, lip); | 1009 | xfs_trans_ail_delete(ailp, lip, SHUTDOWN_CORRUPT_INCORE); |
1049 | xfs_buf_item_free(BUF_ITEM(lip)); | 1010 | xfs_buf_item_free(BUF_ITEM(lip)); |
1050 | } | 1011 | } |
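With .iop_trylock and .iop_pushbuf gone, a single ->iop_push(lip, buffer_list) now carries the whole AIL writeback contract, with XFS_ITEM_FLUSHING replacing XFS_ITEM_PUSHBUF. Roughly how the AIL pusher is expected to consume the return codes; this loop is a simplified sketch, not the patch's xfsaild code:

	switch (lip->li_ops->iop_push(lip, &buffer_list)) {
	case XFS_ITEM_SUCCESS:	/* queued on buffer_list for writeback */
		break;
	case XFS_ITEM_PINNED:	/* needs a log force before it can move */
		break;
	case XFS_ITEM_LOCKED:	/* busy elsewhere; retry on the next pass */
		break;
	case XFS_ITEM_FLUSHING:	/* writeback already in flight; just wait */
		break;
	}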
diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c index 7f1a6f5b05a6..015b946c5808 100644 --- a/fs/xfs/xfs_da_btree.c +++ b/fs/xfs/xfs_da_btree.c | |||
@@ -20,7 +20,6 @@ | |||
20 | #include "xfs_types.h" | 20 | #include "xfs_types.h" |
21 | #include "xfs_bit.h" | 21 | #include "xfs_bit.h" |
22 | #include "xfs_log.h" | 22 | #include "xfs_log.h" |
23 | #include "xfs_inum.h" | ||
24 | #include "xfs_trans.h" | 23 | #include "xfs_trans.h" |
25 | #include "xfs_sb.h" | 24 | #include "xfs_sb.h" |
26 | #include "xfs_ag.h" | 25 | #include "xfs_ag.h" |
@@ -2277,20 +2276,20 @@ xfs_da_buf_make(int nbuf, xfs_buf_t **bps) | |||
2277 | if (nbuf == 1) { | 2276 | if (nbuf == 1) { |
2278 | dabuf->nbuf = 1; | 2277 | dabuf->nbuf = 1; |
2279 | bp = bps[0]; | 2278 | bp = bps[0]; |
2280 | dabuf->bbcount = (short)BTOBB(XFS_BUF_COUNT(bp)); | 2279 | dabuf->bbcount = bp->b_length; |
2281 | dabuf->data = bp->b_addr; | 2280 | dabuf->data = bp->b_addr; |
2282 | dabuf->bps[0] = bp; | 2281 | dabuf->bps[0] = bp; |
2283 | } else { | 2282 | } else { |
2284 | dabuf->nbuf = nbuf; | 2283 | dabuf->nbuf = nbuf; |
2285 | for (i = 0, dabuf->bbcount = 0; i < nbuf; i++) { | 2284 | for (i = 0, dabuf->bbcount = 0; i < nbuf; i++) { |
2286 | dabuf->bps[i] = bp = bps[i]; | 2285 | dabuf->bps[i] = bp = bps[i]; |
2287 | dabuf->bbcount += BTOBB(XFS_BUF_COUNT(bp)); | 2286 | dabuf->bbcount += bp->b_length; |
2288 | } | 2287 | } |
2289 | dabuf->data = kmem_alloc(BBTOB(dabuf->bbcount), KM_SLEEP); | 2288 | dabuf->data = kmem_alloc(BBTOB(dabuf->bbcount), KM_SLEEP); |
2290 | for (i = off = 0; i < nbuf; i++, off += XFS_BUF_COUNT(bp)) { | 2289 | for (i = off = 0; i < nbuf; i++, off += BBTOB(bp->b_length)) { |
2291 | bp = bps[i]; | 2290 | bp = bps[i]; |
2292 | memcpy((char *)dabuf->data + off, bp->b_addr, | 2291 | memcpy((char *)dabuf->data + off, bp->b_addr, |
2293 | XFS_BUF_COUNT(bp)); | 2292 | BBTOB(bp->b_length)); |
2294 | } | 2293 | } |
2295 | } | 2294 | } |
2296 | return dabuf; | 2295 | return dabuf; |
@@ -2310,10 +2309,10 @@ xfs_da_buf_clean(xfs_dabuf_t *dabuf) | |||
2310 | ASSERT(dabuf->nbuf > 1); | 2309 | ASSERT(dabuf->nbuf > 1); |
2311 | dabuf->dirty = 0; | 2310 | dabuf->dirty = 0; |
2312 | for (i = off = 0; i < dabuf->nbuf; | 2311 | for (i = off = 0; i < dabuf->nbuf; |
2313 | i++, off += XFS_BUF_COUNT(bp)) { | 2312 | i++, off += BBTOB(bp->b_length)) { |
2314 | bp = dabuf->bps[i]; | 2313 | bp = dabuf->bps[i]; |
2315 | memcpy(bp->b_addr, dabuf->data + off, | 2314 | memcpy(bp->b_addr, dabuf->data + off, |
2316 | XFS_BUF_COUNT(bp)); | 2315 | BBTOB(bp->b_length)); |
2317 | } | 2316 | } |
2318 | } | 2317 | } |
2319 | } | 2318 | } |
@@ -2356,10 +2355,10 @@ xfs_da_log_buf(xfs_trans_t *tp, xfs_dabuf_t *dabuf, uint first, uint last) | |||
2356 | } | 2355 | } |
2357 | dabuf->dirty = 1; | 2356 | dabuf->dirty = 1; |
2358 | ASSERT(first <= last); | 2357 | ASSERT(first <= last); |
2359 | for (i = off = 0; i < dabuf->nbuf; i++, off += XFS_BUF_COUNT(bp)) { | 2358 | for (i = off = 0; i < dabuf->nbuf; i++, off += BBTOB(bp->b_length)) { |
2360 | bp = dabuf->bps[i]; | 2359 | bp = dabuf->bps[i]; |
2361 | f = off; | 2360 | f = off; |
2362 | l = f + XFS_BUF_COUNT(bp) - 1; | 2361 | l = f + BBTOB(bp->b_length) - 1; |
2363 | if (f < first) | 2362 | if (f < first) |
2364 | f = first; | 2363 | f = first; |
2365 | if (l > last) | 2364 | if (l > last) |
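The XFS_BUF_COUNT() conversions above exist because the byte-based b_count_desired field is gone: b_length counts 512-byte basic blocks, so byte-oriented code has to go through BBTOB(). A small worked example, assuming the usual BBSHIFT of 9 (target and blkno are placeholders):

	struct xfs_buf *bp = xfs_buf_get(target, blkno, BTOBB(4096), 0);

	/* BBTOB(x) == x << 9, BTOBB(x) == (x + 511) >> 9 */
	ASSERT(bp->b_length == 8);		/* 4096 bytes == 8 basic blocks */
	ASSERT(BBTOB(bp->b_length) == 4096);	/* back to a byte count */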
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c index 1137bbc5eccb..e00de08dc8ac 100644 --- a/fs/xfs/xfs_dfrag.c +++ b/fs/xfs/xfs_dfrag.c | |||
@@ -18,9 +18,7 @@ | |||
18 | #include "xfs.h" | 18 | #include "xfs.h" |
19 | #include "xfs_fs.h" | 19 | #include "xfs_fs.h" |
20 | #include "xfs_types.h" | 20 | #include "xfs_types.h" |
21 | #include "xfs_bit.h" | ||
22 | #include "xfs_log.h" | 21 | #include "xfs_log.h" |
23 | #include "xfs_inum.h" | ||
24 | #include "xfs_trans.h" | 22 | #include "xfs_trans.h" |
25 | #include "xfs_sb.h" | 23 | #include "xfs_sb.h" |
26 | #include "xfs_ag.h" | 24 | #include "xfs_ag.h" |
diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/xfs_dir2.c index a2e27010c7fb..67a250c36d41 100644 --- a/fs/xfs/xfs_dir2.c +++ b/fs/xfs/xfs_dir2.c | |||
@@ -18,7 +18,6 @@ | |||
18 | #include "xfs.h" | 18 | #include "xfs.h" |
19 | #include "xfs_fs.h" | 19 | #include "xfs_fs.h" |
20 | #include "xfs_types.h" | 20 | #include "xfs_types.h" |
21 | #include "xfs_bit.h" | ||
22 | #include "xfs_log.h" | 21 | #include "xfs_log.h" |
23 | #include "xfs_inum.h" | 22 | #include "xfs_inum.h" |
24 | #include "xfs_trans.h" | 23 | #include "xfs_trans.h" |
diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c index d3b63aefd01d..586732f2d80d 100644 --- a/fs/xfs/xfs_dir2_block.c +++ b/fs/xfs/xfs_dir2_block.c | |||
@@ -19,7 +19,6 @@ | |||
19 | #include "xfs_fs.h" | 19 | #include "xfs_fs.h" |
20 | #include "xfs_types.h" | 20 | #include "xfs_types.h" |
21 | #include "xfs_log.h" | 21 | #include "xfs_log.h" |
22 | #include "xfs_inum.h" | ||
23 | #include "xfs_trans.h" | 22 | #include "xfs_trans.h" |
24 | #include "xfs_sb.h" | 23 | #include "xfs_sb.h" |
25 | #include "xfs_ag.h" | 24 | #include "xfs_ag.h" |
diff --git a/fs/xfs/xfs_dir2_data.c b/fs/xfs/xfs_dir2_data.c index 5bbe2a8a023f..2046988e9eb2 100644 --- a/fs/xfs/xfs_dir2_data.c +++ b/fs/xfs/xfs_dir2_data.c | |||
@@ -19,7 +19,6 @@ | |||
19 | #include "xfs_fs.h" | 19 | #include "xfs_fs.h" |
20 | #include "xfs_types.h" | 20 | #include "xfs_types.h" |
21 | #include "xfs_log.h" | 21 | #include "xfs_log.h" |
22 | #include "xfs_inum.h" | ||
23 | #include "xfs_trans.h" | 22 | #include "xfs_trans.h" |
24 | #include "xfs_sb.h" | 23 | #include "xfs_sb.h" |
25 | #include "xfs_ag.h" | 24 | #include "xfs_ag.h" |
diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c index 66e108f561a3..397ffbcbab1d 100644 --- a/fs/xfs/xfs_dir2_leaf.c +++ b/fs/xfs/xfs_dir2_leaf.c | |||
@@ -20,7 +20,6 @@ | |||
20 | #include "xfs_types.h" | 20 | #include "xfs_types.h" |
21 | #include "xfs_bit.h" | 21 | #include "xfs_bit.h" |
22 | #include "xfs_log.h" | 22 | #include "xfs_log.h" |
23 | #include "xfs_inum.h" | ||
24 | #include "xfs_trans.h" | 23 | #include "xfs_trans.h" |
25 | #include "xfs_sb.h" | 24 | #include "xfs_sb.h" |
26 | #include "xfs_ag.h" | 25 | #include "xfs_ag.h" |
diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c index 0179a41d9e5a..b0f26780449d 100644 --- a/fs/xfs/xfs_dir2_node.c +++ b/fs/xfs/xfs_dir2_node.c | |||
@@ -19,7 +19,6 @@ | |||
19 | #include "xfs_fs.h" | 19 | #include "xfs_fs.h" |
20 | #include "xfs_types.h" | 20 | #include "xfs_types.h" |
21 | #include "xfs_log.h" | 21 | #include "xfs_log.h" |
22 | #include "xfs_inum.h" | ||
23 | #include "xfs_trans.h" | 22 | #include "xfs_trans.h" |
24 | #include "xfs_sb.h" | 23 | #include "xfs_sb.h" |
25 | #include "xfs_ag.h" | 24 | #include "xfs_ag.h" |
diff --git a/fs/xfs/xfs_dir2_sf.c b/fs/xfs/xfs_dir2_sf.c index 79d05e84e296..19bf0c5e38f4 100644 --- a/fs/xfs/xfs_dir2_sf.c +++ b/fs/xfs/xfs_dir2_sf.c | |||
@@ -19,7 +19,6 @@ | |||
19 | #include "xfs_fs.h" | 19 | #include "xfs_fs.h" |
20 | #include "xfs_types.h" | 20 | #include "xfs_types.h" |
21 | #include "xfs_log.h" | 21 | #include "xfs_log.h" |
22 | #include "xfs_inum.h" | ||
23 | #include "xfs_trans.h" | 22 | #include "xfs_trans.h" |
24 | #include "xfs_sb.h" | 23 | #include "xfs_sb.h" |
25 | #include "xfs_ag.h" | 24 | #include "xfs_ag.h" |
diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c index 1ad3a4b8ca40..f9c3fe304a17 100644 --- a/fs/xfs/xfs_discard.c +++ b/fs/xfs/xfs_discard.c | |||
@@ -17,7 +17,6 @@ | |||
17 | */ | 17 | */ |
18 | #include "xfs.h" | 18 | #include "xfs.h" |
19 | #include "xfs_sb.h" | 19 | #include "xfs_sb.h" |
20 | #include "xfs_inum.h" | ||
21 | #include "xfs_log.h" | 20 | #include "xfs_log.h" |
22 | #include "xfs_ag.h" | 21 | #include "xfs_ag.h" |
23 | #include "xfs_mount.h" | 22 | #include "xfs_mount.h" |
@@ -30,6 +29,7 @@ | |||
30 | #include "xfs_inode.h" | 29 | #include "xfs_inode.h" |
31 | #include "xfs_alloc.h" | 30 | #include "xfs_alloc.h" |
32 | #include "xfs_error.h" | 31 | #include "xfs_error.h" |
32 | #include "xfs_extent_busy.h" | ||
33 | #include "xfs_discard.h" | 33 | #include "xfs_discard.h" |
34 | #include "xfs_trace.h" | 34 | #include "xfs_trace.h" |
35 | 35 | ||
@@ -118,7 +118,7 @@ xfs_trim_extents( | |||
118 | * If any blocks in the range are still busy, skip the | 118 | * If any blocks in the range are still busy, skip the |
119 | * discard and try again the next time. | 119 | * discard and try again the next time. |
120 | */ | 120 | */ |
121 | if (xfs_alloc_busy_search(mp, agno, fbno, flen)) { | 121 | if (xfs_extent_busy_search(mp, agno, fbno, flen)) { |
122 | trace_xfs_discard_busy(mp, agno, fbno, flen); | 122 | trace_xfs_discard_busy(mp, agno, fbno, flen); |
123 | goto next_extent; | 123 | goto next_extent; |
124 | } | 124 | } |
@@ -212,7 +212,7 @@ xfs_discard_extents( | |||
212 | struct xfs_mount *mp, | 212 | struct xfs_mount *mp, |
213 | struct list_head *list) | 213 | struct list_head *list) |
214 | { | 214 | { |
215 | struct xfs_busy_extent *busyp; | 215 | struct xfs_extent_busy *busyp; |
216 | int error = 0; | 216 | int error = 0; |
217 | 217 | ||
218 | list_for_each_entry(busyp, list, list) { | 218 | list_for_each_entry(busyp, list, list) { |
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c index 1155208fa830..bf27fcca4843 100644 --- a/fs/xfs/xfs_dquot.c +++ b/fs/xfs/xfs_dquot.c | |||
@@ -19,7 +19,6 @@ | |||
19 | #include "xfs_fs.h" | 19 | #include "xfs_fs.h" |
20 | #include "xfs_bit.h" | 20 | #include "xfs_bit.h" |
21 | #include "xfs_log.h" | 21 | #include "xfs_log.h" |
22 | #include "xfs_inum.h" | ||
23 | #include "xfs_trans.h" | 22 | #include "xfs_trans.h" |
24 | #include "xfs_sb.h" | 23 | #include "xfs_sb.h" |
25 | #include "xfs_ag.h" | 24 | #include "xfs_ag.h" |
@@ -857,7 +856,7 @@ xfs_qm_dqflush_done( | |||
857 | /* xfs_trans_ail_delete() drops the AIL lock. */ | 856 | /* xfs_trans_ail_delete() drops the AIL lock. */ |
858 | spin_lock(&ailp->xa_lock); | 857 | spin_lock(&ailp->xa_lock); |
859 | if (lip->li_lsn == qip->qli_flush_lsn) | 858 | if (lip->li_lsn == qip->qli_flush_lsn) |
860 | xfs_trans_ail_delete(ailp, lip); | 859 | xfs_trans_ail_delete(ailp, lip, SHUTDOWN_CORRUPT_INCORE); |
861 | else | 860 | else |
862 | spin_unlock(&ailp->xa_lock); | 861 | spin_unlock(&ailp->xa_lock); |
863 | } | 862 | } |
@@ -878,8 +877,8 @@ xfs_qm_dqflush_done( | |||
878 | */ | 877 | */ |
879 | int | 878 | int |
880 | xfs_qm_dqflush( | 879 | xfs_qm_dqflush( |
881 | xfs_dquot_t *dqp, | 880 | struct xfs_dquot *dqp, |
882 | uint flags) | 881 | struct xfs_buf **bpp) |
883 | { | 882 | { |
884 | struct xfs_mount *mp = dqp->q_mount; | 883 | struct xfs_mount *mp = dqp->q_mount; |
885 | struct xfs_buf *bp; | 884 | struct xfs_buf *bp; |
@@ -891,25 +890,30 @@ xfs_qm_dqflush( | |||
891 | 890 | ||
892 | trace_xfs_dqflush(dqp); | 891 | trace_xfs_dqflush(dqp); |
893 | 892 | ||
894 | /* | 893 | *bpp = NULL; |
895 | * If not dirty, or it's pinned and we are not supposed to block, nada. | 894 | |
896 | */ | ||
897 | if (!XFS_DQ_IS_DIRTY(dqp) || | ||
898 | ((flags & SYNC_TRYLOCK) && atomic_read(&dqp->q_pincount) > 0)) { | ||
899 | xfs_dqfunlock(dqp); | ||
900 | return 0; | ||
901 | } | ||
902 | xfs_qm_dqunpin_wait(dqp); | 895 | xfs_qm_dqunpin_wait(dqp); |
903 | 896 | ||
904 | /* | 897 | /* |
905 | * This may have been unpinned because the filesystem is shutting | 898 | * This may have been unpinned because the filesystem is shutting |
906 | * down forcibly. If that's the case we must not write this dquot | 899 | * down forcibly. If that's the case we must not write this dquot |
907 | * to disk, because the log record didn't make it to disk! | 900 | * to disk, because the log record didn't make it to disk. |
901 | * | ||
902 | * We also have to remove the log item from the AIL in this case, | ||
903 | * as we wait for an empty AIL as part of the unmount process. | ||
908 | */ | 904 | */ |
909 | if (XFS_FORCED_SHUTDOWN(mp)) { | 905 | if (XFS_FORCED_SHUTDOWN(mp)) { |
906 | struct xfs_log_item *lip = &dqp->q_logitem.qli_item; | ||
910 | dqp->dq_flags &= ~XFS_DQ_DIRTY; | 907 | dqp->dq_flags &= ~XFS_DQ_DIRTY; |
911 | xfs_dqfunlock(dqp); | 908 | |
912 | return XFS_ERROR(EIO); | 909 | spin_lock(&mp->m_ail->xa_lock); |
910 | if (lip->li_flags & XFS_LI_IN_AIL) | ||
911 | xfs_trans_ail_delete(mp->m_ail, lip, | ||
912 | SHUTDOWN_CORRUPT_INCORE); | ||
913 | else | ||
914 | spin_unlock(&mp->m_ail->xa_lock); | ||
915 | error = XFS_ERROR(EIO); | ||
916 | goto out_unlock; | ||
913 | } | 917 | } |
914 | 918 | ||
915 | /* | 919 | /* |
@@ -917,11 +921,8 @@ xfs_qm_dqflush( | |||
917 | */ | 921 | */ |
918 | error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, dqp->q_blkno, | 922 | error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, dqp->q_blkno, |
919 | mp->m_quotainfo->qi_dqchunklen, 0, &bp); | 923 | mp->m_quotainfo->qi_dqchunklen, 0, &bp); |
920 | if (error) { | 924 | if (error) |
921 | ASSERT(error != ENOENT); | 925 | goto out_unlock; |
922 | xfs_dqfunlock(dqp); | ||
923 | return error; | ||
924 | } | ||
925 | 926 | ||
926 | /* | 927 | /* |
927 | * Calculate the location of the dquot inside the buffer. | 928 | * Calculate the location of the dquot inside the buffer. |
@@ -967,20 +968,13 @@ xfs_qm_dqflush( | |||
967 | xfs_log_force(mp, 0); | 968 | xfs_log_force(mp, 0); |
968 | } | 969 | } |
969 | 970 | ||
970 | if (flags & SYNC_WAIT) | ||
971 | error = xfs_bwrite(bp); | ||
972 | else | ||
973 | xfs_buf_delwri_queue(bp); | ||
974 | |||
975 | xfs_buf_relse(bp); | ||
976 | |||
977 | trace_xfs_dqflush_done(dqp); | 971 | trace_xfs_dqflush_done(dqp); |
972 | *bpp = bp; | ||
973 | return 0; | ||
978 | 974 | ||
979 | /* | 975 | out_unlock: |
980 | * dqp is still locked, but caller is free to unlock it now. | 976 | xfs_dqfunlock(dqp); |
981 | */ | 977 | return XFS_ERROR(EIO); |
982 | return error; | ||
983 | |||
984 | } | 978 | } |
985 | 979 | ||
986 | /* | 980 | /* |
@@ -1011,39 +1005,6 @@ xfs_dqlock2( | |||
1011 | } | 1005 | } |
1012 | } | 1006 | } |
1013 | 1007 | ||
1014 | /* | ||
1015 | * Give the buffer a little push if it is incore and | ||
1016 | * wait on the flush lock. | ||
1017 | */ | ||
1018 | void | ||
1019 | xfs_dqflock_pushbuf_wait( | ||
1020 | xfs_dquot_t *dqp) | ||
1021 | { | ||
1022 | xfs_mount_t *mp = dqp->q_mount; | ||
1023 | xfs_buf_t *bp; | ||
1024 | |||
1025 | /* | ||
1026 | * Check to see if the dquot has been flushed delayed | ||
1027 | * write. If so, grab its buffer and send it | ||
1028 | * out immediately. We'll be able to acquire | ||
1029 | * the flush lock when the I/O completes. | ||
1030 | */ | ||
1031 | bp = xfs_incore(mp->m_ddev_targp, dqp->q_blkno, | ||
1032 | mp->m_quotainfo->qi_dqchunklen, XBF_TRYLOCK); | ||
1033 | if (!bp) | ||
1034 | goto out_lock; | ||
1035 | |||
1036 | if (XFS_BUF_ISDELAYWRITE(bp)) { | ||
1037 | if (xfs_buf_ispinned(bp)) | ||
1038 | xfs_log_force(mp, 0); | ||
1039 | xfs_buf_delwri_promote(bp); | ||
1040 | wake_up_process(bp->b_target->bt_task); | ||
1041 | } | ||
1042 | xfs_buf_relse(bp); | ||
1043 | out_lock: | ||
1044 | xfs_dqflock(dqp); | ||
1045 | } | ||
1046 | |||
1047 | int __init | 1008 | int __init |
1048 | xfs_qm_init(void) | 1009 | xfs_qm_init(void) |
1049 | { | 1010 | { |
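xfs_qm_dqflush() no longer writes or queues the buffer itself; it flushes the dquot into the buffer and hands it back, so the caller owns the I/O. A sketch of the new convention, matching the dquot log item push hunk later in this patch (buffer_list comes from the caller):

	struct xfs_buf	*bp = NULL;
	int		error;

	error = xfs_qm_dqflush(dqp, &bp);
	if (!error) {
		/* schedule the write and drop our buffer reference */
		xfs_buf_delwri_queue(bp, buffer_list);
		xfs_buf_relse(bp);
	}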
diff --git a/fs/xfs/xfs_dquot.h b/fs/xfs/xfs_dquot.h index ef9190bd8b30..7d20af27346d 100644 --- a/fs/xfs/xfs_dquot.h +++ b/fs/xfs/xfs_dquot.h | |||
@@ -141,7 +141,7 @@ static inline xfs_dquot_t *xfs_inode_dquot(struct xfs_inode *ip, int type) | |||
141 | extern int xfs_qm_dqread(struct xfs_mount *, xfs_dqid_t, uint, | 141 | extern int xfs_qm_dqread(struct xfs_mount *, xfs_dqid_t, uint, |
142 | uint, struct xfs_dquot **); | 142 | uint, struct xfs_dquot **); |
143 | extern void xfs_qm_dqdestroy(xfs_dquot_t *); | 143 | extern void xfs_qm_dqdestroy(xfs_dquot_t *); |
144 | extern int xfs_qm_dqflush(xfs_dquot_t *, uint); | 144 | extern int xfs_qm_dqflush(struct xfs_dquot *, struct xfs_buf **); |
145 | extern void xfs_qm_dqunpin_wait(xfs_dquot_t *); | 145 | extern void xfs_qm_dqunpin_wait(xfs_dquot_t *); |
146 | extern void xfs_qm_adjust_dqtimers(xfs_mount_t *, | 146 | extern void xfs_qm_adjust_dqtimers(xfs_mount_t *, |
147 | xfs_disk_dquot_t *); | 147 | xfs_disk_dquot_t *); |
@@ -152,7 +152,6 @@ extern int xfs_qm_dqget(xfs_mount_t *, xfs_inode_t *, | |||
152 | extern void xfs_qm_dqput(xfs_dquot_t *); | 152 | extern void xfs_qm_dqput(xfs_dquot_t *); |
153 | 153 | ||
154 | extern void xfs_dqlock2(struct xfs_dquot *, struct xfs_dquot *); | 154 | extern void xfs_dqlock2(struct xfs_dquot *, struct xfs_dquot *); |
155 | extern void xfs_dqflock_pushbuf_wait(struct xfs_dquot *dqp); | ||
156 | 155 | ||
157 | static inline struct xfs_dquot *xfs_qm_dqhold(struct xfs_dquot *dqp) | 156 | static inline struct xfs_dquot *xfs_qm_dqhold(struct xfs_dquot *dqp) |
158 | { | 157 | { |
diff --git a/fs/xfs/xfs_dquot_item.c b/fs/xfs/xfs_dquot_item.c index 34baeae45265..57aa4b03720c 100644 --- a/fs/xfs/xfs_dquot_item.c +++ b/fs/xfs/xfs_dquot_item.c | |||
@@ -17,9 +17,7 @@ | |||
17 | */ | 17 | */ |
18 | #include "xfs.h" | 18 | #include "xfs.h" |
19 | #include "xfs_fs.h" | 19 | #include "xfs_fs.h" |
20 | #include "xfs_bit.h" | ||
21 | #include "xfs_log.h" | 20 | #include "xfs_log.h" |
22 | #include "xfs_inum.h" | ||
23 | #include "xfs_trans.h" | 21 | #include "xfs_trans.h" |
24 | #include "xfs_sb.h" | 22 | #include "xfs_sb.h" |
25 | #include "xfs_ag.h" | 23 | #include "xfs_ag.h" |
@@ -108,38 +106,6 @@ xfs_qm_dquot_logitem_unpin( | |||
108 | wake_up(&dqp->q_pinwait); | 106 | wake_up(&dqp->q_pinwait); |
109 | } | 107 | } |
110 | 108 | ||
111 | /* | ||
112 | * Given the logitem, this writes the corresponding dquot entry to disk | ||
113 | * asynchronously. This is called with the dquot entry securely locked; | ||
114 | * we simply get xfs_qm_dqflush() to do the work, and unlock the dquot | ||
115 | * at the end. | ||
116 | */ | ||
117 | STATIC void | ||
118 | xfs_qm_dquot_logitem_push( | ||
119 | struct xfs_log_item *lip) | ||
120 | { | ||
121 | struct xfs_dquot *dqp = DQUOT_ITEM(lip)->qli_dquot; | ||
122 | int error; | ||
123 | |||
124 | ASSERT(XFS_DQ_IS_LOCKED(dqp)); | ||
125 | ASSERT(!completion_done(&dqp->q_flush)); | ||
126 | |||
127 | /* | ||
128 | * Since we were able to lock the dquot's flush lock and | ||
129 | * we found it on the AIL, the dquot must be dirty. This | ||
130 | * is because the dquot is removed from the AIL while still | ||
131 | * holding the flush lock in xfs_dqflush_done(). Thus, if | ||
132 | * we found it in the AIL and were able to obtain the flush | ||
133 | * lock without sleeping, then there must not have been | ||
134 | * anyone in the process of flushing the dquot. | ||
135 | */ | ||
136 | error = xfs_qm_dqflush(dqp, SYNC_TRYLOCK); | ||
137 | if (error) | ||
138 | xfs_warn(dqp->q_mount, "%s: push error %d on dqp %p", | ||
139 | __func__, error, dqp); | ||
140 | xfs_dqunlock(dqp); | ||
141 | } | ||
142 | |||
143 | STATIC xfs_lsn_t | 109 | STATIC xfs_lsn_t |
144 | xfs_qm_dquot_logitem_committed( | 110 | xfs_qm_dquot_logitem_committed( |
145 | struct xfs_log_item *lip, | 111 | struct xfs_log_item *lip, |
@@ -171,67 +137,15 @@ xfs_qm_dqunpin_wait( | |||
171 | wait_event(dqp->q_pinwait, (atomic_read(&dqp->q_pincount) == 0)); | 137 | wait_event(dqp->q_pinwait, (atomic_read(&dqp->q_pincount) == 0)); |
172 | } | 138 | } |
173 | 139 | ||
174 | /* | ||
175 | * This is called when IOP_TRYLOCK returns XFS_ITEM_PUSHBUF to indicate that | ||
176 | * the dquot is locked by us, but the flush lock isn't. So, here we are | ||
177 | * going to see if the relevant dquot buffer is incore, waiting on DELWRI. | ||
178 | * If so, we want to push it out to help us take this item off the AIL as soon | ||
179 | * as possible. | ||
180 | * | ||
181 | * We must not be holding the AIL lock at this point. Calling incore() to | ||
182 | * search the buffer cache can be a time consuming thing, and AIL lock is a | ||
183 | * spinlock. | ||
184 | */ | ||
185 | STATIC bool | ||
186 | xfs_qm_dquot_logitem_pushbuf( | ||
187 | struct xfs_log_item *lip) | ||
188 | { | ||
189 | struct xfs_dq_logitem *qlip = DQUOT_ITEM(lip); | ||
190 | struct xfs_dquot *dqp = qlip->qli_dquot; | ||
191 | struct xfs_buf *bp; | ||
192 | bool ret = true; | ||
193 | |||
194 | ASSERT(XFS_DQ_IS_LOCKED(dqp)); | ||
195 | |||
196 | /* | ||
197 | * If flushlock isn't locked anymore, chances are that the | ||
198 | * inode flush completed and the inode was taken off the AIL. | ||
199 | * So, just get out. | ||
200 | */ | ||
201 | if (completion_done(&dqp->q_flush) || | ||
202 | !(lip->li_flags & XFS_LI_IN_AIL)) { | ||
203 | xfs_dqunlock(dqp); | ||
204 | return true; | ||
205 | } | ||
206 | |||
207 | bp = xfs_incore(dqp->q_mount->m_ddev_targp, qlip->qli_format.qlf_blkno, | ||
208 | dqp->q_mount->m_quotainfo->qi_dqchunklen, XBF_TRYLOCK); | ||
209 | xfs_dqunlock(dqp); | ||
210 | if (!bp) | ||
211 | return true; | ||
212 | if (XFS_BUF_ISDELAYWRITE(bp)) | ||
213 | xfs_buf_delwri_promote(bp); | ||
214 | if (xfs_buf_ispinned(bp)) | ||
215 | ret = false; | ||
216 | xfs_buf_relse(bp); | ||
217 | return ret; | ||
218 | } | ||
219 | |||
220 | /* | ||
221 | * This is called to attempt to lock the dquot associated with this | ||
222 | * dquot log item. Don't sleep on the dquot lock or the flush lock. | ||
223 | * If the flush lock is already held, indicating that the dquot has | ||
224 | * been or is in the process of being flushed, then see if we can | ||
225 | * find the dquot's buffer in the buffer cache without sleeping. If | ||
226 | * we can and it is marked delayed write, then we want to send it out. | ||
227 | * We delay doing so until the push routine, though, to avoid sleeping | ||
228 | * in any device strategy routines. | ||
229 | */ | ||
230 | STATIC uint | 140 | STATIC uint |
231 | xfs_qm_dquot_logitem_trylock( | 141 | xfs_qm_dquot_logitem_push( |
232 | struct xfs_log_item *lip) | 142 | struct xfs_log_item *lip, |
143 | struct list_head *buffer_list) | ||
233 | { | 144 | { |
234 | struct xfs_dquot *dqp = DQUOT_ITEM(lip)->qli_dquot; | 145 | struct xfs_dquot *dqp = DQUOT_ITEM(lip)->qli_dquot; |
146 | struct xfs_buf *bp = NULL; | ||
147 | uint rval = XFS_ITEM_SUCCESS; | ||
148 | int error; | ||
235 | 149 | ||
236 | if (atomic_read(&dqp->q_pincount) > 0) | 150 | if (atomic_read(&dqp->q_pincount) > 0) |
237 | return XFS_ITEM_PINNED; | 151 | return XFS_ITEM_PINNED; |
@@ -239,16 +153,41 @@ xfs_qm_dquot_logitem_trylock( | |||
239 | if (!xfs_dqlock_nowait(dqp)) | 153 | if (!xfs_dqlock_nowait(dqp)) |
240 | return XFS_ITEM_LOCKED; | 154 | return XFS_ITEM_LOCKED; |
241 | 155 | ||
156 | /* | ||
157 | * Re-check the pincount now that we stabilized the value by | ||
158 | * taking the quota lock. | ||
159 | */ | ||
160 | if (atomic_read(&dqp->q_pincount) > 0) { | ||
161 | rval = XFS_ITEM_PINNED; | ||
162 | goto out_unlock; | ||
163 | } | ||
164 | |||
165 | /* | ||
166 | * Someone else is already flushing the dquot. Nothing we can do | ||
167 | * here but wait for the flush to finish and remove the item from | ||
168 | * the AIL. | ||
169 | */ | ||
242 | if (!xfs_dqflock_nowait(dqp)) { | 170 | if (!xfs_dqflock_nowait(dqp)) { |
243 | /* | 171 | rval = XFS_ITEM_FLUSHING; |
244 | * dquot has already been flushed to the backing buffer, | 172 | goto out_unlock; |
245 | * leave it locked, pushbuf routine will unlock it. | ||
246 | */ | ||
247 | return XFS_ITEM_PUSHBUF; | ||
248 | } | 173 | } |
249 | 174 | ||
250 | ASSERT(lip->li_flags & XFS_LI_IN_AIL); | 175 | spin_unlock(&lip->li_ailp->xa_lock); |
251 | return XFS_ITEM_SUCCESS; | 176 | |
177 | error = xfs_qm_dqflush(dqp, &bp); | ||
178 | if (error) { | ||
179 | xfs_warn(dqp->q_mount, "%s: push error %d on dqp %p", | ||
180 | __func__, error, dqp); | ||
181 | } else { | ||
182 | if (!xfs_buf_delwri_queue(bp, buffer_list)) | ||
183 | rval = XFS_ITEM_FLUSHING; | ||
184 | xfs_buf_relse(bp); | ||
185 | } | ||
186 | |||
187 | spin_lock(&lip->li_ailp->xa_lock); | ||
188 | out_unlock: | ||
189 | xfs_dqunlock(dqp); | ||
190 | return rval; | ||
252 | } | 191 | } |
253 | 192 | ||
254 | /* | 193 | /* |
@@ -299,11 +238,9 @@ static const struct xfs_item_ops xfs_dquot_item_ops = { | |||
299 | .iop_format = xfs_qm_dquot_logitem_format, | 238 | .iop_format = xfs_qm_dquot_logitem_format, |
300 | .iop_pin = xfs_qm_dquot_logitem_pin, | 239 | .iop_pin = xfs_qm_dquot_logitem_pin, |
301 | .iop_unpin = xfs_qm_dquot_logitem_unpin, | 240 | .iop_unpin = xfs_qm_dquot_logitem_unpin, |
302 | .iop_trylock = xfs_qm_dquot_logitem_trylock, | ||
303 | .iop_unlock = xfs_qm_dquot_logitem_unlock, | 241 | .iop_unlock = xfs_qm_dquot_logitem_unlock, |
304 | .iop_committed = xfs_qm_dquot_logitem_committed, | 242 | .iop_committed = xfs_qm_dquot_logitem_committed, |
305 | .iop_push = xfs_qm_dquot_logitem_push, | 243 | .iop_push = xfs_qm_dquot_logitem_push, |
306 | .iop_pushbuf = xfs_qm_dquot_logitem_pushbuf, | ||
307 | .iop_committing = xfs_qm_dquot_logitem_committing | 244 | .iop_committing = xfs_qm_dquot_logitem_committing |
308 | }; | 245 | }; |
309 | 246 | ||
@@ -398,11 +335,13 @@ xfs_qm_qoff_logitem_unpin( | |||
398 | } | 335 | } |
399 | 336 | ||
400 | /* | 337 | /* |
401 | * Quotaoff items have no locking, so just return success. | 338 | * There isn't much you can do to push a quotaoff item. It is simply |
339 | * stuck waiting for the log to be flushed to disk. | ||
402 | */ | 340 | */ |
403 | STATIC uint | 341 | STATIC uint |
404 | xfs_qm_qoff_logitem_trylock( | 342 | xfs_qm_qoff_logitem_push( |
405 | struct xfs_log_item *lip) | 343 | struct xfs_log_item *lip, |
344 | struct list_head *buffer_list) | ||
406 | { | 345 | { |
407 | return XFS_ITEM_LOCKED; | 346 | return XFS_ITEM_LOCKED; |
408 | } | 347 | } |
@@ -429,17 +368,6 @@ xfs_qm_qoff_logitem_committed( | |||
429 | return lsn; | 368 | return lsn; |
430 | } | 369 | } |
431 | 370 | ||
432 | /* | ||
433 | * There isn't much you can do to push on an quotaoff item. It is simply | ||
434 | * stuck waiting for the log to be flushed to disk. | ||
435 | */ | ||
436 | STATIC void | ||
437 | xfs_qm_qoff_logitem_push( | ||
438 | struct xfs_log_item *lip) | ||
439 | { | ||
440 | } | ||
441 | |||
442 | |||
443 | STATIC xfs_lsn_t | 371 | STATIC xfs_lsn_t |
444 | xfs_qm_qoffend_logitem_committed( | 372 | xfs_qm_qoffend_logitem_committed( |
445 | struct xfs_log_item *lip, | 373 | struct xfs_log_item *lip, |
@@ -454,7 +382,7 @@ xfs_qm_qoffend_logitem_committed( | |||
454 | * xfs_trans_ail_delete() drops the AIL lock. | 382 | * xfs_trans_ail_delete() drops the AIL lock. |
455 | */ | 383 | */ |
456 | spin_lock(&ailp->xa_lock); | 384 | spin_lock(&ailp->xa_lock); |
457 | xfs_trans_ail_delete(ailp, (xfs_log_item_t *)qfs); | 385 | xfs_trans_ail_delete(ailp, &qfs->qql_item, SHUTDOWN_LOG_IO_ERROR); |
458 | 386 | ||
459 | kmem_free(qfs); | 387 | kmem_free(qfs); |
460 | kmem_free(qfe); | 388 | kmem_free(qfe); |
@@ -487,7 +415,6 @@ static const struct xfs_item_ops xfs_qm_qoffend_logitem_ops = { | |||
487 | .iop_format = xfs_qm_qoff_logitem_format, | 415 | .iop_format = xfs_qm_qoff_logitem_format, |
488 | .iop_pin = xfs_qm_qoff_logitem_pin, | 416 | .iop_pin = xfs_qm_qoff_logitem_pin, |
489 | .iop_unpin = xfs_qm_qoff_logitem_unpin, | 417 | .iop_unpin = xfs_qm_qoff_logitem_unpin, |
490 | .iop_trylock = xfs_qm_qoff_logitem_trylock, | ||
491 | .iop_unlock = xfs_qm_qoff_logitem_unlock, | 418 | .iop_unlock = xfs_qm_qoff_logitem_unlock, |
492 | .iop_committed = xfs_qm_qoffend_logitem_committed, | 419 | .iop_committed = xfs_qm_qoffend_logitem_committed, |
493 | .iop_push = xfs_qm_qoff_logitem_push, | 420 | .iop_push = xfs_qm_qoff_logitem_push, |
@@ -502,7 +429,6 @@ static const struct xfs_item_ops xfs_qm_qoff_logitem_ops = { | |||
502 | .iop_format = xfs_qm_qoff_logitem_format, | 429 | .iop_format = xfs_qm_qoff_logitem_format, |
503 | .iop_pin = xfs_qm_qoff_logitem_pin, | 430 | .iop_pin = xfs_qm_qoff_logitem_pin, |
504 | .iop_unpin = xfs_qm_qoff_logitem_unpin, | 431 | .iop_unpin = xfs_qm_qoff_logitem_unpin, |
505 | .iop_trylock = xfs_qm_qoff_logitem_trylock, | ||
506 | .iop_unlock = xfs_qm_qoff_logitem_unlock, | 432 | .iop_unlock = xfs_qm_qoff_logitem_unlock, |
507 | .iop_committed = xfs_qm_qoff_logitem_committed, | 433 | .iop_committed = xfs_qm_qoff_logitem_committed, |
508 | .iop_push = xfs_qm_qoff_logitem_push, | 434 | .iop_push = xfs_qm_qoff_logitem_push, |
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c index 39f06336b99d..610456054dc2 100644 --- a/fs/xfs/xfs_error.c +++ b/fs/xfs/xfs_error.c | |||
@@ -19,7 +19,6 @@ | |||
19 | #include "xfs_fs.h" | 19 | #include "xfs_fs.h" |
20 | #include "xfs_types.h" | 20 | #include "xfs_types.h" |
21 | #include "xfs_log.h" | 21 | #include "xfs_log.h" |
22 | #include "xfs_inum.h" | ||
23 | #include "xfs_trans.h" | 22 | #include "xfs_trans.h" |
24 | #include "xfs_sb.h" | 23 | #include "xfs_sb.h" |
25 | #include "xfs_ag.h" | 24 | #include "xfs_ag.h" |
diff --git a/fs/xfs/xfs_export.c b/fs/xfs/xfs_export.c index 558910f5e3c0..2d25d19c4ea1 100644 --- a/fs/xfs/xfs_export.c +++ b/fs/xfs/xfs_export.c | |||
@@ -17,7 +17,6 @@ | |||
17 | */ | 17 | */ |
18 | #include "xfs.h" | 18 | #include "xfs.h" |
19 | #include "xfs_types.h" | 19 | #include "xfs_types.h" |
20 | #include "xfs_inum.h" | ||
21 | #include "xfs_log.h" | 20 | #include "xfs_log.h" |
22 | #include "xfs_trans.h" | 21 | #include "xfs_trans.h" |
23 | #include "xfs_sb.h" | 22 | #include "xfs_sb.h" |
diff --git a/fs/xfs/xfs_extent_busy.c b/fs/xfs/xfs_extent_busy.c new file mode 100644 index 000000000000..85e9f87a1a7c --- /dev/null +++ b/fs/xfs/xfs_extent_busy.c | |||
@@ -0,0 +1,603 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc. | ||
3 | * Copyright (c) 2010 David Chinner. | ||
4 | * Copyright (c) 2011 Christoph Hellwig. | ||
5 | * All Rights Reserved. | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or | ||
8 | * modify it under the terms of the GNU General Public License as | ||
9 | * published by the Free Software Foundation. | ||
10 | * | ||
11 | * This program is distributed in the hope that it would be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write the Free Software Foundation, | ||
18 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
19 | */ | ||
20 | #include "xfs.h" | ||
21 | #include "xfs_fs.h" | ||
22 | #include "xfs_types.h" | ||
23 | #include "xfs_log.h" | ||
24 | #include "xfs_trans.h" | ||
25 | #include "xfs_sb.h" | ||
26 | #include "xfs_ag.h" | ||
27 | #include "xfs_mount.h" | ||
28 | #include "xfs_bmap_btree.h" | ||
29 | #include "xfs_alloc.h" | ||
30 | #include "xfs_inode.h" | ||
31 | #include "xfs_extent_busy.h" | ||
32 | #include "xfs_trace.h" | ||
33 | |||
34 | void | ||
35 | xfs_extent_busy_insert( | ||
36 | struct xfs_trans *tp, | ||
37 | xfs_agnumber_t agno, | ||
38 | xfs_agblock_t bno, | ||
39 | xfs_extlen_t len, | ||
40 | unsigned int flags) | ||
41 | { | ||
42 | struct xfs_extent_busy *new; | ||
43 | struct xfs_extent_busy *busyp; | ||
44 | struct xfs_perag *pag; | ||
45 | struct rb_node **rbp; | ||
46 | struct rb_node *parent = NULL; | ||
47 | |||
48 | new = kmem_zalloc(sizeof(struct xfs_extent_busy), KM_MAYFAIL); | ||
49 | if (!new) { | ||
50 | /* | ||
51 | * No Memory! Since it is now not possible to track the free | ||
52 | * block, make this a synchronous transaction to ensure that | ||
53 | * the block is not reused before this transaction commits. | ||
54 | */ | ||
55 | trace_xfs_extent_busy_enomem(tp->t_mountp, agno, bno, len); | ||
56 | xfs_trans_set_sync(tp); | ||
57 | return; | ||
58 | } | ||
59 | |||
60 | new->agno = agno; | ||
61 | new->bno = bno; | ||
62 | new->length = len; | ||
63 | INIT_LIST_HEAD(&new->list); | ||
64 | new->flags = flags; | ||
65 | |||
66 | /* trace before insert to be able to see failed inserts */ | ||
67 | trace_xfs_extent_busy(tp->t_mountp, agno, bno, len); | ||
68 | |||
69 | pag = xfs_perag_get(tp->t_mountp, new->agno); | ||
70 | spin_lock(&pag->pagb_lock); | ||
71 | rbp = &pag->pagb_tree.rb_node; | ||
72 | while (*rbp) { | ||
73 | parent = *rbp; | ||
74 | busyp = rb_entry(parent, struct xfs_extent_busy, rb_node); | ||
75 | |||
76 | if (new->bno < busyp->bno) { | ||
77 | rbp = &(*rbp)->rb_left; | ||
78 | ASSERT(new->bno + new->length <= busyp->bno); | ||
79 | } else if (new->bno > busyp->bno) { | ||
80 | rbp = &(*rbp)->rb_right; | ||
81 | ASSERT(bno >= busyp->bno + busyp->length); | ||
82 | } else { | ||
83 | ASSERT(0); | ||
84 | } | ||
85 | } | ||
86 | |||
87 | rb_link_node(&new->rb_node, parent, rbp); | ||
88 | rb_insert_color(&new->rb_node, &pag->pagb_tree); | ||
89 | |||
90 | list_add(&new->list, &tp->t_busy); | ||
91 | spin_unlock(&pag->pagb_lock); | ||
92 | xfs_perag_put(pag); | ||
93 | } | ||
94 | |||
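/*
 * Illustrative sketch, not part of this file: the extent free path ends
 * with a call like the following, so that just-freed blocks cannot be
 * reallocated and overwritten before the free commits to the log:
 *
 *	xfs_extent_busy_insert(tp, agno, bno, len, 0);
 */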
95 | /* | ||
96 | * Search for a busy extent within the range of the extent we are about to | ||
97 | * allocate. You need to be holding the busy extent tree lock when calling | ||
98 | * xfs_extent_busy_search(). This function returns 0 for no overlapping busy | ||
99 | * extent, -1 for an overlapping but not exact busy extent, and 1 for an exact | ||
100 | * match. This is done so that a non-zero return indicates an overlap that | ||
101 | * will require a synchronous transaction, but it can still be | ||
102 | * used to distinguish between a partial and an exact match. | ||
103 | */ | ||
104 | int | ||
105 | xfs_extent_busy_search( | ||
106 | struct xfs_mount *mp, | ||
107 | xfs_agnumber_t agno, | ||
108 | xfs_agblock_t bno, | ||
109 | xfs_extlen_t len) | ||
110 | { | ||
111 | struct xfs_perag *pag; | ||
112 | struct rb_node *rbp; | ||
113 | struct xfs_extent_busy *busyp; | ||
114 | int match = 0; | ||
115 | |||
116 | pag = xfs_perag_get(mp, agno); | ||
117 | spin_lock(&pag->pagb_lock); | ||
118 | |||
119 | rbp = pag->pagb_tree.rb_node; | ||
120 | |||
121 | /* find closest start bno overlap */ | ||
122 | while (rbp) { | ||
123 | busyp = rb_entry(rbp, struct xfs_extent_busy, rb_node); | ||
124 | if (bno < busyp->bno) { | ||
125 | /* may overlap, but exact start block is lower */ | ||
126 | if (bno + len > busyp->bno) | ||
127 | match = -1; | ||
128 | rbp = rbp->rb_left; | ||
129 | } else if (bno > busyp->bno) { | ||
130 | /* may overlap, but exact start block is higher */ | ||
131 | if (bno < busyp->bno + busyp->length) | ||
132 | match = -1; | ||
133 | rbp = rbp->rb_right; | ||
134 | } else { | ||
135 | /* bno matches busyp, length determines exact match */ | ||
136 | match = (busyp->length == len) ? 1 : -1; | ||
137 | break; | ||
138 | } | ||
139 | } | ||
140 | spin_unlock(&pag->pagb_lock); | ||
141 | xfs_perag_put(pag); | ||
142 | return match; | ||
143 | } | ||
144 | |||
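/*
 * Illustrative sketch, not part of this file: consuming the tri-state
 * result. xfs_trim_extents() above simply skips the discard on any
 * non-zero value:
 *
 *	switch (xfs_extent_busy_search(mp, agno, fbno, flen)) {
 *	case 0:   no overlapping busy extent, safe to reuse or discard
 *	case -1:  partial overlap, requires a synchronous transaction
 *	case 1:   exact match, the whole extent is still busy
 *	}
 */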
145 | /* | ||
146 | * The found free extent [fbno, fend] overlaps part or all of the given busy | ||
147 | * extent. If the overlap covers the beginning, the end, or all of the busy | ||
148 | * extent, the overlapping portion can be made unbusy and used for the | ||
149 | * allocation. We can't split a busy extent because we can't modify a | ||
150 | * transaction/CIL context busy list, but we can update an entry's block | ||
151 | * number or length. | ||
152 | * | ||
153 | * Returns true if the extent can safely be reused, or false if the search | ||
154 | * needs to be restarted. | ||
155 | */ | ||
156 | STATIC bool | ||
157 | xfs_extent_busy_update_extent( | ||
158 | struct xfs_mount *mp, | ||
159 | struct xfs_perag *pag, | ||
160 | struct xfs_extent_busy *busyp, | ||
161 | xfs_agblock_t fbno, | ||
162 | xfs_extlen_t flen, | ||
163 | bool userdata) | ||
164 | { | ||
165 | xfs_agblock_t fend = fbno + flen; | ||
166 | xfs_agblock_t bbno = busyp->bno; | ||
167 | xfs_agblock_t bend = bbno + busyp->length; | ||
168 | |||
169 | /* | ||
170 | * This extent is currently being discarded. Give the thread | ||
171 | * performing the discard a chance to mark the extent unbusy | ||
172 | * and retry. | ||
173 | */ | ||
174 | if (busyp->flags & XFS_EXTENT_BUSY_DISCARDED) { | ||
175 | spin_unlock(&pag->pagb_lock); | ||
176 | delay(1); | ||
177 | spin_lock(&pag->pagb_lock); | ||
178 | return false; | ||
179 | } | ||
180 | |||
181 | /* | ||
182 | * If there is a busy extent overlapping a user allocation, we have | ||
183 | * no choice but to force the log and retry the search. | ||
184 | * | ||
185 | * Fortunately this does not happen during normal operation, but | ||
186 | * only if the filesystem is very low on space and has to dip into | ||
187 | * the AGFL for normal allocations. | ||
188 | */ | ||
189 | if (userdata) | ||
190 | goto out_force_log; | ||
191 | |||
192 | if (bbno < fbno && bend > fend) { | ||
193 | /* | ||
194 | * Case 1: | ||
195 | * bbno bend | ||
196 | * +BBBBBBBBBBBBBBBBB+ | ||
197 | * +---------+ | ||
198 | * fbno fend | ||
199 | */ | ||
200 | |||
201 | /* | ||
202 | * We would have to split the busy extent to be able to track | ||
203 | * it correctly, which we cannot do because we would have to | ||
204 | * modify the list of busy extents attached to the transaction | ||
205 | * or CIL context, which is immutable. | ||
206 | * | ||
207 | * Force out the log to clear the busy extent and retry the | ||
208 | * search. | ||
209 | */ | ||
210 | goto out_force_log; | ||
211 | } else if (bbno >= fbno && bend <= fend) { | ||
212 | /* | ||
213 | * Case 2: | ||
214 | * bbno bend | ||
215 | * +BBBBBBBBBBBBBBBBB+ | ||
216 | * +-----------------+ | ||
217 | * fbno fend | ||
218 | * | ||
219 | * Case 3: | ||
220 | * bbno bend | ||
221 | * +BBBBBBBBBBBBBBBBB+ | ||
222 | * +--------------------------+ | ||
223 | * fbno fend | ||
224 | * | ||
225 | * Case 4: | ||
226 | * bbno bend | ||
227 | * +BBBBBBBBBBBBBBBBB+ | ||
228 | * +--------------------------+ | ||
229 | * fbno fend | ||
230 | * | ||
231 | * Case 5: | ||
232 | * bbno bend | ||
233 | * +BBBBBBBBBBBBBBBBB+ | ||
234 | * +-----------------------------------+ | ||
235 | * fbno fend | ||
236 | * | ||
237 | */ | ||
238 | |||
239 | /* | ||
240 | * The busy extent is fully covered by the extent we are | ||
241 | * allocating, and can simply be removed from the rbtree. | ||
242 | * However we cannot remove it from the immutable list | ||
243 | * tracking busy extents in the transaction or CIL context, | ||
244 | * so set the length to zero to mark it invalid. | ||
245 | * | ||
246 | * We also need to restart the busy extent search from the | ||
247 | * tree root, because erasing the node can rearrange the | ||
248 | * tree topology. | ||
249 | */ | ||
250 | rb_erase(&busyp->rb_node, &pag->pagb_tree); | ||
251 | busyp->length = 0; | ||
252 | return false; | ||
253 | } else if (fend < bend) { | ||
254 | /* | ||
255 | * Case 6: | ||
256 | * bbno bend | ||
257 | * +BBBBBBBBBBBBBBBBB+ | ||
258 | * +---------+ | ||
259 | * fbno fend | ||
260 | * | ||
261 | * Case 7: | ||
262 | * bbno bend | ||
263 | * +BBBBBBBBBBBBBBBBB+ | ||
264 | * +------------------+ | ||
265 | * fbno fend | ||
266 | * | ||
267 | */ | ||
268 | busyp->bno = fend; | ||
269 | } else if (bbno < fbno) { | ||
270 | /* | ||
271 | * Case 8: | ||
272 | * bbno bend | ||
273 | * +BBBBBBBBBBBBBBBBB+ | ||
274 | * +-------------+ | ||
275 | * fbno fend | ||
276 | * | ||
277 | * Case 9: | ||
278 | * bbno bend | ||
279 | * +BBBBBBBBBBBBBBBBB+ | ||
280 | * +----------------------+ | ||
281 | * fbno fend | ||
282 | */ | ||
283 | busyp->length = fbno - busyp->bno; | ||
284 | } else { | ||
285 | ASSERT(0); | ||
286 | } | ||
287 | |||
288 | trace_xfs_extent_busy_reuse(mp, pag->pag_agno, fbno, flen); | ||
289 | return true; | ||
290 | |||
291 | out_force_log: | ||
292 | spin_unlock(&pag->pagb_lock); | ||
293 | xfs_log_force(mp, XFS_LOG_SYNC); | ||
294 | trace_xfs_extent_busy_force(mp, pag->pag_agno, fbno, flen); | ||
295 | spin_lock(&pag->pagb_lock); | ||
296 | return false; | ||
297 | } | ||
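
The four branches above are a complete case analysis of how a busy extent [bbno, bend) can overlap the range being allocated, [fbno, fend). A minimal user-space sketch of that classification, with illustrative names and plain integers standing in for the XFS block types (not kernel code):

#include <stdint.h>
#include <stdio.h>

enum overlap { SPLIT, COVERED, TRIM_FRONT, TRIM_BACK };

static enum overlap classify(uint64_t bbno, uint64_t bend,
			     uint64_t fbno, uint64_t fend)
{
	if (bbno < fbno && bend > fend)
		return SPLIT;		/* case 1: force the log and retry */
	if (bbno >= fbno && bend <= fend)
		return COVERED;		/* cases 2-5: drop the busy extent */
	if (fend < bend)
		return TRIM_FRONT;	/* cases 6-7: busy start moves to fend */
	return TRIM_BACK;		/* cases 8-9: busy length cut at fbno */
}

int main(void)
{
	/* allocation [15, 25) overlaps the tail of busy [10, 20) */
	printf("%d\n", classify(10, 20, 15, 25));	/* prints 3 (TRIM_BACK) */
	return 0;
}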
298 | |||
299 | |||
300 | /* | ||
301 | * For a given extent [fbno, flen], make sure we can reuse it safely. | ||
302 | */ | ||
303 | void | ||
304 | xfs_extent_busy_reuse( | ||
305 | struct xfs_mount *mp, | ||
306 | xfs_agnumber_t agno, | ||
307 | xfs_agblock_t fbno, | ||
308 | xfs_extlen_t flen, | ||
309 | bool userdata) | ||
310 | { | ||
311 | struct xfs_perag *pag; | ||
312 | struct rb_node *rbp; | ||
313 | |||
314 | ASSERT(flen > 0); | ||
315 | |||
316 | pag = xfs_perag_get(mp, agno); | ||
317 | spin_lock(&pag->pagb_lock); | ||
318 | restart: | ||
319 | rbp = pag->pagb_tree.rb_node; | ||
320 | while (rbp) { | ||
321 | struct xfs_extent_busy *busyp = | ||
322 | rb_entry(rbp, struct xfs_extent_busy, rb_node); | ||
323 | xfs_agblock_t bbno = busyp->bno; | ||
324 | xfs_agblock_t bend = bbno + busyp->length; | ||
325 | |||
326 | if (fbno + flen <= bbno) { | ||
327 | rbp = rbp->rb_left; | ||
328 | continue; | ||
329 | } else if (fbno >= bend) { | ||
330 | rbp = rbp->rb_right; | ||
331 | continue; | ||
332 | } | ||
333 | |||
334 | if (!xfs_extent_busy_update_extent(mp, pag, busyp, fbno, flen, | ||
335 | userdata)) | ||
336 | goto restart; | ||
337 | } | ||
338 | spin_unlock(&pag->pagb_lock); | ||
339 | xfs_perag_put(pag); | ||
340 | } | ||
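
The walk above descends the per-AG rbtree with the usual interval rule: busy extents never overlap each other, so a search range that ends at or before a node lies entirely in its left subtree, and one that starts at or after the node's end lies entirely in its right subtree. A stand-alone sketch of the same rule over a sorted array (hypothetical names, not kernel code):

#include <stdint.h>
#include <stdio.h>

struct ext { uint64_t bno, len; };

/* Return the index of an extent overlapping [fbno, fbno + flen), or -1. */
static int find_overlap(const struct ext *tree, int n,
			uint64_t fbno, uint64_t flen)
{
	int lo = 0, hi = n - 1;

	while (lo <= hi) {
		int mid = lo + (hi - lo) / 2;
		uint64_t bbno = tree[mid].bno;
		uint64_t bend = bbno + tree[mid].len;

		if (fbno + flen <= bbno)
			hi = mid - 1;	/* whole range left of this node */
		else if (fbno >= bend)
			lo = mid + 1;	/* whole range right of this node */
		else
			return mid;	/* overlap found */
	}
	return -1;
}

int main(void)
{
	struct ext tree[] = { {0, 4}, {10, 6}, {32, 8} };

	printf("%d\n", find_overlap(tree, 3, 12, 2));	/* 1: hits [10, 16) */
	printf("%d\n", find_overlap(tree, 3, 20, 4));	/* -1: falls in a gap */
	return 0;
}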
341 | |||
342 | /* | ||
343 | * For a given extent [fbno, flen], search the busy extent list to find a | ||
344 | * subset of the extent that is not busy. If *rlen is smaller than | ||
345 | * args->minlen, no suitable extent could be found, and the higher level | ||
346 | * code needs to force out the log and retry the allocation. | ||
347 | */ | ||
348 | void | ||
349 | xfs_extent_busy_trim( | ||
350 | struct xfs_alloc_arg *args, | ||
351 | xfs_agblock_t bno, | ||
352 | xfs_extlen_t len, | ||
353 | xfs_agblock_t *rbno, | ||
354 | xfs_extlen_t *rlen) | ||
355 | { | ||
356 | xfs_agblock_t fbno; | ||
357 | xfs_extlen_t flen; | ||
358 | struct rb_node *rbp; | ||
359 | |||
360 | ASSERT(len > 0); | ||
361 | |||
362 | spin_lock(&args->pag->pagb_lock); | ||
363 | restart: | ||
364 | fbno = bno; | ||
365 | flen = len; | ||
366 | rbp = args->pag->pagb_tree.rb_node; | ||
367 | while (rbp && flen >= args->minlen) { | ||
368 | struct xfs_extent_busy *busyp = | ||
369 | rb_entry(rbp, struct xfs_extent_busy, rb_node); | ||
370 | xfs_agblock_t fend = fbno + flen; | ||
371 | xfs_agblock_t bbno = busyp->bno; | ||
372 | xfs_agblock_t bend = bbno + busyp->length; | ||
373 | |||
374 | if (fend <= bbno) { | ||
375 | rbp = rbp->rb_left; | ||
376 | continue; | ||
377 | } else if (fbno >= bend) { | ||
378 | rbp = rbp->rb_right; | ||
379 | continue; | ||
380 | } | ||
381 | |||
382 | /* | ||
383 | * If this is a metadata allocation, try to reuse the busy | ||
384 | * extent instead of trimming the allocation. | ||
385 | */ | ||
386 | if (!args->userdata && | ||
387 | !(busyp->flags & XFS_EXTENT_BUSY_DISCARDED)) { | ||
388 | if (!xfs_extent_busy_update_extent(args->mp, args->pag, | ||
389 | busyp, fbno, flen, | ||
390 | false)) | ||
391 | goto restart; | ||
392 | continue; | ||
393 | } | ||
394 | |||
395 | if (bbno <= fbno) { | ||
396 | /* start overlap */ | ||
397 | |||
398 | /* | ||
399 | * Case 1: | ||
400 | * bbno bend | ||
401 | * +BBBBBBBBBBBBBBBBB+ | ||
402 | * +---------+ | ||
403 | * fbno fend | ||
404 | * | ||
405 | * Case 2: | ||
406 | * bbno bend | ||
407 | * +BBBBBBBBBBBBBBBBB+ | ||
408 | * +-------------+ | ||
409 | * fbno fend | ||
410 | * | ||
411 | * Case 3: | ||
412 | * bbno bend | ||
413 | * +BBBBBBBBBBBBBBBBB+ | ||
414 | * +-------------+ | ||
415 | * fbno fend | ||
416 | * | ||
417 | * Case 4: | ||
418 | * bbno bend | ||
419 | * +BBBBBBBBBBBBBBBBB+ | ||
420 | * +-----------------+ | ||
421 | * fbno fend | ||
422 | * | ||
423 | * No unbusy region in extent, return failure. | ||
424 | */ | ||
425 | if (fend <= bend) | ||
426 | goto fail; | ||
427 | |||
428 | /* | ||
429 | * Case 5: | ||
430 | * bbno bend | ||
431 | * +BBBBBBBBBBBBBBBBB+ | ||
432 | * +----------------------+ | ||
433 | * fbno fend | ||
434 | * | ||
435 | * Case 6: | ||
436 | * bbno bend | ||
437 | * +BBBBBBBBBBBBBBBBB+ | ||
438 | * +--------------------------+ | ||
439 | * fbno fend | ||
440 | * | ||
441 | * Needs to be trimmed to: | ||
442 | * +-------+ | ||
443 | * fbno fend | ||
444 | */ | ||
445 | fbno = bend; | ||
446 | } else if (bend >= fend) { | ||
447 | /* end overlap */ | ||
448 | |||
449 | /* | ||
450 | * Case 7: | ||
451 | * bbno bend | ||
452 | * +BBBBBBBBBBBBBBBBB+ | ||
453 | * +------------------+ | ||
454 | * fbno fend | ||
455 | * | ||
456 | * Case 8: | ||
457 | * bbno bend | ||
458 | * +BBBBBBBBBBBBBBBBB+ | ||
459 | * +--------------------------+ | ||
460 | * fbno fend | ||
461 | * | ||
462 | * Needs to be trimmed to: | ||
463 | * +-------+ | ||
464 | * fbno fend | ||
465 | */ | ||
466 | fend = bbno; | ||
467 | } else { | ||
468 | /* middle overlap */ | ||
469 | |||
470 | /* | ||
471 | * Case 9: | ||
472 | * bbno bend | ||
473 | * +BBBBBBBBBBBBBBBBB+ | ||
474 | * +-----------------------------------+ | ||
475 | * fbno fend | ||
476 | * | ||
477 | * Can be trimmed to: | ||
478 | * +-------+ OR +-------+ | ||
479 | * fbno fend fbno fend | ||
480 | * | ||
481 | * Backward allocation leads to significant | ||
482 | * fragmentation of directories, which degrades | ||
483 | * directory performance; therefore we always want to | ||
484 | * choose the option that produces forward allocation | ||
485 | * patterns. | ||
486 | * Preferring the lower bno extent will make the next | ||
487 | * request use "fend" as the start of the next | ||
488 | * allocation; if the segment is no longer busy at | ||
489 | * that point, we'll get a contiguous allocation, but | ||
490 | * even if it is still busy, we will get a forward | ||
491 | * allocation. | ||
492 | * We try to avoid choosing the segment at "bend", | ||
493 | * because that can lead to the next allocation | ||
494 | * taking the segment at "fbno", which would be a | ||
495 | * backward allocation. We only use the segment at | ||
496 | * "fbno" if it is much larger than the current | ||
497 | * requested size, because in that case there's a | ||
498 | * good chance subsequent allocations will be | ||
499 | * contiguous. | ||
500 | */ | ||
501 | if (bbno - fbno >= args->maxlen) { | ||
502 | /* left candidate fits perfectly */ | ||
503 | fend = bbno; | ||
504 | } else if (fend - bend >= args->maxlen * 4) { | ||
505 | /* right candidate has enough free space */ | ||
506 | fbno = bend; | ||
507 | } else if (bbno - fbno >= args->minlen) { | ||
508 | /* left candidate fits minimum requirement */ | ||
509 | fend = bbno; | ||
510 | } else { | ||
511 | goto fail; | ||
512 | } | ||
513 | } | ||
514 | |||
515 | flen = fend - fbno; | ||
516 | } | ||
517 | spin_unlock(&args->pag->pagb_lock); | ||
518 | |||
519 | if (fbno != bno || flen != len) { | ||
520 | trace_xfs_extent_busy_trim(args->mp, args->agno, bno, len, | ||
521 | fbno, flen); | ||
522 | } | ||
523 | *rbno = fbno; | ||
524 | *rlen = flen; | ||
525 | return; | ||
526 | fail: | ||
527 | /* | ||
528 | * Return a zero extent length as a failure indication. All callers | ||
529 | * re-check if the trimmed extent satisfies the minlen requirement. | ||
530 | */ | ||
531 | spin_unlock(&args->pag->pagb_lock); | ||
532 | trace_xfs_extent_busy_trim(args->mp, args->agno, bno, len, fbno, 0); | ||
533 | *rbno = fbno; | ||
534 | *rlen = 0; | ||
535 | } | ||
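
A user-space sketch of the trim decision applied to one busy extent, including the middle-overlap heuristic that prefers forward allocations; minlen and maxlen stand in for the xfs_alloc_arg fields and the function name is illustrative:

#include <stdint.h>

/* Trim [*fbno, *fend) against busy [bbno, bend); returns 0 on failure. */
static uint64_t trim_one(uint64_t *fbno, uint64_t *fend,
			 uint64_t bbno, uint64_t bend,
			 uint64_t minlen, uint64_t maxlen)
{
	if (bbno <= *fbno) {			/* start overlap */
		if (*fend <= bend)
			return 0;		/* candidate is fully busy */
		*fbno = bend;
	} else if (bend >= *fend) {		/* end overlap */
		*fend = bbno;
	} else {				/* middle overlap */
		if (bbno - *fbno >= maxlen)
			*fend = bbno;		/* left part fits maxlen */
		else if (*fend - bend >= maxlen * 4)
			*fbno = bend;		/* right part has headroom */
		else if (bbno - *fbno >= minlen)
			*fend = bbno;		/* left part meets minlen */
		else
			return 0;
	}
	return *fend - *fbno;
}

int main(void)
{
	uint64_t fbno = 0, fend = 100;

	/* busy [40, 50) in the middle; the left run of 40 >= maxlen 32 wins */
	return trim_one(&fbno, &fend, 40, 50, 8, 32) == 40 ? 0 : 1;
}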
536 | |||
537 | STATIC void | ||
538 | xfs_extent_busy_clear_one( | ||
539 | struct xfs_mount *mp, | ||
540 | struct xfs_perag *pag, | ||
541 | struct xfs_extent_busy *busyp) | ||
542 | { | ||
543 | if (busyp->length) { | ||
544 | trace_xfs_extent_busy_clear(mp, busyp->agno, busyp->bno, | ||
545 | busyp->length); | ||
546 | rb_erase(&busyp->rb_node, &pag->pagb_tree); | ||
547 | } | ||
548 | |||
549 | list_del_init(&busyp->list); | ||
550 | kmem_free(busyp); | ||
551 | } | ||
552 | |||
553 | /* | ||
554 | * Remove all extents on the passed in list from the busy extents tree. | ||
555 | * If do_discard is set, skip extents that need to be discarded, and mark | ||
556 | * these as undergoing a discard operation instead. | ||
557 | */ | ||
558 | void | ||
559 | xfs_extent_busy_clear( | ||
560 | struct xfs_mount *mp, | ||
561 | struct list_head *list, | ||
562 | bool do_discard) | ||
563 | { | ||
564 | struct xfs_extent_busy *busyp, *n; | ||
565 | struct xfs_perag *pag = NULL; | ||
566 | xfs_agnumber_t agno = NULLAGNUMBER; | ||
567 | |||
568 | list_for_each_entry_safe(busyp, n, list, list) { | ||
569 | if (busyp->agno != agno) { | ||
570 | if (pag) { | ||
571 | spin_unlock(&pag->pagb_lock); | ||
572 | xfs_perag_put(pag); | ||
573 | } | ||
574 | pag = xfs_perag_get(mp, busyp->agno); | ||
575 | spin_lock(&pag->pagb_lock); | ||
576 | agno = busyp->agno; | ||
577 | } | ||
578 | |||
579 | if (do_discard && busyp->length && | ||
580 | !(busyp->flags & XFS_EXTENT_BUSY_SKIP_DISCARD)) | ||
581 | busyp->flags = XFS_EXTENT_BUSY_DISCARDED; | ||
582 | else | ||
583 | xfs_extent_busy_clear_one(mp, pag, busyp); | ||
584 | } | ||
585 | |||
586 | if (pag) { | ||
587 | spin_unlock(&pag->pagb_lock); | ||
588 | xfs_perag_put(pag); | ||
589 | } | ||
590 | } | ||
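
Because the list is sorted by AG beforehand (xfs_extent_busy_sort in the header below), the loop above drops and re-acquires pagb_lock only when the walk crosses into a new AG, not once per extent. A generic sketch of that lock-batching pattern, with printfs standing in for the real lock calls:

#include <stdio.h>

struct item { int agno; };

static void lock_ag(int agno)   { printf("lock AG %d\n", agno); }
static void unlock_ag(int agno) { printf("unlock AG %d\n", agno); }

static void clear_all(const struct item *list, int n)
{
	int cur = -1;

	for (int i = 0; i < n; i++) {
		if (list[i].agno != cur) {	/* crossed into a new AG */
			if (cur != -1)
				unlock_ag(cur);
			cur = list[i].agno;
			lock_ag(cur);
		}
		/* ... process list[i] under the AG lock ... */
	}
	if (cur != -1)
		unlock_ag(cur);
}

int main(void)
{
	struct item list[] = { {0}, {0}, {1}, {3}, {3} };

	clear_all(list, 5);	/* locks each of AGs 0, 1, 3 exactly once */
	return 0;
}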
591 | |||
592 | /* | ||
593 | * Callback for list_sort to sort busy extents by the AG they reside in. | ||
594 | */ | ||
595 | int | ||
596 | xfs_extent_busy_ag_cmp( | ||
597 | void *priv, | ||
598 | struct list_head *a, | ||
599 | struct list_head *b) | ||
600 | { | ||
601 | return container_of(a, struct xfs_extent_busy, list)->agno - | ||
602 | container_of(b, struct xfs_extent_busy, list)->agno; | ||
603 | } | ||
diff --git a/fs/xfs/xfs_extent_busy.h b/fs/xfs/xfs_extent_busy.h new file mode 100644 index 000000000000..985412d65ba5 --- /dev/null +++ b/fs/xfs/xfs_extent_busy.h | |||
@@ -0,0 +1,69 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc. | ||
3 | * Copyright (c) 2010 David Chinner. | ||
4 | * Copyright (c) 2011 Christoph Hellwig. | ||
5 | * All Rights Reserved. | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or | ||
8 | * modify it under the terms of the GNU General Public License as | ||
9 | * published by the Free Software Foundation. | ||
10 | * | ||
11 | * This program is distributed in the hope that it would be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write the Free Software Foundation, | ||
18 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
19 | */ | ||
20 | #ifndef __XFS_EXTENT_BUSY_H__ | ||
21 | #define __XFS_EXTENT_BUSY_H__ | ||
22 | |||
23 | /* | ||
24 | * Busy block/extent entry. Indexed by an rbtree in the perag to mark blocks that | ||
25 | * have been freed but whose transactions aren't committed to disk yet. | ||
26 | * | ||
27 | * Note that we use the transaction ID to record the transaction, not the | ||
28 | * transaction structure itself. See xfs_extent_busy_insert() for details. | ||
29 | */ | ||
30 | struct xfs_extent_busy { | ||
31 | struct rb_node rb_node; /* ag by-bno indexed search tree */ | ||
32 | struct list_head list; /* transaction busy extent list */ | ||
33 | xfs_agnumber_t agno; | ||
34 | xfs_agblock_t bno; | ||
35 | xfs_extlen_t length; | ||
36 | unsigned int flags; | ||
37 | #define XFS_EXTENT_BUSY_DISCARDED 0x01 /* undergoing a discard op. */ | ||
38 | #define XFS_EXTENT_BUSY_SKIP_DISCARD 0x02 /* do not discard */ | ||
39 | }; | ||
40 | |||
41 | void | ||
42 | xfs_extent_busy_insert(struct xfs_trans *tp, xfs_agnumber_t agno, | ||
43 | xfs_agblock_t bno, xfs_extlen_t len, unsigned int flags); | ||
44 | |||
45 | void | ||
46 | xfs_extent_busy_clear(struct xfs_mount *mp, struct list_head *list, | ||
47 | bool do_discard); | ||
48 | |||
49 | int | ||
50 | xfs_extent_busy_search(struct xfs_mount *mp, xfs_agnumber_t agno, | ||
51 | xfs_agblock_t bno, xfs_extlen_t len); | ||
52 | |||
53 | void | ||
54 | xfs_extent_busy_reuse(struct xfs_mount *mp, xfs_agnumber_t agno, | ||
55 | xfs_agblock_t fbno, xfs_extlen_t flen, bool userdata); | ||
56 | |||
57 | void | ||
58 | xfs_extent_busy_trim(struct xfs_alloc_arg *args, xfs_agblock_t bno, | ||
59 | xfs_extlen_t len, xfs_agblock_t *rbno, xfs_extlen_t *rlen); | ||
60 | |||
61 | int | ||
62 | xfs_extent_busy_ag_cmp(void *priv, struct list_head *a, struct list_head *b); | ||
63 | |||
64 | static inline void xfs_extent_busy_sort(struct list_head *list) | ||
65 | { | ||
66 | list_sort(NULL, list, xfs_extent_busy_ag_cmp); | ||
67 | } | ||
68 | |||
69 | #endif /* __XFS_EXTENT_BUSY_H__ */ | ||
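
A tiny stand-alone illustration of how xfs_extent_busy_clear() uses these flags: a discard-eligible extent is marked XFS_EXTENT_BUSY_DISCARDED (a plain assignment, matching the code above) instead of being freed immediately. Only the two flag constants are taken from the header; everything else is illustrative:

#include <stdbool.h>
#include <stdio.h>

#define XFS_EXTENT_BUSY_DISCARDED    0x01	/* undergoing a discard op. */
#define XFS_EXTENT_BUSY_SKIP_DISCARD 0x02	/* do not discard */

static bool keep_for_discard(unsigned int flags, unsigned int length,
			     bool do_discard)
{
	return do_discard && length &&
	       !(flags & XFS_EXTENT_BUSY_SKIP_DISCARD);
}

int main(void)
{
	unsigned int flags = 0;

	if (keep_for_discard(flags, 8, true))
		flags = XFS_EXTENT_BUSY_DISCARDED; /* assignment, not |= */
	printf("flags=%#x\n", flags);
	return 0;
}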
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c index 35c2aff38b20..feb36d7551ae 100644 --- a/fs/xfs/xfs_extfree_item.c +++ b/fs/xfs/xfs_extfree_item.c | |||
@@ -19,7 +19,6 @@ | |||
19 | #include "xfs_fs.h" | 19 | #include "xfs_fs.h" |
20 | #include "xfs_types.h" | 20 | #include "xfs_types.h" |
21 | #include "xfs_log.h" | 21 | #include "xfs_log.h" |
22 | #include "xfs_inum.h" | ||
23 | #include "xfs_trans.h" | 22 | #include "xfs_trans.h" |
24 | #include "xfs_buf_item.h" | 23 | #include "xfs_buf_item.h" |
25 | #include "xfs_sb.h" | 24 | #include "xfs_sb.h" |
@@ -64,7 +63,8 @@ __xfs_efi_release( | |||
64 | if (!test_and_clear_bit(XFS_EFI_COMMITTED, &efip->efi_flags)) { | 63 | if (!test_and_clear_bit(XFS_EFI_COMMITTED, &efip->efi_flags)) { |
65 | spin_lock(&ailp->xa_lock); | 64 | spin_lock(&ailp->xa_lock); |
66 | /* xfs_trans_ail_delete() drops the AIL lock. */ | 65 | /* xfs_trans_ail_delete() drops the AIL lock. */ |
67 | xfs_trans_ail_delete(ailp, &efip->efi_item); | 66 | xfs_trans_ail_delete(ailp, &efip->efi_item, |
67 | SHUTDOWN_LOG_IO_ERROR); | ||
68 | xfs_efi_item_free(efip); | 68 | xfs_efi_item_free(efip); |
69 | } | 69 | } |
70 | } | 70 | } |
@@ -147,22 +147,20 @@ xfs_efi_item_unpin( | |||
147 | } | 147 | } |
148 | 148 | ||
149 | /* | 149 | /* |
150 | * Efi items have no locking or pushing. However, since EFIs are | 150 | * Efi items have no locking or pushing. However, since EFIs are pulled from |
151 | * pulled from the AIL when their corresponding EFDs are committed | 151 | * the AIL when their corresponding EFDs are committed to disk, their situation |
152 | * to disk, their situation is very similar to being pinned. Return | 152 | * is very similar to being pinned. Return XFS_ITEM_PINNED so that the caller |
153 | * XFS_ITEM_PINNED so that the caller will eventually flush the log. | 153 | * will eventually flush the log. This should help in getting the EFI out of |
154 | * This should help in getting the EFI out of the AIL. | 154 | * the AIL. |
155 | */ | 155 | */ |
156 | STATIC uint | 156 | STATIC uint |
157 | xfs_efi_item_trylock( | 157 | xfs_efi_item_push( |
158 | struct xfs_log_item *lip) | 158 | struct xfs_log_item *lip, |
159 | struct list_head *buffer_list) | ||
159 | { | 160 | { |
160 | return XFS_ITEM_PINNED; | 161 | return XFS_ITEM_PINNED; |
161 | } | 162 | } |
162 | 163 | ||
163 | /* | ||
164 | * Efi items have no locking, so just return. | ||
165 | */ | ||
166 | STATIC void | 164 | STATIC void |
167 | xfs_efi_item_unlock( | 165 | xfs_efi_item_unlock( |
168 | struct xfs_log_item *lip) | 166 | struct xfs_log_item *lip) |
@@ -190,17 +188,6 @@ xfs_efi_item_committed( | |||
190 | } | 188 | } |
191 | 189 | ||
192 | /* | 190 | /* |
193 | * There isn't much you can do to push on an efi item. It is simply | ||
194 | * stuck waiting for all of its corresponding efd items to be | ||
195 | * committed to disk. | ||
196 | */ | ||
197 | STATIC void | ||
198 | xfs_efi_item_push( | ||
199 | struct xfs_log_item *lip) | ||
200 | { | ||
201 | } | ||
202 | |||
203 | /* | ||
204 | * The EFI dependency tracking op doesn't do squat. It can't because | 191 | * The EFI dependency tracking op doesn't do squat. It can't because |
205 | * it doesn't know where the free extent is coming from. The dependency | 192 | * it doesn't know where the free extent is coming from. The dependency |
206 | * tracking has to be handled by the "enclosing" metadata object. For | 193 | * tracking has to be handled by the "enclosing" metadata object. For |
@@ -222,7 +209,6 @@ static const struct xfs_item_ops xfs_efi_item_ops = { | |||
222 | .iop_format = xfs_efi_item_format, | 209 | .iop_format = xfs_efi_item_format, |
223 | .iop_pin = xfs_efi_item_pin, | 210 | .iop_pin = xfs_efi_item_pin, |
224 | .iop_unpin = xfs_efi_item_unpin, | 211 | .iop_unpin = xfs_efi_item_unpin, |
225 | .iop_trylock = xfs_efi_item_trylock, | ||
226 | .iop_unlock = xfs_efi_item_unlock, | 212 | .iop_unlock = xfs_efi_item_unlock, |
227 | .iop_committed = xfs_efi_item_committed, | 213 | .iop_committed = xfs_efi_item_committed, |
228 | .iop_push = xfs_efi_item_push, | 214 | .iop_push = xfs_efi_item_push, |
@@ -404,19 +390,17 @@ xfs_efd_item_unpin( | |||
404 | } | 390 | } |
405 | 391 | ||
406 | /* | 392 | /* |
407 | * Efd items have no locking, so just return success. | 393 | * There isn't much you can do to push on an efd item. It is simply stuck |
394 | * waiting for the log to be flushed to disk. | ||
408 | */ | 395 | */ |
409 | STATIC uint | 396 | STATIC uint |
410 | xfs_efd_item_trylock( | 397 | xfs_efd_item_push( |
411 | struct xfs_log_item *lip) | 398 | struct xfs_log_item *lip, |
399 | struct list_head *buffer_list) | ||
412 | { | 400 | { |
413 | return XFS_ITEM_LOCKED; | 401 | return XFS_ITEM_PINNED; |
414 | } | 402 | } |
415 | 403 | ||
416 | /* | ||
417 | * Efd items have no locking or pushing, so return failure | ||
418 | * so that the caller doesn't bother with us. | ||
419 | */ | ||
420 | STATIC void | 404 | STATIC void |
421 | xfs_efd_item_unlock( | 405 | xfs_efd_item_unlock( |
422 | struct xfs_log_item *lip) | 406 | struct xfs_log_item *lip) |
@@ -451,16 +435,6 @@ xfs_efd_item_committed( | |||
451 | } | 435 | } |
452 | 436 | ||
453 | /* | 437 | /* |
454 | * There isn't much you can do to push on an efd item. It is simply | ||
455 | * stuck waiting for the log to be flushed to disk. | ||
456 | */ | ||
457 | STATIC void | ||
458 | xfs_efd_item_push( | ||
459 | struct xfs_log_item *lip) | ||
460 | { | ||
461 | } | ||
462 | |||
463 | /* | ||
464 | * The EFD dependency tracking op doesn't do squat. It can't because | 438 | * The EFD dependency tracking op doesn't do squat. It can't because |
465 | * it doesn't know where the free extent is coming from. The dependency | 439 | * it doesn't know where the free extent is coming from. The dependency |
466 | * tracking has to be handled by the "enclosing" metadata object. For | 440 | * tracking has to be handled by the "enclosing" metadata object. For |
@@ -482,7 +456,6 @@ static const struct xfs_item_ops xfs_efd_item_ops = { | |||
482 | .iop_format = xfs_efd_item_format, | 456 | .iop_format = xfs_efd_item_format, |
483 | .iop_pin = xfs_efd_item_pin, | 457 | .iop_pin = xfs_efd_item_pin, |
484 | .iop_unpin = xfs_efd_item_unpin, | 458 | .iop_unpin = xfs_efd_item_unpin, |
485 | .iop_trylock = xfs_efd_item_trylock, | ||
486 | .iop_unlock = xfs_efd_item_unlock, | 459 | .iop_unlock = xfs_efd_item_unlock, |
487 | .iop_committed = xfs_efd_item_committed, | 460 | .iop_committed = xfs_efd_item_committed, |
488 | .iop_push = xfs_efd_item_push, | 461 | .iop_push = xfs_efd_item_push, |
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 54a67dd9ac0a..8d214b87f6bb 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c | |||
@@ -17,9 +17,7 @@ | |||
17 | */ | 17 | */ |
18 | #include "xfs.h" | 18 | #include "xfs.h" |
19 | #include "xfs_fs.h" | 19 | #include "xfs_fs.h" |
20 | #include "xfs_bit.h" | ||
21 | #include "xfs_log.h" | 20 | #include "xfs_log.h" |
22 | #include "xfs_inum.h" | ||
23 | #include "xfs_sb.h" | 21 | #include "xfs_sb.h" |
24 | #include "xfs_ag.h" | 22 | #include "xfs_ag.h" |
25 | #include "xfs_trans.h" | 23 | #include "xfs_trans.h" |
@@ -396,114 +394,96 @@ xfs_file_splice_write( | |||
396 | } | 394 | } |
397 | 395 | ||
398 | /* | 396 | /* |
399 | * This routine is called to handle zeroing any space in the last | 397 | * This routine is called to handle zeroing any space in the last block of the |
400 | * block of the file that is beyond the EOF. We do this since the | 398 | * file that is beyond the EOF. We do this since the size is being increased |
401 | * size is being increased without writing anything to that block | 399 | * without writing anything to that block and we don't want to read the |
402 | * and we don't want anyone to read the garbage on the disk. | 400 | * garbage on the disk. |
403 | */ | 401 | */ |
404 | STATIC int /* error (positive) */ | 402 | STATIC int /* error (positive) */ |
405 | xfs_zero_last_block( | 403 | xfs_zero_last_block( |
406 | xfs_inode_t *ip, | 404 | struct xfs_inode *ip, |
407 | xfs_fsize_t offset, | 405 | xfs_fsize_t offset, |
408 | xfs_fsize_t isize) | 406 | xfs_fsize_t isize) |
409 | { | 407 | { |
410 | xfs_fileoff_t last_fsb; | 408 | struct xfs_mount *mp = ip->i_mount; |
411 | xfs_mount_t *mp = ip->i_mount; | 409 | xfs_fileoff_t last_fsb = XFS_B_TO_FSBT(mp, isize); |
412 | int nimaps; | 410 | int zero_offset = XFS_B_FSB_OFFSET(mp, isize); |
413 | int zero_offset; | 411 | int zero_len; |
414 | int zero_len; | 412 | int nimaps = 1; |
415 | int error = 0; | 413 | int error = 0; |
416 | xfs_bmbt_irec_t imap; | 414 | struct xfs_bmbt_irec imap; |
417 | |||
418 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); | ||
419 | |||
420 | zero_offset = XFS_B_FSB_OFFSET(mp, isize); | ||
421 | if (zero_offset == 0) { | ||
422 | /* | ||
423 | * There are no extra bytes in the last block on disk to | ||
424 | * zero, so return. | ||
425 | */ | ||
426 | return 0; | ||
427 | } | ||
428 | 415 | ||
429 | last_fsb = XFS_B_TO_FSBT(mp, isize); | 416 | xfs_ilock(ip, XFS_ILOCK_EXCL); |
430 | nimaps = 1; | ||
431 | error = xfs_bmapi_read(ip, last_fsb, 1, &imap, &nimaps, 0); | 417 | error = xfs_bmapi_read(ip, last_fsb, 1, &imap, &nimaps, 0); |
418 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | ||
432 | if (error) | 419 | if (error) |
433 | return error; | 420 | return error; |
421 | |||
434 | ASSERT(nimaps > 0); | 422 | ASSERT(nimaps > 0); |
423 | |||
435 | /* | 424 | /* |
436 | * If the block underlying isize is just a hole, then there | 425 | * If the block underlying isize is just a hole, then there |
437 | * is nothing to zero. | 426 | * is nothing to zero. |
438 | */ | 427 | */ |
439 | if (imap.br_startblock == HOLESTARTBLOCK) { | 428 | if (imap.br_startblock == HOLESTARTBLOCK) |
440 | return 0; | 429 | return 0; |
441 | } | ||
442 | /* | ||
443 | * Zero the part of the last block beyond the EOF, and write it | ||
444 | * out sync. We need to drop the ilock while we do this so we | ||
445 | * don't deadlock when the buffer cache calls back to us. | ||
446 | */ | ||
447 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | ||
448 | 430 | ||
449 | zero_len = mp->m_sb.sb_blocksize - zero_offset; | 431 | zero_len = mp->m_sb.sb_blocksize - zero_offset; |
450 | if (isize + zero_len > offset) | 432 | if (isize + zero_len > offset) |
451 | zero_len = offset - isize; | 433 | zero_len = offset - isize; |
452 | error = xfs_iozero(ip, isize, zero_len); | 434 | return xfs_iozero(ip, isize, zero_len); |
453 | |||
454 | xfs_ilock(ip, XFS_ILOCK_EXCL); | ||
455 | ASSERT(error >= 0); | ||
456 | return error; | ||
457 | } | 435 | } |
458 | 436 | ||
459 | /* | 437 | /* |
460 | * Zero any on disk space between the current EOF and the new, | 438 | * Zero any on disk space between the current EOF and the new, larger EOF. |
461 | * larger EOF. This handles the normal case of zeroing the remainder | 439 | * |
462 | * of the last block in the file and the unusual case of zeroing blocks | 440 | * This handles the normal case of zeroing the remainder of the last block in |
463 | * out beyond the size of the file. This second case only happens | 441 | * the file and the unusual case of zeroing blocks out beyond the size of the |
464 | * with fixed size extents and when the system crashes before the inode | 442 | * file. This second case only happens with fixed size extents and when the |
465 | * size was updated but after blocks were allocated. If fill is set, | 443 | * system crashes before the inode size was updated but after blocks were |
466 | * then any holes in the range are filled and zeroed. If not, the holes | 444 | * allocated. |
467 | * are left alone as holes. | 445 | * |
446 | * Expects the iolock to be held exclusive, and will take the ilock internally. | ||
468 | */ | 447 | */ |
469 | |||
470 | int /* error (positive) */ | 448 | int /* error (positive) */ |
471 | xfs_zero_eof( | 449 | xfs_zero_eof( |
472 | xfs_inode_t *ip, | 450 | struct xfs_inode *ip, |
473 | xfs_off_t offset, /* starting I/O offset */ | 451 | xfs_off_t offset, /* starting I/O offset */ |
474 | xfs_fsize_t isize) /* current inode size */ | 452 | xfs_fsize_t isize) /* current inode size */ |
475 | { | 453 | { |
476 | xfs_mount_t *mp = ip->i_mount; | 454 | struct xfs_mount *mp = ip->i_mount; |
477 | xfs_fileoff_t start_zero_fsb; | 455 | xfs_fileoff_t start_zero_fsb; |
478 | xfs_fileoff_t end_zero_fsb; | 456 | xfs_fileoff_t end_zero_fsb; |
479 | xfs_fileoff_t zero_count_fsb; | 457 | xfs_fileoff_t zero_count_fsb; |
480 | xfs_fileoff_t last_fsb; | 458 | xfs_fileoff_t last_fsb; |
481 | xfs_fileoff_t zero_off; | 459 | xfs_fileoff_t zero_off; |
482 | xfs_fsize_t zero_len; | 460 | xfs_fsize_t zero_len; |
483 | int nimaps; | 461 | int nimaps; |
484 | int error = 0; | 462 | int error = 0; |
485 | xfs_bmbt_irec_t imap; | 463 | struct xfs_bmbt_irec imap; |
486 | 464 | ||
487 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL)); | 465 | ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); |
488 | ASSERT(offset > isize); | 466 | ASSERT(offset > isize); |
489 | 467 | ||
490 | /* | 468 | /* |
491 | * First handle zeroing the block on which isize resides. | 469 | * First handle zeroing the block on which isize resides. |
470 | * | ||
492 | * We only zero a part of that block so it is handled specially. | 471 | * We only zero a part of that block so it is handled specially. |
493 | */ | 472 | */ |
494 | error = xfs_zero_last_block(ip, offset, isize); | 473 | if (XFS_B_FSB_OFFSET(mp, isize) != 0) { |
495 | if (error) { | 474 | error = xfs_zero_last_block(ip, offset, isize); |
496 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL)); | 475 | if (error) |
497 | return error; | 476 | return error; |
498 | } | 477 | } |
499 | 478 | ||
500 | /* | 479 | /* |
501 | * Calculate the range between the new size and the old | 480 | * Calculate the range between the new size and the old where blocks |
502 | * where blocks needing to be zeroed may exist. To get the | 481 | * needing to be zeroed may exist. |
503 | * block where the last byte in the file currently resides, | 482 | * |
504 | * we need to subtract one from the size and truncate back | 483 | * To get the block where the last byte in the file currently resides, |
505 | * to a block boundary. We subtract 1 in case the size is | 484 | * we need to subtract one from the size and truncate back to a block |
506 | * exactly on a block boundary. | 485 | * boundary. We subtract 1 in case the size is exactly on a block |
486 | * boundary. | ||
507 | */ | 487 | */ |
508 | last_fsb = isize ? XFS_B_TO_FSBT(mp, isize - 1) : (xfs_fileoff_t)-1; | 488 | last_fsb = isize ? XFS_B_TO_FSBT(mp, isize - 1) : (xfs_fileoff_t)-1; |
509 | start_zero_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)isize); | 489 | start_zero_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)isize); |
@@ -521,23 +501,18 @@ xfs_zero_eof( | |||
521 | while (start_zero_fsb <= end_zero_fsb) { | 501 | while (start_zero_fsb <= end_zero_fsb) { |
522 | nimaps = 1; | 502 | nimaps = 1; |
523 | zero_count_fsb = end_zero_fsb - start_zero_fsb + 1; | 503 | zero_count_fsb = end_zero_fsb - start_zero_fsb + 1; |
504 | |||
505 | xfs_ilock(ip, XFS_ILOCK_EXCL); | ||
524 | error = xfs_bmapi_read(ip, start_zero_fsb, zero_count_fsb, | 506 | error = xfs_bmapi_read(ip, start_zero_fsb, zero_count_fsb, |
525 | &imap, &nimaps, 0); | 507 | &imap, &nimaps, 0); |
526 | if (error) { | 508 | xfs_iunlock(ip, XFS_ILOCK_EXCL); |
527 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL)); | 509 | if (error) |
528 | return error; | 510 | return error; |
529 | } | 511 | |
530 | ASSERT(nimaps > 0); | 512 | ASSERT(nimaps > 0); |
531 | 513 | ||
532 | if (imap.br_state == XFS_EXT_UNWRITTEN || | 514 | if (imap.br_state == XFS_EXT_UNWRITTEN || |
533 | imap.br_startblock == HOLESTARTBLOCK) { | 515 | imap.br_startblock == HOLESTARTBLOCK) { |
534 | /* | ||
535 | * This loop handles initializing pages that were | ||
536 | * partially initialized by the code below this | ||
537 | * loop. It basically zeroes the part of the page | ||
538 | * that sits on a hole and sets the page as P_HOLE | ||
539 | * and calls remapf if it is a mapped file. | ||
540 | */ | ||
541 | start_zero_fsb = imap.br_startoff + imap.br_blockcount; | 516 | start_zero_fsb = imap.br_startoff + imap.br_blockcount; |
542 | ASSERT(start_zero_fsb <= (end_zero_fsb + 1)); | 517 | ASSERT(start_zero_fsb <= (end_zero_fsb + 1)); |
543 | continue; | 518 | continue; |
@@ -545,11 +520,7 @@ xfs_zero_eof( | |||
545 | 520 | ||
546 | /* | 521 | /* |
547 | * There are blocks we need to zero. | 522 | * There are blocks we need to zero. |
548 | * Drop the inode lock while we're doing the I/O. | ||
549 | * We'll still have the iolock to protect us. | ||
550 | */ | 523 | */ |
551 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | ||
552 | |||
553 | zero_off = XFS_FSB_TO_B(mp, start_zero_fsb); | 524 | zero_off = XFS_FSB_TO_B(mp, start_zero_fsb); |
554 | zero_len = XFS_FSB_TO_B(mp, imap.br_blockcount); | 525 | zero_len = XFS_FSB_TO_B(mp, imap.br_blockcount); |
555 | 526 | ||
@@ -557,22 +528,14 @@ xfs_zero_eof( | |||
557 | zero_len = offset - zero_off; | 528 | zero_len = offset - zero_off; |
558 | 529 | ||
559 | error = xfs_iozero(ip, zero_off, zero_len); | 530 | error = xfs_iozero(ip, zero_off, zero_len); |
560 | if (error) { | 531 | if (error) |
561 | goto out_lock; | 532 | return error; |
562 | } | ||
563 | 533 | ||
564 | start_zero_fsb = imap.br_startoff + imap.br_blockcount; | 534 | start_zero_fsb = imap.br_startoff + imap.br_blockcount; |
565 | ASSERT(start_zero_fsb <= (end_zero_fsb + 1)); | 535 | ASSERT(start_zero_fsb <= (end_zero_fsb + 1)); |
566 | |||
567 | xfs_ilock(ip, XFS_ILOCK_EXCL); | ||
568 | } | 536 | } |
569 | 537 | ||
570 | return 0; | 538 | return 0; |
571 | |||
572 | out_lock: | ||
573 | xfs_ilock(ip, XFS_ILOCK_EXCL); | ||
574 | ASSERT(error >= 0); | ||
575 | return error; | ||
576 | } | 539 | } |
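
The boundary arithmetic above is easy to get wrong by one block. A user-space sketch with a 4096-byte block size, assuming end_zero_fsb is derived from offset - 1 the same way last_fsb is derived from isize - 1 (that computation falls outside the hunk shown):

#include <stdint.h>
#include <stdio.h>

#define BLKSZ 4096ULL

static uint64_t b_to_fsbt(uint64_t bytes)	/* truncate to a block */
{
	return bytes / BLKSZ;
}

static uint64_t b_to_fsb(uint64_t bytes)	/* round up to a block */
{
	return (bytes + BLKSZ - 1) / BLKSZ;
}

int main(void)
{
	uint64_t isize = 8192, offset = 20000;

	/* subtract 1 first so a block-aligned size stays in its own block */
	uint64_t last_fsb = isize ? b_to_fsbt(isize - 1) : UINT64_MAX;
	uint64_t start_zero_fsb = b_to_fsb(isize);	/* first block past EOF */
	uint64_t end_zero_fsb = b_to_fsbt(offset - 1);	/* block of offset-1 */

	printf("last=%llu start=%llu end=%llu\n",
	       (unsigned long long)last_fsb,
	       (unsigned long long)start_zero_fsb,
	       (unsigned long long)end_zero_fsb);	/* last=1 start=2 end=4 */
	return 0;
}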
577 | 540 | ||
578 | /* | 541 | /* |
@@ -593,35 +556,29 @@ xfs_file_aio_write_checks( | |||
593 | struct xfs_inode *ip = XFS_I(inode); | 556 | struct xfs_inode *ip = XFS_I(inode); |
594 | int error = 0; | 557 | int error = 0; |
595 | 558 | ||
596 | xfs_rw_ilock(ip, XFS_ILOCK_EXCL); | ||
597 | restart: | 559 | restart: |
598 | error = generic_write_checks(file, pos, count, S_ISBLK(inode->i_mode)); | 560 | error = generic_write_checks(file, pos, count, S_ISBLK(inode->i_mode)); |
599 | if (error) { | 561 | if (error) |
600 | xfs_rw_iunlock(ip, XFS_ILOCK_EXCL); | ||
601 | return error; | 562 | return error; |
602 | } | ||
603 | 563 | ||
604 | /* | 564 | /* |
605 | * If the offset is beyond the size of the file, we need to zero any | 565 | * If the offset is beyond the size of the file, we need to zero any |
606 | * blocks that fall between the existing EOF and the start of this | 566 | * blocks that fall between the existing EOF and the start of this |
607 | * write. If zeroing is needed and we are currently holding the | 567 | * write. If zeroing is needed and we are currently holding the |
608 | * iolock shared, we need to update it to exclusive which involves | 568 | * iolock shared, we need to update it to exclusive which implies |
609 | * dropping all locks and relocking to maintain correct locking order. | 569 | * having to redo all checks before. |
610 | * If we do this, restart the function to ensure all checks and values | ||
611 | * are still valid. | ||
612 | */ | 570 | */ |
613 | if (*pos > i_size_read(inode)) { | 571 | if (*pos > i_size_read(inode)) { |
614 | if (*iolock == XFS_IOLOCK_SHARED) { | 572 | if (*iolock == XFS_IOLOCK_SHARED) { |
615 | xfs_rw_iunlock(ip, XFS_ILOCK_EXCL | *iolock); | 573 | xfs_rw_iunlock(ip, *iolock); |
616 | *iolock = XFS_IOLOCK_EXCL; | 574 | *iolock = XFS_IOLOCK_EXCL; |
617 | xfs_rw_ilock(ip, XFS_ILOCK_EXCL | *iolock); | 575 | xfs_rw_ilock(ip, *iolock); |
618 | goto restart; | 576 | goto restart; |
619 | } | 577 | } |
620 | error = -xfs_zero_eof(ip, *pos, i_size_read(inode)); | 578 | error = -xfs_zero_eof(ip, *pos, i_size_read(inode)); |
579 | if (error) | ||
580 | return error; | ||
621 | } | 581 | } |
622 | xfs_rw_iunlock(ip, XFS_ILOCK_EXCL); | ||
623 | if (error) | ||
624 | return error; | ||
625 | 582 | ||
626 | /* | 583 | /* |
627 | * Updating the timestamps will grab the ilock again from | 584 | * Updating the timestamps will grab the ilock again from |
@@ -638,7 +595,6 @@ restart: | |||
638 | * people from modifying setuid and setgid binaries. | 595 | * people from modifying setuid and setgid binaries. |
639 | */ | 596 | */ |
640 | return file_remove_suid(file); | 597 | return file_remove_suid(file); |
641 | |||
642 | } | 598 | } |
643 | 599 | ||
644 | /* | 600 | /* |
@@ -1007,8 +963,149 @@ xfs_vm_page_mkwrite( | |||
1007 | return block_page_mkwrite(vma, vmf, xfs_get_blocks); | 963 | return block_page_mkwrite(vma, vmf, xfs_get_blocks); |
1008 | } | 964 | } |
1009 | 965 | ||
966 | STATIC loff_t | ||
967 | xfs_seek_data( | ||
968 | struct file *file, | ||
969 | loff_t start, | ||
970 | u32 type) | ||
971 | { | ||
972 | struct inode *inode = file->f_mapping->host; | ||
973 | struct xfs_inode *ip = XFS_I(inode); | ||
974 | struct xfs_mount *mp = ip->i_mount; | ||
975 | struct xfs_bmbt_irec map[2]; | ||
976 | int nmap = 2; | ||
977 | loff_t uninitialized_var(offset); | ||
978 | xfs_fsize_t isize; | ||
979 | xfs_fileoff_t fsbno; | ||
980 | xfs_filblks_t end; | ||
981 | uint lock; | ||
982 | int error; | ||
983 | |||
984 | lock = xfs_ilock_map_shared(ip); | ||
985 | |||
986 | isize = i_size_read(inode); | ||
987 | if (start >= isize) { | ||
988 | error = ENXIO; | ||
989 | goto out_unlock; | ||
990 | } | ||
991 | |||
992 | fsbno = XFS_B_TO_FSBT(mp, start); | ||
993 | |||
994 | /* | ||
995 | * Try to read extents from the first block indicated | ||
996 | * by fsbno to the end block of the file. | ||
997 | */ | ||
998 | end = XFS_B_TO_FSB(mp, isize); | ||
999 | |||
1000 | error = xfs_bmapi_read(ip, fsbno, end - fsbno, map, &nmap, | ||
1001 | XFS_BMAPI_ENTIRE); | ||
1002 | if (error) | ||
1003 | goto out_unlock; | ||
1004 | |||
1005 | /* | ||
1006 | * Treat unwritten extent as data extent since it might | ||
1007 | * contains dirty data in page cache. | ||
1008 | */ | ||
1009 | if (map[0].br_startblock != HOLESTARTBLOCK) { | ||
1010 | offset = max_t(loff_t, start, | ||
1011 | XFS_FSB_TO_B(mp, map[0].br_startoff)); | ||
1012 | } else { | ||
1013 | if (nmap == 1) { | ||
1014 | error = ENXIO; | ||
1015 | goto out_unlock; | ||
1016 | } | ||
1017 | |||
1018 | offset = max_t(loff_t, start, | ||
1019 | XFS_FSB_TO_B(mp, map[1].br_startoff)); | ||
1020 | } | ||
1021 | |||
1022 | if (offset != file->f_pos) | ||
1023 | file->f_pos = offset; | ||
1024 | |||
1025 | out_unlock: | ||
1026 | xfs_iunlock_map_shared(ip, lock); | ||
1027 | |||
1028 | if (error) | ||
1029 | return -error; | ||
1030 | return offset; | ||
1031 | } | ||
1032 | |||
1033 | STATIC loff_t | ||
1034 | xfs_seek_hole( | ||
1035 | struct file *file, | ||
1036 | loff_t start, | ||
1037 | u32 type) | ||
1038 | { | ||
1039 | struct inode *inode = file->f_mapping->host; | ||
1040 | struct xfs_inode *ip = XFS_I(inode); | ||
1041 | struct xfs_mount *mp = ip->i_mount; | ||
1042 | loff_t uninitialized_var(offset); | ||
1043 | loff_t holeoff; | ||
1044 | xfs_fsize_t isize; | ||
1045 | xfs_fileoff_t fsbno; | ||
1046 | uint lock; | ||
1047 | int error; | ||
1048 | |||
1049 | if (XFS_FORCED_SHUTDOWN(mp)) | ||
1050 | return -XFS_ERROR(EIO); | ||
1051 | |||
1052 | lock = xfs_ilock_map_shared(ip); | ||
1053 | |||
1054 | isize = i_size_read(inode); | ||
1055 | if (start >= isize) { | ||
1056 | error = ENXIO; | ||
1057 | goto out_unlock; | ||
1058 | } | ||
1059 | |||
1060 | fsbno = XFS_B_TO_FSBT(mp, start); | ||
1061 | error = xfs_bmap_first_unused(NULL, ip, 1, &fsbno, XFS_DATA_FORK); | ||
1062 | if (error) | ||
1063 | goto out_unlock; | ||
1064 | |||
1065 | holeoff = XFS_FSB_TO_B(mp, fsbno); | ||
1066 | if (holeoff <= start) | ||
1067 | offset = start; | ||
1068 | else { | ||
1069 | /* | ||
1070 | * xfs_bmap_first_unused() could return a value bigger than | ||
1071 | * isize if there are no more holes past the supplied offset. | ||
1072 | */ | ||
1073 | offset = min_t(loff_t, holeoff, isize); | ||
1074 | } | ||
1075 | |||
1076 | if (offset != file->f_pos) | ||
1077 | file->f_pos = offset; | ||
1078 | |||
1079 | out_unlock: | ||
1080 | xfs_iunlock_map_shared(ip, lock); | ||
1081 | |||
1082 | if (error) | ||
1083 | return -error; | ||
1084 | return offset; | ||
1085 | } | ||
1086 | |||
1087 | STATIC loff_t | ||
1088 | xfs_file_llseek( | ||
1089 | struct file *file, | ||
1090 | loff_t offset, | ||
1091 | int origin) | ||
1092 | { | ||
1093 | switch (origin) { | ||
1094 | case SEEK_END: | ||
1095 | case SEEK_CUR: | ||
1096 | case SEEK_SET: | ||
1097 | return generic_file_llseek(file, offset, origin); | ||
1098 | case SEEK_DATA: | ||
1099 | return xfs_seek_data(file, offset, origin); | ||
1100 | case SEEK_HOLE: | ||
1101 | return xfs_seek_hole(file, offset, origin); | ||
1102 | default: | ||
1103 | return -EINVAL; | ||
1104 | } | ||
1105 | } | ||
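
For reference, a user-space example (not part of the patch) of the interface this wires up: lseek() with SEEK_DATA and SEEK_HOLE, exposed by glibc under _GNU_SOURCE. lseek() returns -1 with errno set to ENXIO when no data (or hole) exists at or after the given offset:

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(int argc, char **argv)
{
	off_t data, hole;
	int fd;

	if (argc < 2 || (fd = open(argv[1], O_RDONLY)) < 0)
		return 1;

	data = lseek(fd, 0, SEEK_DATA);		/* first data at or after 0 */
	if (data >= 0) {
		hole = lseek(fd, data, SEEK_HOLE); /* first hole after that */
		printf("data at %lld, hole at %lld\n",
		       (long long)data, (long long)hole);
	}
	close(fd);
	return 0;
}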
1106 | |||
1010 | const struct file_operations xfs_file_operations = { | 1107 | const struct file_operations xfs_file_operations = { |
1011 | .llseek = generic_file_llseek, | 1108 | .llseek = xfs_file_llseek, |
1012 | .read = do_sync_read, | 1109 | .read = do_sync_read, |
1013 | .write = do_sync_write, | 1110 | .write = do_sync_write, |
1014 | .aio_read = xfs_file_aio_read, | 1111 | .aio_read = xfs_file_aio_read, |
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index 1c6fdeb702ff..c25b094efbf7 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c | |||
@@ -18,8 +18,6 @@ | |||
18 | #include "xfs.h" | 18 | #include "xfs.h" |
19 | #include "xfs_fs.h" | 19 | #include "xfs_fs.h" |
20 | #include "xfs_types.h" | 20 | #include "xfs_types.h" |
21 | #include "xfs_bit.h" | ||
22 | #include "xfs_inum.h" | ||
23 | #include "xfs_log.h" | 21 | #include "xfs_log.h" |
24 | #include "xfs_trans.h" | 22 | #include "xfs_trans.h" |
25 | #include "xfs_sb.h" | 23 | #include "xfs_sb.h" |
@@ -39,7 +37,6 @@ | |||
39 | #include "xfs_itable.h" | 37 | #include "xfs_itable.h" |
40 | #include "xfs_trans_space.h" | 38 | #include "xfs_trans_space.h" |
41 | #include "xfs_rtalloc.h" | 39 | #include "xfs_rtalloc.h" |
42 | #include "xfs_rw.h" | ||
43 | #include "xfs_filestream.h" | 40 | #include "xfs_filestream.h" |
44 | #include "xfs_trace.h" | 41 | #include "xfs_trace.h" |
45 | 42 | ||
@@ -147,9 +144,9 @@ xfs_growfs_data_private( | |||
147 | if ((error = xfs_sb_validate_fsb_count(&mp->m_sb, nb))) | 144 | if ((error = xfs_sb_validate_fsb_count(&mp->m_sb, nb))) |
148 | return error; | 145 | return error; |
149 | dpct = pct - mp->m_sb.sb_imax_pct; | 146 | dpct = pct - mp->m_sb.sb_imax_pct; |
150 | bp = xfs_buf_read_uncached(mp, mp->m_ddev_targp, | 147 | bp = xfs_buf_read_uncached(mp->m_ddev_targp, |
151 | XFS_FSB_TO_BB(mp, nb) - XFS_FSS_TO_BB(mp, 1), | 148 | XFS_FSB_TO_BB(mp, nb) - XFS_FSS_TO_BB(mp, 1), |
152 | BBTOB(XFS_FSS_TO_BB(mp, 1)), 0); | 149 | XFS_FSS_TO_BB(mp, 1), 0); |
153 | if (!bp) | 150 | if (!bp) |
154 | return EIO; | 151 | return EIO; |
155 | xfs_buf_relse(bp); | 152 | xfs_buf_relse(bp); |
@@ -193,7 +190,7 @@ xfs_growfs_data_private( | |||
193 | */ | 190 | */ |
194 | bp = xfs_buf_get(mp->m_ddev_targp, | 191 | bp = xfs_buf_get(mp->m_ddev_targp, |
195 | XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)), | 192 | XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)), |
196 | XFS_FSS_TO_BB(mp, 1), XBF_LOCK | XBF_MAPPED); | 193 | XFS_FSS_TO_BB(mp, 1), 0); |
197 | if (!bp) { | 194 | if (!bp) { |
198 | error = ENOMEM; | 195 | error = ENOMEM; |
199 | goto error0; | 196 | goto error0; |
@@ -230,7 +227,7 @@ xfs_growfs_data_private( | |||
230 | */ | 227 | */ |
231 | bp = xfs_buf_get(mp->m_ddev_targp, | 228 | bp = xfs_buf_get(mp->m_ddev_targp, |
232 | XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)), | 229 | XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)), |
233 | XFS_FSS_TO_BB(mp, 1), XBF_LOCK | XBF_MAPPED); | 230 | XFS_FSS_TO_BB(mp, 1), 0); |
234 | if (!bp) { | 231 | if (!bp) { |
235 | error = ENOMEM; | 232 | error = ENOMEM; |
236 | goto error0; | 233 | goto error0; |
@@ -259,8 +256,7 @@ xfs_growfs_data_private( | |||
259 | */ | 256 | */ |
260 | bp = xfs_buf_get(mp->m_ddev_targp, | 257 | bp = xfs_buf_get(mp->m_ddev_targp, |
261 | XFS_AGB_TO_DADDR(mp, agno, XFS_BNO_BLOCK(mp)), | 258 | XFS_AGB_TO_DADDR(mp, agno, XFS_BNO_BLOCK(mp)), |
262 | BTOBB(mp->m_sb.sb_blocksize), | 259 | BTOBB(mp->m_sb.sb_blocksize), 0); |
263 | XBF_LOCK | XBF_MAPPED); | ||
264 | if (!bp) { | 260 | if (!bp) { |
265 | error = ENOMEM; | 261 | error = ENOMEM; |
266 | goto error0; | 262 | goto error0; |
@@ -286,8 +282,7 @@ xfs_growfs_data_private( | |||
286 | */ | 282 | */ |
287 | bp = xfs_buf_get(mp->m_ddev_targp, | 283 | bp = xfs_buf_get(mp->m_ddev_targp, |
288 | XFS_AGB_TO_DADDR(mp, agno, XFS_CNT_BLOCK(mp)), | 284 | XFS_AGB_TO_DADDR(mp, agno, XFS_CNT_BLOCK(mp)), |
289 | BTOBB(mp->m_sb.sb_blocksize), | 285 | BTOBB(mp->m_sb.sb_blocksize), 0); |
290 | XBF_LOCK | XBF_MAPPED); | ||
291 | if (!bp) { | 286 | if (!bp) { |
292 | error = ENOMEM; | 287 | error = ENOMEM; |
293 | goto error0; | 288 | goto error0; |
@@ -314,8 +309,7 @@ xfs_growfs_data_private( | |||
314 | */ | 309 | */ |
315 | bp = xfs_buf_get(mp->m_ddev_targp, | 310 | bp = xfs_buf_get(mp->m_ddev_targp, |
316 | XFS_AGB_TO_DADDR(mp, agno, XFS_IBT_BLOCK(mp)), | 311 | XFS_AGB_TO_DADDR(mp, agno, XFS_IBT_BLOCK(mp)), |
317 | BTOBB(mp->m_sb.sb_blocksize), | 312 | BTOBB(mp->m_sb.sb_blocksize), 0); |
318 | XBF_LOCK | XBF_MAPPED); | ||
319 | if (!bp) { | 313 | if (!bp) { |
320 | error = ENOMEM; | 314 | error = ENOMEM; |
321 | goto error0; | 315 | goto error0; |
@@ -405,7 +399,7 @@ xfs_growfs_data_private( | |||
405 | 399 | ||
406 | /* update secondary superblocks. */ | 400 | /* update secondary superblocks. */ |
407 | for (agno = 1; agno < nagcount; agno++) { | 401 | for (agno = 1; agno < nagcount; agno++) { |
408 | error = xfs_read_buf(mp, mp->m_ddev_targp, | 402 | error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, |
409 | XFS_AGB_TO_DADDR(mp, agno, XFS_SB_BLOCK(mp)), | 403 | XFS_AGB_TO_DADDR(mp, agno, XFS_SB_BLOCK(mp)), |
410 | XFS_FSS_TO_BB(mp, 1), 0, &bp); | 404 | XFS_FSS_TO_BB(mp, 1), 0, &bp); |
411 | if (error) { | 405 | if (error) { |
@@ -693,3 +687,63 @@ xfs_fs_goingdown( | |||
693 | 687 | ||
694 | return 0; | 688 | return 0; |
695 | } | 689 | } |
690 | |||
691 | /* | ||
692 | * Force a shutdown of the filesystem instantly while keeping the filesystem | ||
693 | * consistent. We don't do an unmount here; just shut down the shop and make sure | ||
694 | * that absolutely nothing persistent happens to this filesystem after this | ||
695 | * point. | ||
696 | */ | ||
697 | void | ||
698 | xfs_do_force_shutdown( | ||
699 | xfs_mount_t *mp, | ||
700 | int flags, | ||
701 | char *fname, | ||
702 | int lnnum) | ||
703 | { | ||
704 | int logerror; | ||
705 | |||
706 | logerror = flags & SHUTDOWN_LOG_IO_ERROR; | ||
707 | |||
708 | if (!(flags & SHUTDOWN_FORCE_UMOUNT)) { | ||
709 | xfs_notice(mp, | ||
710 | "%s(0x%x) called from line %d of file %s. Return address = 0x%p", | ||
711 | __func__, flags, lnnum, fname, __return_address); | ||
712 | } | ||
713 | /* | ||
714 | * No need to duplicate efforts. | ||
715 | */ | ||
716 | if (XFS_FORCED_SHUTDOWN(mp) && !logerror) | ||
717 | return; | ||
718 | |||
719 | /* | ||
720 | * This flags XFS_MOUNT_FS_SHUTDOWN, makes sure that we don't | ||
721 | * queue up anybody new on the log reservations, and wakes up | ||
722 | * everybody who's sleeping on log reservations to tell them | ||
723 | * the bad news. | ||
724 | */ | ||
725 | if (xfs_log_force_umount(mp, logerror)) | ||
726 | return; | ||
727 | |||
728 | if (flags & SHUTDOWN_CORRUPT_INCORE) { | ||
729 | xfs_alert_tag(mp, XFS_PTAG_SHUTDOWN_CORRUPT, | ||
730 | "Corruption of in-memory data detected. Shutting down filesystem"); | ||
731 | if (XFS_ERRLEVEL_HIGH <= xfs_error_level) | ||
732 | xfs_stack_trace(); | ||
733 | } else if (!(flags & SHUTDOWN_FORCE_UMOUNT)) { | ||
734 | if (logerror) { | ||
735 | xfs_alert_tag(mp, XFS_PTAG_SHUTDOWN_LOGERROR, | ||
736 | "Log I/O Error Detected. Shutting down filesystem"); | ||
737 | } else if (flags & SHUTDOWN_DEVICE_REQ) { | ||
738 | xfs_alert_tag(mp, XFS_PTAG_SHUTDOWN_IOERROR, | ||
739 | "All device paths lost. Shutting down filesystem"); | ||
740 | } else if (!(flags & SHUTDOWN_REMOTE_REQ)) { | ||
741 | xfs_alert_tag(mp, XFS_PTAG_SHUTDOWN_IOERROR, | ||
742 | "I/O Error Detected. Shutting down filesystem"); | ||
743 | } | ||
744 | } | ||
745 | if (!(flags & SHUTDOWN_FORCE_UMOUNT)) { | ||
746 | xfs_alert(mp, | ||
747 | "Please umount the filesystem and rectify the problem(s)"); | ||
748 | } | ||
749 | } | ||
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index dad1a31aa4fc..177a21a7ac49 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c | |||
@@ -200,8 +200,7 @@ xfs_ialloc_inode_init( | |||
200 | */ | 200 | */ |
201 | d = XFS_AGB_TO_DADDR(mp, agno, agbno + (j * blks_per_cluster)); | 201 | d = XFS_AGB_TO_DADDR(mp, agno, agbno + (j * blks_per_cluster)); |
202 | fbuf = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, | 202 | fbuf = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, |
203 | mp->m_bsize * blks_per_cluster, | 203 | mp->m_bsize * blks_per_cluster, 0); |
204 | XBF_LOCK); | ||
205 | if (!fbuf) | 204 | if (!fbuf) |
206 | return ENOMEM; | 205 | return ENOMEM; |
207 | /* | 206 | /* |
@@ -610,6 +609,13 @@ xfs_ialloc_get_rec( | |||
610 | /* | 609 | /* |
611 | * Visible inode allocation functions. | 610 | * Visible inode allocation functions. |
612 | */ | 611 | */ |
612 | /* | ||
613 | * Find a free (set) bit in the inode bitmask. | ||
614 | */ | ||
615 | static inline int xfs_ialloc_find_free(xfs_inofree_t *fp) | ||
616 | { | ||
617 | return xfs_lowbit64(*fp); | ||
618 | } | ||
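
What the moved helper computes is the index of the lowest set bit in the 64-bit inode free mask. A sketch using the GCC/Clang builtin in place of the kernel's xfs_lowbit64() (whose exact behaviour for an empty mask this does not claim to reproduce):

#include <stdint.h>
#include <stdio.h>

static int find_free(uint64_t mask)
{
	return mask ? __builtin_ctzll(mask) : -1;	/* -1: no free inodes */
}

int main(void)
{
	printf("%d\n", find_free(0x28ULL));	/* bits 3 and 5 set -> 3 */
	return 0;
}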
613 | 619 | ||
614 | /* | 620 | /* |
615 | * Allocate an inode on disk. | 621 | * Allocate an inode on disk. |
diff --git a/fs/xfs/xfs_ialloc.h b/fs/xfs/xfs_ialloc.h index 666a037398d6..65ac57c8063c 100644 --- a/fs/xfs/xfs_ialloc.h +++ b/fs/xfs/xfs_ialloc.h | |||
@@ -47,15 +47,6 @@ xfs_make_iptr(struct xfs_mount *mp, struct xfs_buf *b, int o) | |||
47 | } | 47 | } |
48 | 48 | ||
49 | /* | 49 | /* |
50 | * Find a free (set) bit in the inode bitmask. | ||
51 | */ | ||
52 | static inline int xfs_ialloc_find_free(xfs_inofree_t *fp) | ||
53 | { | ||
54 | return xfs_lowbit64(*fp); | ||
55 | } | ||
56 | |||
57 | |||
58 | /* | ||
59 | * Allocate an inode on disk. | 50 | * Allocate an inode on disk. |
60 | * Mode is used to tell whether the new inode will need space, and whether | 51 | * Mode is used to tell whether the new inode will need space, and whether |
61 | * it is a directory. | 52 | * it is a directory. |
diff --git a/fs/xfs/xfs_ialloc_btree.c b/fs/xfs/xfs_ialloc_btree.c index c6a75815aea0..2b8b7a37aa18 100644 --- a/fs/xfs/xfs_ialloc_btree.c +++ b/fs/xfs/xfs_ialloc_btree.c | |||
@@ -20,7 +20,6 @@ | |||
20 | #include "xfs_types.h" | 20 | #include "xfs_types.h" |
21 | #include "xfs_bit.h" | 21 | #include "xfs_bit.h" |
22 | #include "xfs_log.h" | 22 | #include "xfs_log.h" |
23 | #include "xfs_inum.h" | ||
24 | #include "xfs_trans.h" | 23 | #include "xfs_trans.h" |
25 | #include "xfs_sb.h" | 24 | #include "xfs_sb.h" |
26 | #include "xfs_ag.h" | 25 | #include "xfs_ag.h" |
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index bcc6c249b2c7..1bb4365e8c25 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c | |||
@@ -19,7 +19,6 @@ | |||
19 | #include "xfs_fs.h" | 19 | #include "xfs_fs.h" |
20 | #include "xfs_types.h" | 20 | #include "xfs_types.h" |
21 | #include "xfs_acl.h" | 21 | #include "xfs_acl.h" |
22 | #include "xfs_bit.h" | ||
23 | #include "xfs_log.h" | 22 | #include "xfs_log.h" |
24 | #include "xfs_inum.h" | 23 | #include "xfs_inum.h" |
25 | #include "xfs_trans.h" | 24 | #include "xfs_trans.h" |
@@ -123,23 +122,7 @@ xfs_inode_free( | |||
123 | xfs_idestroy_fork(ip, XFS_ATTR_FORK); | 122 | xfs_idestroy_fork(ip, XFS_ATTR_FORK); |
124 | 123 | ||
125 | if (ip->i_itemp) { | 124 | if (ip->i_itemp) { |
126 | /* | 125 | ASSERT(!(ip->i_itemp->ili_item.li_flags & XFS_LI_IN_AIL)); |
127 | * Only if we are shutting down the fs will we see an | ||
128 | * inode still in the AIL. If it is there, we should remove | ||
129 | * it to prevent a use-after-free from occurring. | ||
130 | */ | ||
131 | xfs_log_item_t *lip = &ip->i_itemp->ili_item; | ||
132 | struct xfs_ail *ailp = lip->li_ailp; | ||
133 | |||
134 | ASSERT(((lip->li_flags & XFS_LI_IN_AIL) == 0) || | ||
135 | XFS_FORCED_SHUTDOWN(ip->i_mount)); | ||
136 | if (lip->li_flags & XFS_LI_IN_AIL) { | ||
137 | spin_lock(&ailp->xa_lock); | ||
138 | if (lip->li_flags & XFS_LI_IN_AIL) | ||
139 | xfs_trans_ail_delete(ailp, lip); | ||
140 | else | ||
141 | spin_unlock(&ailp->xa_lock); | ||
142 | } | ||
143 | xfs_inode_item_destroy(ip); | 126 | xfs_inode_item_destroy(ip); |
144 | ip->i_itemp = NULL; | 127 | ip->i_itemp = NULL; |
145 | } | 128 | } |
@@ -334,9 +317,10 @@ xfs_iget_cache_miss( | |||
334 | /* | 317 | /* |
335 | * Preload the radix tree so we can insert safely under the | 318 | * Preload the radix tree so we can insert safely under the |
336 | * write spinlock. Note that we cannot sleep inside the preload | 319 | * write spinlock. Note that we cannot sleep inside the preload |
337 | * region. | 320 | * region. Since we can be called from transaction context, don't |
321 | * recurse into the file system. | ||
338 | */ | 322 | */ |
339 | if (radix_tree_preload(GFP_KERNEL)) { | 323 | if (radix_tree_preload(GFP_NOFS)) { |
340 | error = EAGAIN; | 324 | error = EAGAIN; |
341 | goto out_destroy; | 325 | goto out_destroy; |
342 | } | 326 | } |
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index bc46c0a133d3..a59eea09930a 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c | |||
@@ -20,7 +20,6 @@ | |||
20 | #include "xfs.h" | 20 | #include "xfs.h" |
21 | #include "xfs_fs.h" | 21 | #include "xfs_fs.h" |
22 | #include "xfs_types.h" | 22 | #include "xfs_types.h" |
23 | #include "xfs_bit.h" | ||
24 | #include "xfs_log.h" | 23 | #include "xfs_log.h" |
25 | #include "xfs_inum.h" | 24 | #include "xfs_inum.h" |
26 | #include "xfs_trans.h" | 25 | #include "xfs_trans.h" |
@@ -61,6 +60,20 @@ STATIC int xfs_iformat_local(xfs_inode_t *, xfs_dinode_t *, int, int); | |||
61 | STATIC int xfs_iformat_extents(xfs_inode_t *, xfs_dinode_t *, int); | 60 | STATIC int xfs_iformat_extents(xfs_inode_t *, xfs_dinode_t *, int); |
62 | STATIC int xfs_iformat_btree(xfs_inode_t *, xfs_dinode_t *, int); | 61 | STATIC int xfs_iformat_btree(xfs_inode_t *, xfs_dinode_t *, int); |
63 | 62 | ||
63 | /* | ||
64 | * Helper function to extract the extent size hint from an inode | ||
65 | */ | ||
66 | xfs_extlen_t | ||
67 | xfs_get_extsz_hint( | ||
68 | struct xfs_inode *ip) | ||
69 | { | ||
70 | if ((ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE) && ip->i_d.di_extsize) | ||
71 | return ip->i_d.di_extsize; | ||
72 | if (XFS_IS_REALTIME_INODE(ip)) | ||
73 | return ip->i_mount->m_sb.sb_rextsize; | ||
74 | return 0; | ||
75 | } | ||
76 | |||
64 | #ifdef DEBUG | 77 | #ifdef DEBUG |
65 | /* | 78 | /* |
66 | * Make sure that the extents in the given memory buffer | 79 | * Make sure that the extents in the given memory buffer |
@@ -137,6 +150,7 @@ xfs_imap_to_bp( | |||
137 | int ni; | 150 | int ni; |
138 | xfs_buf_t *bp; | 151 | xfs_buf_t *bp; |
139 | 152 | ||
153 | buf_flags |= XBF_UNMAPPED; | ||
140 | error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap->im_blkno, | 154 | error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap->im_blkno, |
141 | (int)imap->im_len, buf_flags, &bp); | 155 | (int)imap->im_len, buf_flags, &bp); |
142 | if (error) { | 156 | if (error) { |
@@ -226,7 +240,7 @@ xfs_inotobp( | |||
226 | if (error) | 240 | if (error) |
227 | return error; | 241 | return error; |
228 | 242 | ||
229 | error = xfs_imap_to_bp(mp, tp, &imap, &bp, XBF_LOCK, imap_flags); | 243 | error = xfs_imap_to_bp(mp, tp, &imap, &bp, 0, imap_flags); |
230 | if (error) | 244 | if (error) |
231 | return error; | 245 | return error; |
232 | 246 | ||
@@ -782,8 +796,7 @@ xfs_iread( | |||
782 | /* | 796 | /* |
783 | * Get pointers to the on-disk inode and the buffer containing it. | 797 | * Get pointers to the on-disk inode and the buffer containing it. |
784 | */ | 798 | */ |
785 | error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &bp, | 799 | error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &bp, 0, iget_flags); |
786 | XBF_LOCK, iget_flags); | ||
787 | if (error) | 800 | if (error) |
788 | return error; | 801 | return error; |
789 | dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset); | 802 | dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset); |
@@ -1342,7 +1355,7 @@ xfs_iunlink( | |||
1342 | * Here we put the head pointer into our next pointer, | 1355 | * Here we put the head pointer into our next pointer, |
1343 | * and then we fall through to point the head at us. | 1356 | * and then we fall through to point the head at us. |
1344 | */ | 1357 | */ |
1345 | error = xfs_itobp(mp, tp, ip, &dip, &ibp, XBF_LOCK); | 1358 | error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0); |
1346 | if (error) | 1359 | if (error) |
1347 | return error; | 1360 | return error; |
1348 | 1361 | ||
@@ -1423,7 +1436,7 @@ xfs_iunlink_remove( | |||
1423 | * of dealing with the buffer when there is no need to | 1436 | * of dealing with the buffer when there is no need to |
1424 | * change it. | 1437 | * change it. |
1425 | */ | 1438 | */ |
1426 | error = xfs_itobp(mp, tp, ip, &dip, &ibp, XBF_LOCK); | 1439 | error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0); |
1427 | if (error) { | 1440 | if (error) { |
1428 | xfs_warn(mp, "%s: xfs_itobp() returned error %d.", | 1441 | xfs_warn(mp, "%s: xfs_itobp() returned error %d.", |
1429 | __func__, error); | 1442 | __func__, error); |
@@ -1484,7 +1497,7 @@ xfs_iunlink_remove( | |||
1484 | * Now last_ibp points to the buffer previous to us on | 1497 | * Now last_ibp points to the buffer previous to us on |
1485 | * the unlinked list. Pull us from the list. | 1498 | * the unlinked list. Pull us from the list. |
1486 | */ | 1499 | */ |
1487 | error = xfs_itobp(mp, tp, ip, &dip, &ibp, XBF_LOCK); | 1500 | error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0); |
1488 | if (error) { | 1501 | if (error) { |
1489 | xfs_warn(mp, "%s: xfs_itobp(2) returned error %d.", | 1502 | xfs_warn(mp, "%s: xfs_itobp(2) returned error %d.", |
1490 | __func__, error); | 1503 | __func__, error); |
@@ -1566,8 +1579,7 @@ xfs_ifree_cluster( | |||
1566 | * to mark all the active inodes on the buffer stale. | 1579 | * to mark all the active inodes on the buffer stale. |
1567 | */ | 1580 | */ |
1568 | bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno, | 1581 | bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno, |
1569 | mp->m_bsize * blks_per_cluster, | 1582 | mp->m_bsize * blks_per_cluster, 0); |
1570 | XBF_LOCK); | ||
1571 | 1583 | ||
1572 | if (!bp) | 1584 | if (!bp) |
1573 | return ENOMEM; | 1585 | return ENOMEM; |
@@ -1737,7 +1749,7 @@ xfs_ifree( | |||
1737 | 1749 | ||
1738 | xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); | 1750 | xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); |
1739 | 1751 | ||
1740 | error = xfs_itobp(ip->i_mount, tp, ip, &dip, &ibp, XBF_LOCK); | 1752 | error = xfs_itobp(ip->i_mount, tp, ip, &dip, &ibp, 0); |
1741 | if (error) | 1753 | if (error) |
1742 | return error; | 1754 | return error; |
1743 | 1755 | ||
@@ -2347,11 +2359,11 @@ cluster_corrupt_out: | |||
2347 | */ | 2359 | */ |
2348 | rcu_read_unlock(); | 2360 | rcu_read_unlock(); |
2349 | /* | 2361 | /* |
2350 | * Clean up the buffer. If it was B_DELWRI, just release it -- | 2362 | * Clean up the buffer. If it was delwri, just release it -- |
2351 | * brelse can handle it with no problems. If not, shut down the | 2363 | * brelse can handle it with no problems. If not, shut down the |
2352 | * filesystem before releasing the buffer. | 2364 | * filesystem before releasing the buffer. |
2353 | */ | 2365 | */ |
2354 | bufwasdelwri = XFS_BUF_ISDELAYWRITE(bp); | 2366 | bufwasdelwri = (bp->b_flags & _XBF_DELWRI_Q); |
2355 | if (bufwasdelwri) | 2367 | if (bufwasdelwri) |
2356 | xfs_buf_relse(bp); | 2368 | xfs_buf_relse(bp); |
2357 | 2369 | ||
@@ -2377,30 +2389,29 @@ cluster_corrupt_out: | |||
2377 | /* | 2389 | /* |
2378 | * Unlocks the flush lock | 2390 | * Unlocks the flush lock |
2379 | */ | 2391 | */ |
2380 | xfs_iflush_abort(iq); | 2392 | xfs_iflush_abort(iq, false); |
2381 | kmem_free(ilist); | 2393 | kmem_free(ilist); |
2382 | xfs_perag_put(pag); | 2394 | xfs_perag_put(pag); |
2383 | return XFS_ERROR(EFSCORRUPTED); | 2395 | return XFS_ERROR(EFSCORRUPTED); |
2384 | } | 2396 | } |
2385 | 2397 | ||
2386 | /* | 2398 | /* |
2387 | * xfs_iflush() will write a modified inode's changes out to the | 2399 | * Flush dirty inode metadata into the backing buffer. |
2388 | * inode's on disk home. The caller must have the inode lock held | 2400 | * |
2389 | * in at least shared mode and the inode flush completion must be | 2401 | * The caller must have the inode lock and the inode flush lock held. The |
2390 | * active as well. The inode lock will still be held upon return from | 2402 | * inode lock will still be held upon return to the caller, and the inode |
2391 | * the call and the caller is free to unlock it. | 2403 | * flush lock will be released after the inode has reached the disk. |
2392 | * The inode flush will be completed when the inode reaches the disk. | 2404 | * |
2393 | * The flags indicate how the inode's buffer should be written out. | 2405 | * The caller must write out the buffer returned in *bpp and release it. |
2394 | */ | 2406 | */ |
2395 | int | 2407 | int |
2396 | xfs_iflush( | 2408 | xfs_iflush( |
2397 | xfs_inode_t *ip, | 2409 | struct xfs_inode *ip, |
2398 | uint flags) | 2410 | struct xfs_buf **bpp) |
2399 | { | 2411 | { |
2400 | xfs_inode_log_item_t *iip; | 2412 | struct xfs_mount *mp = ip->i_mount; |
2401 | xfs_buf_t *bp; | 2413 | struct xfs_buf *bp; |
2402 | xfs_dinode_t *dip; | 2414 | struct xfs_dinode *dip; |
2403 | xfs_mount_t *mp; | ||
2404 | int error; | 2415 | int error; |
2405 | 2416 | ||
2406 | XFS_STATS_INC(xs_iflush_count); | 2417 | XFS_STATS_INC(xs_iflush_count); |
@@ -2410,25 +2421,8 @@ xfs_iflush( | |||
2410 | ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE || | 2421 | ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE || |
2411 | ip->i_d.di_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK)); | 2422 | ip->i_d.di_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK)); |
2412 | 2423 | ||
2413 | iip = ip->i_itemp; | 2424 | *bpp = NULL; |
2414 | mp = ip->i_mount; | ||
2415 | 2425 | ||
2416 | /* | ||
2417 | * We can't flush the inode until it is unpinned, so wait for it if we | ||
2418 | * are allowed to block. We know no one new can pin it, because we are | ||
2419 | * holding the inode lock shared and you need to hold it exclusively to | ||
2420 | * pin the inode. | ||
2421 | * | ||
2422 | * If we are not allowed to block, force the log out asynchronously so | ||
2423 | * that when we come back the inode will be unpinned. If other inodes | ||
2424 | * in the same cluster are dirty, they will probably write the inode | ||
2425 | * out for us if they occur after the log force completes. | ||
2426 | */ | ||
2427 | if (!(flags & SYNC_WAIT) && xfs_ipincount(ip)) { | ||
2428 | xfs_iunpin(ip); | ||
2429 | xfs_ifunlock(ip); | ||
2430 | return EAGAIN; | ||
2431 | } | ||
2432 | xfs_iunpin_wait(ip); | 2426 | xfs_iunpin_wait(ip); |
2433 | 2427 | ||
2434 | /* | 2428 | /* |
@@ -2447,20 +2441,20 @@ xfs_iflush( | |||
2447 | /* | 2441 | /* |
2448 | * This may have been unpinned because the filesystem is shutting | 2442 | * This may have been unpinned because the filesystem is shutting |
2449 | * down forcibly. If that's the case we must not write this inode | 2443 | * down forcibly. If that's the case we must not write this inode |
2450 | * to disk, because the log record didn't make it to disk! | 2444 | * to disk, because the log record didn't make it to disk. |
2445 | * | ||
2446 | * We also have to remove the log item from the AIL in this case, | ||
2447 | * as we wait for an empty AIL as part of the unmount process. | ||
2451 | */ | 2448 | */ |
2452 | if (XFS_FORCED_SHUTDOWN(mp)) { | 2449 | if (XFS_FORCED_SHUTDOWN(mp)) { |
2453 | if (iip) | 2450 | error = XFS_ERROR(EIO); |
2454 | iip->ili_fields = 0; | 2451 | goto abort_out; |
2455 | xfs_ifunlock(ip); | ||
2456 | return XFS_ERROR(EIO); | ||
2457 | } | 2452 | } |
2458 | 2453 | ||
2459 | /* | 2454 | /* |
2460 | * Get the buffer containing the on-disk inode. | 2455 | * Get the buffer containing the on-disk inode. |
2461 | */ | 2456 | */ |
2462 | error = xfs_itobp(mp, NULL, ip, &dip, &bp, | 2457 | error = xfs_itobp(mp, NULL, ip, &dip, &bp, XBF_TRYLOCK); |
2463 | (flags & SYNC_TRYLOCK) ? XBF_TRYLOCK : XBF_LOCK); | ||
2464 | if (error || !bp) { | 2458 | if (error || !bp) { |
2465 | xfs_ifunlock(ip); | 2459 | xfs_ifunlock(ip); |
2466 | return error; | 2460 | return error; |
@@ -2488,23 +2482,20 @@ xfs_iflush( | |||
2488 | if (error) | 2482 | if (error) |
2489 | goto cluster_corrupt_out; | 2483 | goto cluster_corrupt_out; |
2490 | 2484 | ||
2491 | if (flags & SYNC_WAIT) | 2485 | *bpp = bp; |
2492 | error = xfs_bwrite(bp); | 2486 | return 0; |
2493 | else | ||
2494 | xfs_buf_delwri_queue(bp); | ||
2495 | |||
2496 | xfs_buf_relse(bp); | ||
2497 | return error; | ||
2498 | 2487 | ||
2499 | corrupt_out: | 2488 | corrupt_out: |
2500 | xfs_buf_relse(bp); | 2489 | xfs_buf_relse(bp); |
2501 | xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); | 2490 | xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); |
2502 | cluster_corrupt_out: | 2491 | cluster_corrupt_out: |
2492 | error = XFS_ERROR(EFSCORRUPTED); | ||
2493 | abort_out: | ||
2503 | /* | 2494 | /* |
2504 | * Unlocks the flush lock | 2495 | * Unlocks the flush lock |
2505 | */ | 2496 | */ |
2506 | xfs_iflush_abort(ip); | 2497 | xfs_iflush_abort(ip, false); |
2507 | return XFS_ERROR(EFSCORRUPTED); | 2498 | return error; |
2508 | } | 2499 | } |
2509 | 2500 | ||
2510 | 2501 | ||
@@ -2706,27 +2697,6 @@ corrupt_out: | |||
2706 | return XFS_ERROR(EFSCORRUPTED); | 2697 | return XFS_ERROR(EFSCORRUPTED); |
2707 | } | 2698 | } |
2708 | 2699 | ||
2709 | void | ||
2710 | xfs_promote_inode( | ||
2711 | struct xfs_inode *ip) | ||
2712 | { | ||
2713 | struct xfs_buf *bp; | ||
2714 | |||
2715 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); | ||
2716 | |||
2717 | bp = xfs_incore(ip->i_mount->m_ddev_targp, ip->i_imap.im_blkno, | ||
2718 | ip->i_imap.im_len, XBF_TRYLOCK); | ||
2719 | if (!bp) | ||
2720 | return; | ||
2721 | |||
2722 | if (XFS_BUF_ISDELAYWRITE(bp)) { | ||
2723 | xfs_buf_delwri_promote(bp); | ||
2724 | wake_up_process(ip->i_mount->m_ddev_targp->bt_task); | ||
2725 | } | ||
2726 | |||
2727 | xfs_buf_relse(bp); | ||
2728 | } | ||
2729 | |||
2730 | /* | 2700 | /* |
2731 | * Return a pointer to the extent record at file index idx. | 2701 | * Return a pointer to the extent record at file index idx. |
2732 | */ | 2702 | */ |
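The reworked xfs_iflush() no longer writes or queues the buffer itself: on success it hands the locked cluster buffer back through *bpp, and write-out becomes the caller's job. A hedged caller-side sketch, assuming a delwri buffer_list and an AIL-push rval as in the xfs_inode_item_push() hunk below:

        struct xfs_buf  *bp = NULL;
        int             error;

        error = xfs_iflush(ip, &bp);
        if (!error) {
                /* caller owns write-out: queue the buffer, then drop our ref */
                if (!xfs_buf_delwri_queue(bp, buffer_list))
                        rval = XFS_ITEM_FLUSHING;       /* already queued elsewhere */
                xfs_buf_relse(bp);
        }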
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 7fee3387e1c8..1efff36a75b6 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h | |||
@@ -529,11 +529,12 @@ int xfs_iunlink(struct xfs_trans *, xfs_inode_t *); | |||
529 | 529 | ||
530 | void xfs_iext_realloc(xfs_inode_t *, int, int); | 530 | void xfs_iext_realloc(xfs_inode_t *, int, int); |
531 | void xfs_iunpin_wait(xfs_inode_t *); | 531 | void xfs_iunpin_wait(xfs_inode_t *); |
532 | int xfs_iflush(xfs_inode_t *, uint); | 532 | int xfs_iflush(struct xfs_inode *, struct xfs_buf **); |
533 | void xfs_promote_inode(struct xfs_inode *); | ||
534 | void xfs_lock_inodes(xfs_inode_t **, int, uint); | 533 | void xfs_lock_inodes(xfs_inode_t **, int, uint); |
535 | void xfs_lock_two_inodes(xfs_inode_t *, xfs_inode_t *, uint); | 534 | void xfs_lock_two_inodes(xfs_inode_t *, xfs_inode_t *, uint); |
536 | 535 | ||
536 | xfs_extlen_t xfs_get_extsz_hint(struct xfs_inode *ip); | ||
537 | |||
537 | #define IHOLD(ip) \ | 538 | #define IHOLD(ip) \ |
538 | do { \ | 539 | do { \ |
539 | ASSERT(atomic_read(&VFS_I(ip)->i_count) > 0) ; \ | 540 | ASSERT(atomic_read(&VFS_I(ip)->i_count) > 0) ; \ |
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 05d924efceaf..6cdbf90c6f7b 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c | |||
@@ -18,9 +18,7 @@ | |||
18 | #include "xfs.h" | 18 | #include "xfs.h" |
19 | #include "xfs_fs.h" | 19 | #include "xfs_fs.h" |
20 | #include "xfs_types.h" | 20 | #include "xfs_types.h" |
21 | #include "xfs_bit.h" | ||
22 | #include "xfs_log.h" | 21 | #include "xfs_log.h" |
23 | #include "xfs_inum.h" | ||
24 | #include "xfs_trans.h" | 22 | #include "xfs_trans.h" |
25 | #include "xfs_sb.h" | 23 | #include "xfs_sb.h" |
26 | #include "xfs_ag.h" | 24 | #include "xfs_ag.h" |
@@ -480,25 +478,16 @@ xfs_inode_item_unpin( | |||
480 | wake_up_bit(&ip->i_flags, __XFS_IPINNED_BIT); | 478 | wake_up_bit(&ip->i_flags, __XFS_IPINNED_BIT); |
481 | } | 479 | } |
482 | 480 | ||
483 | /* | ||
484 | * This is called to attempt to lock the inode associated with this | ||
485 | * inode log item, in preparation for the push routine which does the actual | ||
486 | * iflush. Don't sleep on the inode lock or the flush lock. | ||
487 | * | ||
488 | * If the flush lock is already held, indicating that the inode has | ||
489 | * been or is in the process of being flushed, then (ideally) we'd like to | ||
490 | * see if the inode's buffer is still incore, and if so give it a nudge. | ||
491 | * We delay doing so until the pushbuf routine, though, to avoid holding | ||
492 | * the AIL lock across a call to the blackhole which is the buffer cache. | ||
493 | * Also we don't want to sleep in any device strategy routines, which can happen | ||
494 | * if we do the subsequent bawrite in here. | ||
495 | */ | ||
496 | STATIC uint | 481 | STATIC uint |
497 | xfs_inode_item_trylock( | 482 | xfs_inode_item_push( |
498 | struct xfs_log_item *lip) | 483 | struct xfs_log_item *lip, |
484 | struct list_head *buffer_list) | ||
499 | { | 485 | { |
500 | struct xfs_inode_log_item *iip = INODE_ITEM(lip); | 486 | struct xfs_inode_log_item *iip = INODE_ITEM(lip); |
501 | struct xfs_inode *ip = iip->ili_inode; | 487 | struct xfs_inode *ip = iip->ili_inode; |
488 | struct xfs_buf *bp = NULL; | ||
489 | uint rval = XFS_ITEM_SUCCESS; | ||
490 | int error; | ||
502 | 491 | ||
503 | if (xfs_ipincount(ip) > 0) | 492 | if (xfs_ipincount(ip) > 0) |
504 | return XFS_ITEM_PINNED; | 493 | return XFS_ITEM_PINNED; |
@@ -506,30 +495,50 @@ xfs_inode_item_trylock( | |||
506 | if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) | 495 | if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) |
507 | return XFS_ITEM_LOCKED; | 496 | return XFS_ITEM_LOCKED; |
508 | 497 | ||
498 | /* | ||
499 | * Re-check the pincount now that we stabilized the value by | ||
500 | * taking the ilock. | ||
501 | */ | ||
502 | if (xfs_ipincount(ip) > 0) { | ||
503 | rval = XFS_ITEM_PINNED; | ||
504 | goto out_unlock; | ||
505 | } | ||
506 | |||
507 | /* | ||
508 | * Someone else is already flushing the inode. Nothing we can do | ||
509 | * here but wait for the flush to finish and remove the item from | ||
510 | * the AIL. | ||
511 | */ | ||
509 | if (!xfs_iflock_nowait(ip)) { | 512 | if (!xfs_iflock_nowait(ip)) { |
510 | /* | 513 | rval = XFS_ITEM_FLUSHING; |
511 | * inode has already been flushed to the backing buffer, | 514 | goto out_unlock; |
512 | * leave it locked in shared mode, pushbuf routine will | ||
513 | * unlock it. | ||
514 | */ | ||
515 | return XFS_ITEM_PUSHBUF; | ||
516 | } | 515 | } |
517 | 516 | ||
518 | /* Stale items should force out the iclog */ | 517 | /* |
518 | * Stale inode items should force out the iclog. | ||
519 | */ | ||
519 | if (ip->i_flags & XFS_ISTALE) { | 520 | if (ip->i_flags & XFS_ISTALE) { |
520 | xfs_ifunlock(ip); | 521 | xfs_ifunlock(ip); |
521 | xfs_iunlock(ip, XFS_ILOCK_SHARED); | 522 | xfs_iunlock(ip, XFS_ILOCK_SHARED); |
522 | return XFS_ITEM_PINNED; | 523 | return XFS_ITEM_PINNED; |
523 | } | 524 | } |
524 | 525 | ||
525 | #ifdef DEBUG | 526 | ASSERT(iip->ili_fields != 0 || XFS_FORCED_SHUTDOWN(ip->i_mount)); |
526 | if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) { | 527 | ASSERT(iip->ili_logged == 0 || XFS_FORCED_SHUTDOWN(ip->i_mount)); |
527 | ASSERT(iip->ili_fields != 0); | 528 | |
528 | ASSERT(iip->ili_logged == 0); | 529 | spin_unlock(&lip->li_ailp->xa_lock); |
529 | ASSERT(lip->li_flags & XFS_LI_IN_AIL); | 530 | |
531 | error = xfs_iflush(ip, &bp); | ||
532 | if (!error) { | ||
533 | if (!xfs_buf_delwri_queue(bp, buffer_list)) | ||
534 | rval = XFS_ITEM_FLUSHING; | ||
535 | xfs_buf_relse(bp); | ||
530 | } | 536 | } |
531 | #endif | 537 | |
532 | return XFS_ITEM_SUCCESS; | 538 | spin_lock(&lip->li_ailp->xa_lock); |
539 | out_unlock: | ||
540 | xfs_iunlock(ip, XFS_ILOCK_SHARED); | ||
541 | return rval; | ||
533 | } | 542 | } |
534 | 543 | ||
535 | /* | 544 | /* |
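Two details of the new push routine are worth calling out: the unlocked pincount check is repeated once the ilock has stabilised the value, and the AIL spinlock is dropped around the flush because xfs_iflush() can sleep. Condensed to its locking shape (ip, lip and rval as in the function above):

        uint    rval = XFS_ITEM_SUCCESS;

        if (xfs_ipincount(ip) > 0)
                return XFS_ITEM_PINNED;         /* cheap, unlocked check */
        if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED))
                return XFS_ITEM_LOCKED;
        if (xfs_ipincount(ip) > 0) {            /* re-check under the ilock */
                rval = XFS_ITEM_PINNED;
                goto out_unlock;
        }

        spin_unlock(&lip->li_ailp->xa_lock);    /* flushing may sleep */
        /* ... xfs_iflush() and delwri queueing happen here ... */
        spin_lock(&lip->li_ailp->xa_lock);
out_unlock:
        xfs_iunlock(ip, XFS_ILOCK_SHARED);
        return rval;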
@@ -614,86 +623,6 @@ xfs_inode_item_committed( | |||
614 | } | 623 | } |
615 | 624 | ||
616 | /* | 625 | /* |
617 | * This gets called by xfs_trans_push_ail(), when IOP_TRYLOCK | ||
618 | * failed to get the inode flush lock but did get the inode locked SHARED. | ||
619 | * Here we're trying to see if the inode buffer is incore, and if so whether it's | ||
620 | * marked delayed write. If that's the case, we'll promote it and that will | ||
621 | * allow the caller to write the buffer by triggering the xfsbufd to run. | ||
622 | */ | ||
623 | STATIC bool | ||
624 | xfs_inode_item_pushbuf( | ||
625 | struct xfs_log_item *lip) | ||
626 | { | ||
627 | struct xfs_inode_log_item *iip = INODE_ITEM(lip); | ||
628 | struct xfs_inode *ip = iip->ili_inode; | ||
629 | struct xfs_buf *bp; | ||
630 | bool ret = true; | ||
631 | |||
632 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED)); | ||
633 | |||
634 | /* | ||
635 | * If a flush is not in progress anymore, chances are that the | ||
636 | * inode was taken off the AIL. So, just get out. | ||
637 | */ | ||
638 | if (!xfs_isiflocked(ip) || | ||
639 | !(lip->li_flags & XFS_LI_IN_AIL)) { | ||
640 | xfs_iunlock(ip, XFS_ILOCK_SHARED); | ||
641 | return true; | ||
642 | } | ||
643 | |||
644 | bp = xfs_incore(ip->i_mount->m_ddev_targp, iip->ili_format.ilf_blkno, | ||
645 | iip->ili_format.ilf_len, XBF_TRYLOCK); | ||
646 | |||
647 | xfs_iunlock(ip, XFS_ILOCK_SHARED); | ||
648 | if (!bp) | ||
649 | return true; | ||
650 | if (XFS_BUF_ISDELAYWRITE(bp)) | ||
651 | xfs_buf_delwri_promote(bp); | ||
652 | if (xfs_buf_ispinned(bp)) | ||
653 | ret = false; | ||
654 | xfs_buf_relse(bp); | ||
655 | return ret; | ||
656 | } | ||
657 | |||
658 | /* | ||
659 | * This is called to asynchronously write the inode associated with this | ||
660 | * inode log item out to disk. The inode will already have been locked by | ||
661 | * a successful call to xfs_inode_item_trylock(). | ||
662 | */ | ||
663 | STATIC void | ||
664 | xfs_inode_item_push( | ||
665 | struct xfs_log_item *lip) | ||
666 | { | ||
667 | struct xfs_inode_log_item *iip = INODE_ITEM(lip); | ||
668 | struct xfs_inode *ip = iip->ili_inode; | ||
669 | |||
670 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED)); | ||
671 | ASSERT(xfs_isiflocked(ip)); | ||
672 | |||
673 | /* | ||
674 | * Since we were able to lock the inode's flush lock and | ||
675 | * we found it on the AIL, the inode must be dirty. This | ||
676 | * is because the inode is removed from the AIL while still | ||
677 | * holding the flush lock in xfs_iflush_done(). Thus, if | ||
678 | * we found it in the AIL and were able to obtain the flush | ||
679 | * lock without sleeping, then there must not have been | ||
680 | * anyone in the process of flushing the inode. | ||
681 | */ | ||
682 | ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || iip->ili_fields != 0); | ||
683 | |||
684 | /* | ||
685 | * Push the inode to its backing buffer. This will not remove the |||
686 | * inode from the AIL - a further push will be required to trigger a | ||
687 | * buffer push. However, this allows all the dirty inodes to be pushed | ||
688 | * to the buffer before it is pushed to disk. The buffer IO completion | ||
689 | * will pull the inode from the AIL, mark it clean and unlock the flush | ||
690 | * lock. | ||
691 | */ | ||
692 | (void) xfs_iflush(ip, SYNC_TRYLOCK); | ||
693 | xfs_iunlock(ip, XFS_ILOCK_SHARED); | ||
694 | } | ||
695 | |||
696 | /* | ||
697 | * XXX rcc - this one really has to do something. Probably needs | 626 | * XXX rcc - this one really has to do something. Probably needs |
698 | * to stamp in a new field in the incore inode. | 627 | * to stamp in a new field in the incore inode. |
699 | */ | 628 | */ |
@@ -713,11 +642,9 @@ static const struct xfs_item_ops xfs_inode_item_ops = { | |||
713 | .iop_format = xfs_inode_item_format, | 642 | .iop_format = xfs_inode_item_format, |
714 | .iop_pin = xfs_inode_item_pin, | 643 | .iop_pin = xfs_inode_item_pin, |
715 | .iop_unpin = xfs_inode_item_unpin, | 644 | .iop_unpin = xfs_inode_item_unpin, |
716 | .iop_trylock = xfs_inode_item_trylock, | ||
717 | .iop_unlock = xfs_inode_item_unlock, | 645 | .iop_unlock = xfs_inode_item_unlock, |
718 | .iop_committed = xfs_inode_item_committed, | 646 | .iop_committed = xfs_inode_item_committed, |
719 | .iop_push = xfs_inode_item_push, | 647 | .iop_push = xfs_inode_item_push, |
720 | .iop_pushbuf = xfs_inode_item_pushbuf, | ||
721 | .iop_committing = xfs_inode_item_committing | 648 | .iop_committing = xfs_inode_item_committing |
722 | }; | 649 | }; |
723 | 650 | ||
@@ -848,7 +775,8 @@ xfs_iflush_done( | |||
848 | ASSERT(i <= need_ail); | 775 | ASSERT(i <= need_ail); |
849 | } | 776 | } |
850 | /* xfs_trans_ail_delete_bulk() drops the AIL lock. */ | 777 | /* xfs_trans_ail_delete_bulk() drops the AIL lock. */ |
851 | xfs_trans_ail_delete_bulk(ailp, log_items, i); | 778 | xfs_trans_ail_delete_bulk(ailp, log_items, i, |
779 | SHUTDOWN_CORRUPT_INCORE); | ||
852 | } | 780 | } |
853 | 781 | ||
854 | 782 | ||
@@ -869,16 +797,15 @@ xfs_iflush_done( | |||
869 | } | 797 | } |
870 | 798 | ||
871 | /* | 799 | /* |
872 | * This is the inode flushing abort routine. It is called | 800 | * This is the inode flushing abort routine. It is called from xfs_iflush when |
873 | * from xfs_iflush when the filesystem is shutting down to clean | 801 | * the filesystem is shutting down to clean up the inode state. It is |
874 | * up the inode state. | 802 | * responsible for removing the inode item from the AIL if it has not been |
875 | * It is responsible for removing the inode item | 803 | * re-logged, and unlocking the inode's flush lock. |
876 | * from the AIL if it has not been re-logged, and unlocking the inode's | ||
877 | * flush lock. | ||
878 | */ | 804 | */ |
879 | void | 805 | void |
880 | xfs_iflush_abort( | 806 | xfs_iflush_abort( |
881 | xfs_inode_t *ip) | 807 | xfs_inode_t *ip, |
808 | bool stale) | ||
882 | { | 809 | { |
883 | xfs_inode_log_item_t *iip = ip->i_itemp; | 810 | xfs_inode_log_item_t *iip = ip->i_itemp; |
884 | 811 | ||
@@ -888,7 +815,10 @@ xfs_iflush_abort( | |||
888 | spin_lock(&ailp->xa_lock); | 815 | spin_lock(&ailp->xa_lock); |
889 | if (iip->ili_item.li_flags & XFS_LI_IN_AIL) { | 816 | if (iip->ili_item.li_flags & XFS_LI_IN_AIL) { |
890 | /* xfs_trans_ail_delete() drops the AIL lock. */ | 817 | /* xfs_trans_ail_delete() drops the AIL lock. */ |
891 | xfs_trans_ail_delete(ailp, (xfs_log_item_t *)iip); | 818 | xfs_trans_ail_delete(ailp, &iip->ili_item, |
819 | stale ? | ||
820 | SHUTDOWN_LOG_IO_ERROR : | ||
821 | SHUTDOWN_CORRUPT_INCORE); | ||
892 | } else | 822 | } else |
893 | spin_unlock(&ailp->xa_lock); | 823 | spin_unlock(&ailp->xa_lock); |
894 | } | 824 | } |
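The new bool argument to xfs_iflush_abort() selects which shutdown reason is recorded when the log item is removed from the AIL. Summarising the two call sites in this patch:

        xfs_iflush_abort(ip, false);    /* corruption path  -> SHUTDOWN_CORRUPT_INCORE */
        xfs_iflush_abort(ip, true);     /* stale-inode path -> SHUTDOWN_LOG_IO_ERROR */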
@@ -915,7 +845,7 @@ xfs_istale_done( | |||
915 | struct xfs_buf *bp, | 845 | struct xfs_buf *bp, |
916 | struct xfs_log_item *lip) | 846 | struct xfs_log_item *lip) |
917 | { | 847 | { |
918 | xfs_iflush_abort(INODE_ITEM(lip)->ili_inode); | 848 | xfs_iflush_abort(INODE_ITEM(lip)->ili_inode, true); |
919 | } | 849 | } |
920 | 850 | ||
921 | /* | 851 | /* |
diff --git a/fs/xfs/xfs_inode_item.h b/fs/xfs/xfs_inode_item.h index 41d61c3b7a36..376d4d0b2635 100644 --- a/fs/xfs/xfs_inode_item.h +++ b/fs/xfs/xfs_inode_item.h | |||
@@ -165,7 +165,7 @@ extern void xfs_inode_item_init(struct xfs_inode *, struct xfs_mount *); | |||
165 | extern void xfs_inode_item_destroy(struct xfs_inode *); | 165 | extern void xfs_inode_item_destroy(struct xfs_inode *); |
166 | extern void xfs_iflush_done(struct xfs_buf *, struct xfs_log_item *); | 166 | extern void xfs_iflush_done(struct xfs_buf *, struct xfs_log_item *); |
167 | extern void xfs_istale_done(struct xfs_buf *, struct xfs_log_item *); | 167 | extern void xfs_istale_done(struct xfs_buf *, struct xfs_log_item *); |
168 | extern void xfs_iflush_abort(struct xfs_inode *); | 168 | extern void xfs_iflush_abort(struct xfs_inode *, bool); |
169 | extern int xfs_inode_item_format_convert(xfs_log_iovec_t *, | 169 | extern int xfs_inode_item_format_convert(xfs_log_iovec_t *, |
170 | xfs_inode_log_format_t *); | 170 | xfs_inode_log_format_t *); |
171 | 171 | ||
diff --git a/fs/xfs/xfs_inum.h b/fs/xfs/xfs_inum.h index b253c0ea5bec..90efdaf1706f 100644 --- a/fs/xfs/xfs_inum.h +++ b/fs/xfs/xfs_inum.h | |||
@@ -26,11 +26,6 @@ | |||
26 | * high agno_log-agblklog-inopblog bits - 0 | 26 | * high agno_log-agblklog-inopblog bits - 0 |
27 | */ | 27 | */ |
28 | 28 | ||
29 | typedef __uint32_t xfs_agino_t; /* within allocation grp inode number */ | ||
30 | |||
31 | #define NULLFSINO ((xfs_ino_t)-1) | ||
32 | #define NULLAGINO ((xfs_agino_t)-1) | ||
33 | |||
34 | struct xfs_mount; | 29 | struct xfs_mount; |
35 | 30 | ||
36 | #define XFS_INO_MASK(k) (__uint32_t)((1ULL << (k)) - 1) | 31 | #define XFS_INO_MASK(k) (__uint32_t)((1ULL << (k)) - 1) |
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 91f8ff547ab3..3a05a41b5d76 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c | |||
@@ -17,9 +17,7 @@ | |||
17 | */ | 17 | */ |
18 | #include "xfs.h" | 18 | #include "xfs.h" |
19 | #include "xfs_fs.h" | 19 | #include "xfs_fs.h" |
20 | #include "xfs_bit.h" | ||
21 | #include "xfs_log.h" | 20 | #include "xfs_log.h" |
22 | #include "xfs_inum.h" | ||
23 | #include "xfs_trans.h" | 21 | #include "xfs_trans.h" |
24 | #include "xfs_sb.h" | 22 | #include "xfs_sb.h" |
25 | #include "xfs_ag.h" | 23 | #include "xfs_ag.h" |
diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c index a849a5473aff..c4f2da0d2bf5 100644 --- a/fs/xfs/xfs_ioctl32.c +++ b/fs/xfs/xfs_ioctl32.c | |||
@@ -22,9 +22,7 @@ | |||
22 | #include <asm/uaccess.h> | 22 | #include <asm/uaccess.h> |
23 | #include "xfs.h" | 23 | #include "xfs.h" |
24 | #include "xfs_fs.h" | 24 | #include "xfs_fs.h" |
25 | #include "xfs_bit.h" | ||
26 | #include "xfs_log.h" | 25 | #include "xfs_log.h" |
27 | #include "xfs_inum.h" | ||
28 | #include "xfs_trans.h" | 26 | #include "xfs_trans.h" |
29 | #include "xfs_sb.h" | 27 | #include "xfs_sb.h" |
30 | #include "xfs_ag.h" | 28 | #include "xfs_ag.h" |
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 71a464503c43..aadfce6681ee 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c | |||
@@ -17,9 +17,7 @@ | |||
17 | */ | 17 | */ |
18 | #include "xfs.h" | 18 | #include "xfs.h" |
19 | #include "xfs_fs.h" | 19 | #include "xfs_fs.h" |
20 | #include "xfs_bit.h" | ||
21 | #include "xfs_log.h" | 20 | #include "xfs_log.h" |
22 | #include "xfs_inum.h" | ||
23 | #include "xfs_trans.h" | 21 | #include "xfs_trans.h" |
24 | #include "xfs_sb.h" | 22 | #include "xfs_sb.h" |
25 | #include "xfs_ag.h" | 23 | #include "xfs_ag.h" |
@@ -37,7 +35,6 @@ | |||
37 | #include "xfs_rtalloc.h" | 35 | #include "xfs_rtalloc.h" |
38 | #include "xfs_error.h" | 36 | #include "xfs_error.h" |
39 | #include "xfs_itable.h" | 37 | #include "xfs_itable.h" |
40 | #include "xfs_rw.h" | ||
41 | #include "xfs_attr.h" | 38 | #include "xfs_attr.h" |
42 | #include "xfs_buf_item.h" | 39 | #include "xfs_buf_item.h" |
43 | #include "xfs_trans_space.h" | 40 | #include "xfs_trans_space.h" |
@@ -142,11 +139,7 @@ xfs_iomap_write_direct( | |||
142 | int committed; | 139 | int committed; |
143 | int error; | 140 | int error; |
144 | 141 | ||
145 | /* | 142 | error = xfs_qm_dqattach(ip, 0); |
146 | * Make sure that the dquots are there. This doesn't hold | ||
147 | * the ilock across a disk read. | ||
148 | */ | ||
149 | error = xfs_qm_dqattach_locked(ip, 0); | ||
150 | if (error) | 143 | if (error) |
151 | return XFS_ERROR(error); | 144 | return XFS_ERROR(error); |
152 | 145 | ||
@@ -158,7 +151,7 @@ xfs_iomap_write_direct( | |||
158 | if ((offset + count) > XFS_ISIZE(ip)) { | 151 | if ((offset + count) > XFS_ISIZE(ip)) { |
159 | error = xfs_iomap_eof_align_last_fsb(mp, ip, extsz, &last_fsb); | 152 | error = xfs_iomap_eof_align_last_fsb(mp, ip, extsz, &last_fsb); |
160 | if (error) | 153 | if (error) |
161 | goto error_out; | 154 | return XFS_ERROR(error); |
162 | } else { | 155 | } else { |
163 | if (nmaps && (imap->br_startblock == HOLESTARTBLOCK)) | 156 | if (nmaps && (imap->br_startblock == HOLESTARTBLOCK)) |
164 | last_fsb = MIN(last_fsb, (xfs_fileoff_t) | 157 | last_fsb = MIN(last_fsb, (xfs_fileoff_t) |
@@ -190,7 +183,6 @@ xfs_iomap_write_direct( | |||
190 | /* | 183 | /* |
191 | * Allocate and setup the transaction | 184 | * Allocate and setup the transaction |
192 | */ | 185 | */ |
193 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | ||
194 | tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT); | 186 | tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT); |
195 | error = xfs_trans_reserve(tp, resblks, | 187 | error = xfs_trans_reserve(tp, resblks, |
196 | XFS_WRITE_LOG_RES(mp), resrtextents, | 188 | XFS_WRITE_LOG_RES(mp), resrtextents, |
@@ -199,15 +191,16 @@ xfs_iomap_write_direct( | |||
199 | /* | 191 | /* |
200 | * Check for running out of space, note: need lock to return | 192 | * Check for running out of space, note: need lock to return |
201 | */ | 193 | */ |
202 | if (error) | 194 | if (error) { |
203 | xfs_trans_cancel(tp, 0); | 195 | xfs_trans_cancel(tp, 0); |
196 | return XFS_ERROR(error); | ||
197 | } | ||
198 | |||
204 | xfs_ilock(ip, XFS_ILOCK_EXCL); | 199 | xfs_ilock(ip, XFS_ILOCK_EXCL); |
205 | if (error) | ||
206 | goto error_out; | ||
207 | 200 | ||
208 | error = xfs_trans_reserve_quota_nblks(tp, ip, qblocks, 0, quota_flag); | 201 | error = xfs_trans_reserve_quota_nblks(tp, ip, qblocks, 0, quota_flag); |
209 | if (error) | 202 | if (error) |
210 | goto error1; | 203 | goto out_trans_cancel; |
211 | 204 | ||
212 | xfs_trans_ijoin(tp, ip, 0); | 205 | xfs_trans_ijoin(tp, ip, 0); |
213 | 206 | ||
@@ -224,42 +217,39 @@ xfs_iomap_write_direct( | |||
224 | error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb, bmapi_flag, | 217 | error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb, bmapi_flag, |
225 | &firstfsb, 0, imap, &nimaps, &free_list); | 218 | &firstfsb, 0, imap, &nimaps, &free_list); |
226 | if (error) | 219 | if (error) |
227 | goto error0; | 220 | goto out_bmap_cancel; |
228 | 221 | ||
229 | /* | 222 | /* |
230 | * Complete the transaction | 223 | * Complete the transaction |
231 | */ | 224 | */ |
232 | error = xfs_bmap_finish(&tp, &free_list, &committed); | 225 | error = xfs_bmap_finish(&tp, &free_list, &committed); |
233 | if (error) | 226 | if (error) |
234 | goto error0; | 227 | goto out_bmap_cancel; |
235 | error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); | 228 | error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); |
236 | if (error) | 229 | if (error) |
237 | goto error_out; | 230 | goto out_unlock; |
238 | 231 | ||
239 | /* | 232 | /* |
240 | * Copy any maps to caller's array and return any error. | 233 | * Copy any maps to caller's array and return any error. |
241 | */ | 234 | */ |
242 | if (nimaps == 0) { | 235 | if (nimaps == 0) { |
243 | error = ENOSPC; | 236 | error = XFS_ERROR(ENOSPC); |
244 | goto error_out; | 237 | goto out_unlock; |
245 | } | 238 | } |
246 | 239 | ||
247 | if (!(imap->br_startblock || XFS_IS_REALTIME_INODE(ip))) { | 240 | if (!(imap->br_startblock || XFS_IS_REALTIME_INODE(ip))) |
248 | error = xfs_alert_fsblock_zero(ip, imap); | 241 | error = xfs_alert_fsblock_zero(ip, imap); |
249 | goto error_out; | ||
250 | } | ||
251 | 242 | ||
252 | return 0; | 243 | out_unlock: |
244 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | ||
245 | return error; | ||
253 | 246 | ||
254 | error0: /* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */ | 247 | out_bmap_cancel: |
255 | xfs_bmap_cancel(&free_list); | 248 | xfs_bmap_cancel(&free_list); |
256 | xfs_trans_unreserve_quota_nblks(tp, ip, qblocks, 0, quota_flag); | 249 | xfs_trans_unreserve_quota_nblks(tp, ip, (long)qblocks, 0, quota_flag); |
257 | 250 | out_trans_cancel: | |
258 | error1: /* Just cancel transaction */ | ||
259 | xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); | 251 | xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); |
260 | 252 | goto out_unlock; | |
261 | error_out: | ||
262 | return XFS_ERROR(error); | ||
263 | } | 253 | } |
264 | 254 | ||
265 | /* | 255 | /* |
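The rewritten error handling in xfs_iomap_write_direct() replaces the numbered error0/error1/error_out labels with named labels that unwind in reverse order of setup, and both success and failure leave through a single unlock. A stripped-down sketch of the control flow, with hypothetical step names standing in for the reserve/bmapi calls (locals as in the function above):

        error = setup_transaction();            /* hypothetical stand-in */
        if (error)
                goto out_trans_cancel;
        error = map_blocks();                   /* hypothetical stand-in */
        if (error)
                goto out_bmap_cancel;
out_unlock:
        /* success and failure exit through the same unlock */
        xfs_iunlock(ip, XFS_ILOCK_EXCL);
        return error;
out_bmap_cancel:
        xfs_bmap_cancel(&free_list);
out_trans_cancel:
        xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
        goto out_unlock;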
@@ -422,6 +412,15 @@ retry: | |||
422 | return error; | 412 | return error; |
423 | } | 413 | } |
424 | 414 | ||
415 | /* | ||
416 | * Make sure preallocation does not create extents beyond the range we | ||
417 | * actually support in this filesystem. | ||
418 | */ | ||
419 | if (last_fsb > XFS_B_TO_FSB(mp, mp->m_maxioffset)) | ||
420 | last_fsb = XFS_B_TO_FSB(mp, mp->m_maxioffset); | ||
421 | |||
422 | ASSERT(last_fsb > offset_fsb); | ||
423 | |||
425 | nimaps = XFS_WRITE_IMAPS; | 424 | nimaps = XFS_WRITE_IMAPS; |
426 | error = xfs_bmapi_delay(ip, offset_fsb, last_fsb - offset_fsb, | 425 | error = xfs_bmapi_delay(ip, offset_fsb, last_fsb - offset_fsb, |
427 | imap, &nimaps, XFS_BMAPI_ENTIRE); | 426 | imap, &nimaps, XFS_BMAPI_ENTIRE); |
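The added check in the delayed-allocation path clamps speculative preallocation to the largest supported file offset, converted to filesystem blocks (max_fsb is a local introduced here only for clarity):

        xfs_fileoff_t   max_fsb = XFS_B_TO_FSB(mp, mp->m_maxioffset);

        if (last_fsb > max_fsb)                 /* never map past the supported range */
                last_fsb = max_fsb;
        ASSERT(last_fsb > offset_fsb);          /* the clamp must leave work to do */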
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index 3011b879f850..1a25fd802798 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c | |||
@@ -18,9 +18,7 @@ | |||
18 | #include "xfs.h" | 18 | #include "xfs.h" |
19 | #include "xfs_fs.h" | 19 | #include "xfs_fs.h" |
20 | #include "xfs_acl.h" | 20 | #include "xfs_acl.h" |
21 | #include "xfs_bit.h" | ||
22 | #include "xfs_log.h" | 21 | #include "xfs_log.h" |
23 | #include "xfs_inum.h" | ||
24 | #include "xfs_trans.h" | 22 | #include "xfs_trans.h" |
25 | #include "xfs_sb.h" | 23 | #include "xfs_sb.h" |
26 | #include "xfs_ag.h" | 24 | #include "xfs_ag.h" |
@@ -34,7 +32,6 @@ | |||
34 | #include "xfs_rtalloc.h" | 32 | #include "xfs_rtalloc.h" |
35 | #include "xfs_error.h" | 33 | #include "xfs_error.h" |
36 | #include "xfs_itable.h" | 34 | #include "xfs_itable.h" |
37 | #include "xfs_rw.h" | ||
38 | #include "xfs_attr.h" | 35 | #include "xfs_attr.h" |
39 | #include "xfs_buf_item.h" | 36 | #include "xfs_buf_item.h" |
40 | #include "xfs_utils.h" | 37 | #include "xfs_utils.h" |
@@ -700,7 +697,7 @@ xfs_setattr_size( | |||
700 | xfs_off_t oldsize, newsize; | 697 | xfs_off_t oldsize, newsize; |
701 | struct xfs_trans *tp; | 698 | struct xfs_trans *tp; |
702 | int error; | 699 | int error; |
703 | uint lock_flags; | 700 | uint lock_flags = 0; |
704 | uint commit_flags = 0; | 701 | uint commit_flags = 0; |
705 | 702 | ||
706 | trace_xfs_setattr(ip); | 703 | trace_xfs_setattr(ip); |
@@ -720,10 +717,10 @@ xfs_setattr_size( | |||
720 | ATTR_MTIME_SET|ATTR_KILL_SUID|ATTR_KILL_SGID| | 717 | ATTR_MTIME_SET|ATTR_KILL_SUID|ATTR_KILL_SGID| |
721 | ATTR_KILL_PRIV|ATTR_TIMES_SET)) == 0); | 718 | ATTR_KILL_PRIV|ATTR_TIMES_SET)) == 0); |
722 | 719 | ||
723 | lock_flags = XFS_ILOCK_EXCL; | 720 | if (!(flags & XFS_ATTR_NOLOCK)) { |
724 | if (!(flags & XFS_ATTR_NOLOCK)) | ||
725 | lock_flags |= XFS_IOLOCK_EXCL; | 721 | lock_flags |= XFS_IOLOCK_EXCL; |
726 | xfs_ilock(ip, lock_flags); | 722 | xfs_ilock(ip, lock_flags); |
723 | } | ||
727 | 724 | ||
728 | oldsize = inode->i_size; | 725 | oldsize = inode->i_size; |
729 | newsize = iattr->ia_size; | 726 | newsize = iattr->ia_size; |
@@ -746,7 +743,7 @@ xfs_setattr_size( | |||
746 | /* | 743 | /* |
747 | * Make sure that the dquots are attached to the inode. | 744 | * Make sure that the dquots are attached to the inode. |
748 | */ | 745 | */ |
749 | error = xfs_qm_dqattach_locked(ip, 0); | 746 | error = xfs_qm_dqattach(ip, 0); |
750 | if (error) | 747 | if (error) |
751 | goto out_unlock; | 748 | goto out_unlock; |
752 | 749 | ||
@@ -768,8 +765,6 @@ xfs_setattr_size( | |||
768 | if (error) | 765 | if (error) |
769 | goto out_unlock; | 766 | goto out_unlock; |
770 | } | 767 | } |
771 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | ||
772 | lock_flags &= ~XFS_ILOCK_EXCL; | ||
773 | 768 | ||
774 | /* | 769 | /* |
775 | * We are going to log the inode size change in this transaction so | 770 | * We are going to log the inode size change in this transaction so |
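xfs_setattr_size() now starts with lock_flags = 0 and takes only the iolock up front (and nothing at all under XFS_ATTR_NOLOCK); the removed early iunlock suggests the ilock is no longer held across the setup work and is instead taken later, around the transactional part. In outline:

        uint    lock_flags = 0;

        if (!(flags & XFS_ATTR_NOLOCK)) {
                lock_flags |= XFS_IOLOCK_EXCL;
                xfs_ilock(ip, lock_flags);
        }
        /* dquot attach, EOF zeroing etc. proceed without the ilock */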
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index acc2bf264dab..eff577a9b67f 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c | |||
@@ -18,7 +18,6 @@ | |||
18 | #include "xfs.h" | 18 | #include "xfs.h" |
19 | #include "xfs_fs.h" | 19 | #include "xfs_fs.h" |
20 | #include "xfs_types.h" | 20 | #include "xfs_types.h" |
21 | #include "xfs_bit.h" | ||
22 | #include "xfs_log.h" | 21 | #include "xfs_log.h" |
23 | #include "xfs_inum.h" | 22 | #include "xfs_inum.h" |
24 | #include "xfs_trans.h" | 23 | #include "xfs_trans.h" |
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 6db1fef38bff..6b965bf450e4 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c | |||
@@ -18,9 +18,7 @@ | |||
18 | #include "xfs.h" | 18 | #include "xfs.h" |
19 | #include "xfs_fs.h" | 19 | #include "xfs_fs.h" |
20 | #include "xfs_types.h" | 20 | #include "xfs_types.h" |
21 | #include "xfs_bit.h" | ||
22 | #include "xfs_log.h" | 21 | #include "xfs_log.h" |
23 | #include "xfs_inum.h" | ||
24 | #include "xfs_trans.h" | 22 | #include "xfs_trans.h" |
25 | #include "xfs_sb.h" | 23 | #include "xfs_sb.h" |
26 | #include "xfs_ag.h" | 24 | #include "xfs_ag.h" |
@@ -35,7 +33,6 @@ | |||
35 | #include "xfs_trans_priv.h" | 33 | #include "xfs_trans_priv.h" |
36 | #include "xfs_dinode.h" | 34 | #include "xfs_dinode.h" |
37 | #include "xfs_inode.h" | 35 | #include "xfs_inode.h" |
38 | #include "xfs_rw.h" | ||
39 | #include "xfs_trace.h" | 36 | #include "xfs_trace.h" |
40 | 37 | ||
41 | kmem_zone_t *xfs_log_ticket_zone; | 38 | kmem_zone_t *xfs_log_ticket_zone; |
@@ -916,27 +913,42 @@ xfs_log_need_covered(xfs_mount_t *mp) | |||
916 | * We may be holding the log iclog lock upon entering this routine. | 913 | * We may be holding the log iclog lock upon entering this routine. |
917 | */ | 914 | */ |
918 | xfs_lsn_t | 915 | xfs_lsn_t |
919 | xlog_assign_tail_lsn( | 916 | xlog_assign_tail_lsn_locked( |
920 | struct xfs_mount *mp) | 917 | struct xfs_mount *mp) |
921 | { | 918 | { |
922 | xfs_lsn_t tail_lsn; | ||
923 | struct log *log = mp->m_log; | 919 | struct log *log = mp->m_log; |
920 | struct xfs_log_item *lip; | ||
921 | xfs_lsn_t tail_lsn; | ||
922 | |||
923 | assert_spin_locked(&mp->m_ail->xa_lock); | ||
924 | 924 | ||
925 | /* | 925 | /* |
926 | * To make sure we always have a valid LSN for the log tail we keep | 926 | * To make sure we always have a valid LSN for the log tail we keep |
927 | * track of the last LSN which was committed in log->l_last_sync_lsn, | 927 | * track of the last LSN which was committed in log->l_last_sync_lsn, |
928 | * and use that when the AIL was empty and xfs_ail_min_lsn returns 0. | 928 | * and use that when the AIL was empty. |
929 | * | ||
930 | * If the AIL has been emptied we also need to wake any process | ||
931 | * waiting for this condition. | ||
932 | */ | 929 | */ |
933 | tail_lsn = xfs_ail_min_lsn(mp->m_ail); | 930 | lip = xfs_ail_min(mp->m_ail); |
934 | if (!tail_lsn) | 931 | if (lip) |
932 | tail_lsn = lip->li_lsn; | ||
933 | else | ||
935 | tail_lsn = atomic64_read(&log->l_last_sync_lsn); | 934 | tail_lsn = atomic64_read(&log->l_last_sync_lsn); |
936 | atomic64_set(&log->l_tail_lsn, tail_lsn); | 935 | atomic64_set(&log->l_tail_lsn, tail_lsn); |
937 | return tail_lsn; | 936 | return tail_lsn; |
938 | } | 937 | } |
939 | 938 | ||
939 | xfs_lsn_t | ||
940 | xlog_assign_tail_lsn( | ||
941 | struct xfs_mount *mp) | ||
942 | { | ||
943 | xfs_lsn_t tail_lsn; | ||
944 | |||
945 | spin_lock(&mp->m_ail->xa_lock); | ||
946 | tail_lsn = xlog_assign_tail_lsn_locked(mp); | ||
947 | spin_unlock(&mp->m_ail->xa_lock); | ||
948 | |||
949 | return tail_lsn; | ||
950 | } | ||
951 | |||
940 | /* | 952 | /* |
941 | * Return the space in the log between the tail and the head. The head | 953 | * Return the space in the log between the tail and the head. The head |
942 | * is passed in the cycle/bytes formal parms. In the special case where | 954 | * is passed in the cycle/bytes formal parms. In the special case where |
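The split follows the usual *_locked convention: xlog_assign_tail_lsn_locked() asserts the AIL lock is held and reads the AIL minimum directly via xfs_ail_min(), while xlog_assign_tail_lsn() is the take-and-drop wrapper. Callers already under xa_lock can now avoid a redundant lock round trip:

        spin_lock(&mp->m_ail->xa_lock);
        /* ... AIL manipulation ... */
        tail_lsn = xlog_assign_tail_lsn_locked(mp);     /* no re-lock needed */
        spin_unlock(&mp->m_ail->xa_lock);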
@@ -1172,7 +1184,7 @@ xlog_alloc_log(xfs_mount_t *mp, | |||
1172 | xlog_get_iclog_buffer_size(mp, log); | 1184 | xlog_get_iclog_buffer_size(mp, log); |
1173 | 1185 | ||
1174 | error = ENOMEM; | 1186 | error = ENOMEM; |
1175 | bp = xfs_buf_alloc(mp->m_logdev_targp, 0, log->l_iclog_size, 0); | 1187 | bp = xfs_buf_alloc(mp->m_logdev_targp, 0, BTOBB(log->l_iclog_size), 0); |
1176 | if (!bp) | 1188 | if (!bp) |
1177 | goto out_free_log; | 1189 | goto out_free_log; |
1178 | bp->b_iodone = xlog_iodone; | 1190 | bp->b_iodone = xlog_iodone; |
@@ -1182,9 +1194,6 @@ xlog_alloc_log(xfs_mount_t *mp, | |||
1182 | spin_lock_init(&log->l_icloglock); | 1194 | spin_lock_init(&log->l_icloglock); |
1183 | init_waitqueue_head(&log->l_flush_wait); | 1195 | init_waitqueue_head(&log->l_flush_wait); |
1184 | 1196 | ||
1185 | /* log record size must be multiple of BBSIZE; see xlog_rec_header_t */ | ||
1186 | ASSERT((XFS_BUF_SIZE(bp) & BBMASK) == 0); | ||
1187 | |||
1188 | iclogp = &log->l_iclog; | 1197 | iclogp = &log->l_iclog; |
1189 | /* | 1198 | /* |
1190 | * The amount of memory to allocate for the iclog structure is | 1199 | * The amount of memory to allocate for the iclog structure is |
@@ -1204,7 +1213,7 @@ xlog_alloc_log(xfs_mount_t *mp, | |||
1204 | prev_iclog = iclog; | 1213 | prev_iclog = iclog; |
1205 | 1214 | ||
1206 | bp = xfs_buf_get_uncached(mp->m_logdev_targp, | 1215 | bp = xfs_buf_get_uncached(mp->m_logdev_targp, |
1207 | log->l_iclog_size, 0); | 1216 | BTOBB(log->l_iclog_size), 0); |
1208 | if (!bp) | 1217 | if (!bp) |
1209 | goto out_free_iclog; | 1218 | goto out_free_iclog; |
1210 | 1219 | ||
@@ -1224,7 +1233,7 @@ xlog_alloc_log(xfs_mount_t *mp, | |||
1224 | head->h_fmt = cpu_to_be32(XLOG_FMT); | 1233 | head->h_fmt = cpu_to_be32(XLOG_FMT); |
1225 | memcpy(&head->h_fs_uuid, &mp->m_sb.sb_uuid, sizeof(uuid_t)); | 1234 | memcpy(&head->h_fs_uuid, &mp->m_sb.sb_uuid, sizeof(uuid_t)); |
1226 | 1235 | ||
1227 | iclog->ic_size = XFS_BUF_SIZE(bp) - log->l_iclog_hsize; | 1236 | iclog->ic_size = BBTOB(bp->b_length) - log->l_iclog_hsize; |
1228 | iclog->ic_state = XLOG_STATE_ACTIVE; | 1237 | iclog->ic_state = XLOG_STATE_ACTIVE; |
1229 | iclog->ic_log = log; | 1238 | iclog->ic_log = log; |
1230 | atomic_set(&iclog->ic_refcnt, 0); | 1239 | atomic_set(&iclog->ic_refcnt, 0); |
@@ -1475,7 +1484,7 @@ xlog_sync(xlog_t *log, | |||
1475 | } else { | 1484 | } else { |
1476 | iclog->ic_bwritecnt = 1; | 1485 | iclog->ic_bwritecnt = 1; |
1477 | } | 1486 | } |
1478 | XFS_BUF_SET_COUNT(bp, count); | 1487 | bp->b_io_length = BTOBB(count); |
1479 | bp->b_fspriv = iclog; | 1488 | bp->b_fspriv = iclog; |
1480 | XFS_BUF_ZEROFLAGS(bp); | 1489 | XFS_BUF_ZEROFLAGS(bp); |
1481 | XFS_BUF_ASYNC(bp); | 1490 | XFS_BUF_ASYNC(bp); |
@@ -1573,7 +1582,7 @@ xlog_dealloc_log(xlog_t *log) | |||
1573 | * always need to ensure that the extra buffer does not point to memory | 1582 | * always need to ensure that the extra buffer does not point to memory |
1574 | * owned by another log buffer before we free it. | 1583 | * owned by another log buffer before we free it. |
1575 | */ | 1584 | */ |
1576 | xfs_buf_set_empty(log->l_xbuf, log->l_iclog_size); | 1585 | xfs_buf_set_empty(log->l_xbuf, BTOBB(log->l_iclog_size)); |
1577 | xfs_buf_free(log->l_xbuf); | 1586 | xfs_buf_free(log->l_xbuf); |
1578 | 1587 | ||
1579 | iclog = log->l_iclog; | 1588 | iclog = log->l_iclog; |
@@ -2932,6 +2941,7 @@ xfs_log_force( | |||
2932 | { | 2941 | { |
2933 | int error; | 2942 | int error; |
2934 | 2943 | ||
2944 | trace_xfs_log_force(mp, 0); | ||
2935 | error = _xfs_log_force(mp, flags, NULL); | 2945 | error = _xfs_log_force(mp, flags, NULL); |
2936 | if (error) | 2946 | if (error) |
2937 | xfs_warn(mp, "%s: error %d returned.", __func__, error); | 2947 | xfs_warn(mp, "%s: error %d returned.", __func__, error); |
@@ -3080,6 +3090,7 @@ xfs_log_force_lsn( | |||
3080 | { | 3090 | { |
3081 | int error; | 3091 | int error; |
3082 | 3092 | ||
3093 | trace_xfs_log_force(mp, lsn); | ||
3083 | error = _xfs_log_force_lsn(mp, lsn, flags, NULL); | 3094 | error = _xfs_log_force_lsn(mp, lsn, flags, NULL); |
3084 | if (error) | 3095 | if (error) |
3085 | xfs_warn(mp, "%s: error %d returned.", __func__, error); | 3096 | xfs_warn(mp, "%s: error %d returned.", __func__, error); |
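The sizing changes in the hunks above move log buffers from byte counts to 512-byte basic blocks: b_length and b_io_length are measured in BBs, so byte quantities are converted with BTOBB() going in and BBTOB() coming out, as the patch itself does:

        /* l_iclog_size is in bytes; the buffer API now wants basic blocks */
        bp = xfs_buf_alloc(mp->m_logdev_targp, 0, BTOBB(log->l_iclog_size), 0);

        /* usable byte size is recovered from the BB count */
        iclog->ic_size = BBTOB(bp->b_length) - log->l_iclog_hsize;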
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h index 2c622bedb302..748d312850e2 100644 --- a/fs/xfs/xfs_log.h +++ b/fs/xfs/xfs_log.h | |||
@@ -152,6 +152,7 @@ int xfs_log_mount(struct xfs_mount *mp, | |||
152 | int num_bblocks); | 152 | int num_bblocks); |
153 | int xfs_log_mount_finish(struct xfs_mount *mp); | 153 | int xfs_log_mount_finish(struct xfs_mount *mp); |
154 | xfs_lsn_t xlog_assign_tail_lsn(struct xfs_mount *mp); | 154 | xfs_lsn_t xlog_assign_tail_lsn(struct xfs_mount *mp); |
155 | xfs_lsn_t xlog_assign_tail_lsn_locked(struct xfs_mount *mp); | ||
155 | void xfs_log_space_wake(struct xfs_mount *mp); | 156 | void xfs_log_space_wake(struct xfs_mount *mp); |
156 | int xfs_log_notify(struct xfs_mount *mp, | 157 | int xfs_log_notify(struct xfs_mount *mp, |
157 | struct xlog_in_core *iclog, | 158 | struct xlog_in_core *iclog, |
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index d4fadbe8ac90..7d6197c58493 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c | |||
@@ -18,9 +18,7 @@ | |||
18 | #include "xfs.h" | 18 | #include "xfs.h" |
19 | #include "xfs_fs.h" | 19 | #include "xfs_fs.h" |
20 | #include "xfs_types.h" | 20 | #include "xfs_types.h" |
21 | #include "xfs_bit.h" | ||
22 | #include "xfs_log.h" | 21 | #include "xfs_log.h" |
23 | #include "xfs_inum.h" | ||
24 | #include "xfs_trans.h" | 22 | #include "xfs_trans.h" |
25 | #include "xfs_trans_priv.h" | 23 | #include "xfs_trans_priv.h" |
26 | #include "xfs_log_priv.h" | 24 | #include "xfs_log_priv.h" |
@@ -29,61 +27,10 @@ | |||
29 | #include "xfs_mount.h" | 27 | #include "xfs_mount.h" |
30 | #include "xfs_error.h" | 28 | #include "xfs_error.h" |
31 | #include "xfs_alloc.h" | 29 | #include "xfs_alloc.h" |
30 | #include "xfs_extent_busy.h" | ||
32 | #include "xfs_discard.h" | 31 | #include "xfs_discard.h" |
33 | 32 | ||
34 | /* | 33 | /* |
35 | * Perform initial CIL structure initialisation. | ||
36 | */ | ||
37 | int | ||
38 | xlog_cil_init( | ||
39 | struct log *log) | ||
40 | { | ||
41 | struct xfs_cil *cil; | ||
42 | struct xfs_cil_ctx *ctx; | ||
43 | |||
44 | cil = kmem_zalloc(sizeof(*cil), KM_SLEEP|KM_MAYFAIL); | ||
45 | if (!cil) | ||
46 | return ENOMEM; | ||
47 | |||
48 | ctx = kmem_zalloc(sizeof(*ctx), KM_SLEEP|KM_MAYFAIL); | ||
49 | if (!ctx) { | ||
50 | kmem_free(cil); | ||
51 | return ENOMEM; | ||
52 | } | ||
53 | |||
54 | INIT_LIST_HEAD(&cil->xc_cil); | ||
55 | INIT_LIST_HEAD(&cil->xc_committing); | ||
56 | spin_lock_init(&cil->xc_cil_lock); | ||
57 | init_rwsem(&cil->xc_ctx_lock); | ||
58 | init_waitqueue_head(&cil->xc_commit_wait); | ||
59 | |||
60 | INIT_LIST_HEAD(&ctx->committing); | ||
61 | INIT_LIST_HEAD(&ctx->busy_extents); | ||
62 | ctx->sequence = 1; | ||
63 | ctx->cil = cil; | ||
64 | cil->xc_ctx = ctx; | ||
65 | cil->xc_current_sequence = ctx->sequence; | ||
66 | |||
67 | cil->xc_log = log; | ||
68 | log->l_cilp = cil; | ||
69 | return 0; | ||
70 | } | ||
71 | |||
72 | void | ||
73 | xlog_cil_destroy( | ||
74 | struct log *log) | ||
75 | { | ||
76 | if (log->l_cilp->xc_ctx) { | ||
77 | if (log->l_cilp->xc_ctx->ticket) | ||
78 | xfs_log_ticket_put(log->l_cilp->xc_ctx->ticket); | ||
79 | kmem_free(log->l_cilp->xc_ctx); | ||
80 | } | ||
81 | |||
82 | ASSERT(list_empty(&log->l_cilp->xc_cil)); | ||
83 | kmem_free(log->l_cilp); | ||
84 | } | ||
85 | |||
86 | /* | ||
87 | * Allocate a new ticket. Failing to get a new ticket makes it really hard to | 34 | * Allocate a new ticket. Failing to get a new ticket makes it really hard to |
88 | * recover, so we don't allow failure here. Also, we allocate in a context that | 35 | * recover, so we don't allow failure here. Also, we allocate in a context that |
89 | * we don't want to be issuing transactions from, so we need to tell the | 36 | * we don't want to be issuing transactions from, so we need to tell the |
@@ -390,8 +337,8 @@ xlog_cil_committed( | |||
390 | xfs_trans_committed_bulk(ctx->cil->xc_log->l_ailp, ctx->lv_chain, | 337 | xfs_trans_committed_bulk(ctx->cil->xc_log->l_ailp, ctx->lv_chain, |
391 | ctx->start_lsn, abort); | 338 | ctx->start_lsn, abort); |
392 | 339 | ||
393 | xfs_alloc_busy_sort(&ctx->busy_extents); | 340 | xfs_extent_busy_sort(&ctx->busy_extents); |
394 | xfs_alloc_busy_clear(mp, &ctx->busy_extents, | 341 | xfs_extent_busy_clear(mp, &ctx->busy_extents, |
395 | (mp->m_flags & XFS_MOUNT_DISCARD) && !abort); | 342 | (mp->m_flags & XFS_MOUNT_DISCARD) && !abort); |
396 | 343 | ||
397 | spin_lock(&ctx->cil->xc_cil_lock); | 344 | spin_lock(&ctx->cil->xc_cil_lock); |
@@ -404,7 +351,7 @@ xlog_cil_committed( | |||
404 | ASSERT(mp->m_flags & XFS_MOUNT_DISCARD); | 351 | ASSERT(mp->m_flags & XFS_MOUNT_DISCARD); |
405 | 352 | ||
406 | xfs_discard_extents(mp, &ctx->busy_extents); | 353 | xfs_discard_extents(mp, &ctx->busy_extents); |
407 | xfs_alloc_busy_clear(mp, &ctx->busy_extents, false); | 354 | xfs_extent_busy_clear(mp, &ctx->busy_extents, false); |
408 | } | 355 | } |
409 | 356 | ||
410 | kmem_free(ctx); | 357 | kmem_free(ctx); |
@@ -426,8 +373,7 @@ xlog_cil_committed( | |||
426 | */ | 373 | */ |
427 | STATIC int | 374 | STATIC int |
428 | xlog_cil_push( | 375 | xlog_cil_push( |
429 | struct log *log, | 376 | struct log *log) |
430 | xfs_lsn_t push_seq) | ||
431 | { | 377 | { |
432 | struct xfs_cil *cil = log->l_cilp; | 378 | struct xfs_cil *cil = log->l_cilp; |
433 | struct xfs_log_vec *lv; | 379 | struct xfs_log_vec *lv; |
@@ -443,39 +389,36 @@ xlog_cil_push( | |||
443 | struct xfs_log_iovec lhdr; | 389 | struct xfs_log_iovec lhdr; |
444 | struct xfs_log_vec lvhdr = { NULL }; | 390 | struct xfs_log_vec lvhdr = { NULL }; |
445 | xfs_lsn_t commit_lsn; | 391 | xfs_lsn_t commit_lsn; |
392 | xfs_lsn_t push_seq; | ||
446 | 393 | ||
447 | if (!cil) | 394 | if (!cil) |
448 | return 0; | 395 | return 0; |
449 | 396 | ||
450 | ASSERT(!push_seq || push_seq <= cil->xc_ctx->sequence); | ||
451 | |||
452 | new_ctx = kmem_zalloc(sizeof(*new_ctx), KM_SLEEP|KM_NOFS); | 397 | new_ctx = kmem_zalloc(sizeof(*new_ctx), KM_SLEEP|KM_NOFS); |
453 | new_ctx->ticket = xlog_cil_ticket_alloc(log); | 398 | new_ctx->ticket = xlog_cil_ticket_alloc(log); |
454 | 399 | ||
455 | /* | 400 | down_write(&cil->xc_ctx_lock); |
456 | * Lock out transaction commit, but don't block for background pushes | ||
457 | * unless we are well over the CIL space limit. See the definition of | ||
458 | * XLOG_CIL_HARD_SPACE_LIMIT() for the full explanation of the logic | ||
459 | * used here. | ||
460 | */ | ||
461 | if (!down_write_trylock(&cil->xc_ctx_lock)) { | ||
462 | if (!push_seq && | ||
463 | cil->xc_ctx->space_used < XLOG_CIL_HARD_SPACE_LIMIT(log)) | ||
464 | goto out_free_ticket; | ||
465 | down_write(&cil->xc_ctx_lock); | ||
466 | } | ||
467 | ctx = cil->xc_ctx; | 401 | ctx = cil->xc_ctx; |
468 | 402 | ||
469 | /* check if we've anything to push */ | 403 | spin_lock(&cil->xc_cil_lock); |
470 | if (list_empty(&cil->xc_cil)) | 404 | push_seq = cil->xc_push_seq; |
471 | goto out_skip; | 405 | ASSERT(push_seq <= ctx->sequence); |
472 | 406 | ||
473 | /* check for spurious background flush */ | 407 | /* |
474 | if (!push_seq && cil->xc_ctx->space_used < XLOG_CIL_SPACE_LIMIT(log)) | 408 | * Check if we've anything to push. If there is nothing, then we don't |
409 | * move on to a new sequence number and so we have to be able to push | ||
410 | * this sequence again later. | ||
411 | */ | ||
412 | if (list_empty(&cil->xc_cil)) { | ||
413 | cil->xc_push_seq = 0; | ||
414 | spin_unlock(&cil->xc_cil_lock); | ||
475 | goto out_skip; | 415 | goto out_skip; |
416 | } | ||
417 | spin_unlock(&cil->xc_cil_lock); | ||
418 | |||
476 | 419 | ||
477 | /* check for a previously pushed sequence */ | 420 |
478 | if (push_seq && push_seq < cil->xc_ctx->sequence) | 421 | if (push_seq < cil->xc_ctx->sequence) |
479 | goto out_skip; | 422 | goto out_skip; |
480 | 423 | ||
481 | /* | 424 | /* |
@@ -629,7 +572,6 @@ restart: | |||
629 | 572 | ||
630 | out_skip: | 573 | out_skip: |
631 | up_write(&cil->xc_ctx_lock); | 574 | up_write(&cil->xc_ctx_lock); |
632 | out_free_ticket: | ||
633 | xfs_log_ticket_put(new_ctx->ticket); | 575 | xfs_log_ticket_put(new_ctx->ticket); |
634 | kmem_free(new_ctx); | 576 | kmem_free(new_ctx); |
635 | return 0; | 577 | return 0; |
@@ -641,6 +583,82 @@ out_abort: | |||
641 | return XFS_ERROR(EIO); | 583 | return XFS_ERROR(EIO); |
642 | } | 584 | } |
643 | 585 | ||
586 | static void | ||
587 | xlog_cil_push_work( | ||
588 | struct work_struct *work) | ||
589 | { | ||
590 | struct xfs_cil *cil = container_of(work, struct xfs_cil, | ||
591 | xc_push_work); | ||
592 | xlog_cil_push(cil->xc_log); | ||
593 | } | ||
594 | |||
595 | /* | ||
596 | * We need to push the CIL every so often so we don't cache more than we can fit in |||
597 | * the log. The limit really is that a checkpoint can't be more than half the | ||
598 | * log (the current checkpoint is not allowed to overwrite the previous | ||
599 | * checkpoint), but commit latency and memory usage limit this to a smaller | ||
600 | * size. | ||
601 | */ | ||
602 | static void | ||
603 | xlog_cil_push_background( | ||
604 | struct log *log) | ||
605 | { | ||
606 | struct xfs_cil *cil = log->l_cilp; | ||
607 | |||
608 | /* | ||
609 | * The CIL won't be empty because we are called while holding the |||
610 | * context lock so whatever we added to the CIL will still be there | ||
611 | */ | ||
612 | ASSERT(!list_empty(&cil->xc_cil)); | ||
613 | |||
614 | /* | ||
615 | * don't do a background push if we haven't used up all the | ||
616 | * space available yet. | ||
617 | */ | ||
618 | if (cil->xc_ctx->space_used < XLOG_CIL_SPACE_LIMIT(log)) | ||
619 | return; | ||
620 | |||
621 | spin_lock(&cil->xc_cil_lock); | ||
622 | if (cil->xc_push_seq < cil->xc_current_sequence) { | ||
623 | cil->xc_push_seq = cil->xc_current_sequence; | ||
624 | queue_work(log->l_mp->m_cil_workqueue, &cil->xc_push_work); | ||
625 | } | ||
626 | spin_unlock(&cil->xc_cil_lock); | ||
627 | |||
628 | } | ||
629 | |||
630 | static void | ||
631 | xlog_cil_push_foreground( | ||
632 | struct log *log, | ||
633 | xfs_lsn_t push_seq) | ||
634 | { | ||
635 | struct xfs_cil *cil = log->l_cilp; | ||
636 | |||
637 | if (!cil) | ||
638 | return; | ||
639 | |||
640 | ASSERT(push_seq && push_seq <= cil->xc_current_sequence); | ||
641 | |||
642 | /* start on any pending background push to minimise wait time on it */ | ||
643 | flush_work(&cil->xc_push_work); | ||
644 | |||
645 | /* | ||
646 | * If the CIL is empty or we've already pushed the sequence then | ||
647 | * there's no work we need to do. | ||
648 | */ | ||
649 | spin_lock(&cil->xc_cil_lock); | ||
650 | if (list_empty(&cil->xc_cil) || push_seq <= cil->xc_push_seq) { | ||
651 | spin_unlock(&cil->xc_cil_lock); | ||
652 | return; | ||
653 | } | ||
654 | |||
655 | cil->xc_push_seq = push_seq; | ||
656 | spin_unlock(&cil->xc_cil_lock); | ||
657 | |||
658 | /* do the push now */ | ||
659 | xlog_cil_push(log); | ||
660 | } | ||
661 | |||
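The CIL push is now split by caller intent: the commit path calls xlog_cil_push_background(), which merely queues work on m_cil_workqueue once space_used passes XLOG_CIL_SPACE_LIMIT(), while log forces call xlog_cil_push_foreground(), which records the target sequence and pushes synchronously. Roughly:

        /* transaction commit: opportunistic, never blocks on the push */
        xlog_cil_push_background(log);

        /* log force: everything up to 'sequence' must reach the log */
        xlog_cil_push_foreground(log, sequence);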
644 | /* | 662 | /* |
645 | * Commit a transaction with the given vector to the Committed Item List. | 663 | * Commit a transaction with the given vector to the Committed Item List. |
646 | * | 664 | * |
@@ -667,7 +685,6 @@ xfs_log_commit_cil( | |||
667 | { | 685 | { |
668 | struct log *log = mp->m_log; | 686 | struct log *log = mp->m_log; |
669 | int log_flags = 0; | 687 | int log_flags = 0; |
670 | int push = 0; | ||
671 | struct xfs_log_vec *log_vector; | 688 | struct xfs_log_vec *log_vector; |
672 | 689 | ||
673 | if (flags & XFS_TRANS_RELEASE_LOG_RES) | 690 | if (flags & XFS_TRANS_RELEASE_LOG_RES) |
@@ -719,21 +736,9 @@ xfs_log_commit_cil( | |||
719 | */ | 736 | */ |
720 | xfs_trans_free_items(tp, *commit_lsn, 0); | 737 | xfs_trans_free_items(tp, *commit_lsn, 0); |
721 | 738 | ||
722 | /* check for background commit before unlock */ | 739 | xlog_cil_push_background(log); |
723 | if (log->l_cilp->xc_ctx->space_used > XLOG_CIL_SPACE_LIMIT(log)) | ||
724 | push = 1; | ||
725 | 740 | ||
726 | up_read(&log->l_cilp->xc_ctx_lock); | 741 | up_read(&log->l_cilp->xc_ctx_lock); |
727 | |||
728 | /* | ||
729 | * We need to push CIL every so often so we don't cache more than we | ||
730 | * can fit in the log. The limit really is that a checkpoint can't be | ||
731 | * more than half the log (the current checkpoint is not allowed to | ||
732 | * overwrite the previous checkpoint), but commit latency and memory | ||
733 | * usage limit this to a smaller size in most cases. | ||
734 | */ | ||
735 | if (push) | ||
736 | xlog_cil_push(log, 0); | ||
737 | return 0; | 742 | return 0; |
738 | } | 743 | } |
739 | 744 | ||
@@ -746,9 +751,6 @@ xfs_log_commit_cil( | |||
746 | * | 751 | * |
747 | * We return the current commit lsn to allow the callers to determine if a | 752 | * We return the current commit lsn to allow the callers to determine if a |
748 | * iclog flush is necessary following this call. | 753 | * iclog flush is necessary following this call. |
749 | * | ||
750 | * XXX: Initially, just push the CIL unconditionally and return whatever | ||
751 | * commit lsn is there. It'll be empty, so this is broken for now. | ||
752 | */ | 754 | */ |
753 | xfs_lsn_t | 755 | xfs_lsn_t |
754 | xlog_cil_force_lsn( | 756 | xlog_cil_force_lsn( |
@@ -766,8 +768,7 @@ xlog_cil_force_lsn( | |||
766 | * xlog_cil_push() handles racing pushes for the same sequence, | 768 | * xlog_cil_push() handles racing pushes for the same sequence, |
767 | * so no need to deal with it here. | 769 | * so no need to deal with it here. |
768 | */ | 770 | */ |
769 | if (sequence == cil->xc_current_sequence) | 771 | xlog_cil_push_foreground(log, sequence); |
770 | xlog_cil_push(log, sequence); | ||
771 | 772 | ||
772 | /* | 773 | /* |
773 | * See if we can find a previous sequence still committing. | 774 | * See if we can find a previous sequence still committing. |
@@ -826,3 +827,57 @@ xfs_log_item_in_current_chkpt( | |||
826 | return false; | 827 | return false; |
827 | return true; | 828 | return true; |
828 | } | 829 | } |
830 | |||
831 | /* | ||
832 | * Perform initial CIL structure initialisation. | ||
833 | */ | ||
834 | int | ||
835 | xlog_cil_init( | ||
836 | struct log *log) | ||
837 | { | ||
838 | struct xfs_cil *cil; | ||
839 | struct xfs_cil_ctx *ctx; | ||
840 | |||
841 | cil = kmem_zalloc(sizeof(*cil), KM_SLEEP|KM_MAYFAIL); | ||
842 | if (!cil) | ||
843 | return ENOMEM; | ||
844 | |||
845 | ctx = kmem_zalloc(sizeof(*ctx), KM_SLEEP|KM_MAYFAIL); | ||
846 | if (!ctx) { | ||
847 | kmem_free(cil); | ||
848 | return ENOMEM; | ||
849 | } | ||
850 | |||
851 | INIT_WORK(&cil->xc_push_work, xlog_cil_push_work); | ||
852 | INIT_LIST_HEAD(&cil->xc_cil); | ||
853 | INIT_LIST_HEAD(&cil->xc_committing); | ||
854 | spin_lock_init(&cil->xc_cil_lock); | ||
855 | init_rwsem(&cil->xc_ctx_lock); | ||
856 | init_waitqueue_head(&cil->xc_commit_wait); | ||
857 | |||
858 | INIT_LIST_HEAD(&ctx->committing); | ||
859 | INIT_LIST_HEAD(&ctx->busy_extents); | ||
860 | ctx->sequence = 1; | ||
861 | ctx->cil = cil; | ||
862 | cil->xc_ctx = ctx; | ||
863 | cil->xc_current_sequence = ctx->sequence; | ||
864 | |||
865 | cil->xc_log = log; | ||
866 | log->l_cilp = cil; | ||
867 | return 0; | ||
868 | } | ||
869 | |||
870 | void | ||
871 | xlog_cil_destroy( | ||
872 | struct log *log) | ||
873 | { | ||
874 | if (log->l_cilp->xc_ctx) { | ||
875 | if (log->l_cilp->xc_ctx->ticket) | ||
876 | xfs_log_ticket_put(log->l_cilp->xc_ctx->ticket); | ||
877 | kmem_free(log->l_cilp->xc_ctx); | ||
878 | } | ||
879 | |||
880 | ASSERT(list_empty(&log->l_cilp->xc_cil)); | ||
881 | kmem_free(log->l_cilp); | ||
882 | } | ||
883 | |||
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index 2152900b79d4..735ff1ee53da 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h | |||
@@ -417,6 +417,8 @@ struct xfs_cil { | |||
417 | struct list_head xc_committing; | 417 | struct list_head xc_committing; |
418 | wait_queue_head_t xc_commit_wait; | 418 | wait_queue_head_t xc_commit_wait; |
419 | xfs_lsn_t xc_current_sequence; | 419 | xfs_lsn_t xc_current_sequence; |
420 | struct work_struct xc_push_work; | ||
421 | xfs_lsn_t xc_push_seq; | ||
420 | }; | 422 | }; |
421 | 423 | ||
422 | /* | 424 | /* |
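The two fields added above carry the background-push state: xc_push_seq records which sequence a push has been requested for, and xc_push_work is the work item wired to xlog_cil_push_work() in xlog_cil_init(). A hedged sketch of how a background push would be queued; the wrapper name and the exact locking around the fields are assumptions, while the fields and m_cil_workqueue come from this series.

/* Sketch: requesting an asynchronous CIL push (illustrative wrapper name). */
static void example_push_background(struct xfs_cil *cil)
{
	cil->xc_push_seq = cil->xc_current_sequence;	/* target sequence */
	queue_work(cil->xc_log->l_mp->m_cil_workqueue,	/* per-mount CIL wq */
		   &cil->xc_push_work);
}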
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 8ecad5bad66c..ca386909131a 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c | |||
@@ -40,7 +40,6 @@ | |||
40 | #include "xfs_extfree_item.h" | 40 | #include "xfs_extfree_item.h" |
41 | #include "xfs_trans_priv.h" | 41 | #include "xfs_trans_priv.h" |
42 | #include "xfs_quota.h" | 42 | #include "xfs_quota.h" |
43 | #include "xfs_rw.h" | ||
44 | #include "xfs_utils.h" | 43 | #include "xfs_utils.h" |
45 | #include "xfs_trace.h" | 44 | #include "xfs_trace.h" |
46 | 45 | ||
@@ -120,7 +119,7 @@ xlog_get_bp( | |||
120 | nbblks += log->l_sectBBsize; | 119 | nbblks += log->l_sectBBsize; |
121 | nbblks = round_up(nbblks, log->l_sectBBsize); | 120 | nbblks = round_up(nbblks, log->l_sectBBsize); |
122 | 121 | ||
123 | bp = xfs_buf_get_uncached(log->l_mp->m_logdev_targp, BBTOB(nbblks), 0); | 122 | bp = xfs_buf_get_uncached(log->l_mp->m_logdev_targp, nbblks, 0); |
124 | if (bp) | 123 | if (bp) |
125 | xfs_buf_unlock(bp); | 124 | xfs_buf_unlock(bp); |
126 | return bp; | 125 | return bp; |
@@ -146,7 +145,7 @@ xlog_align( | |||
146 | { | 145 | { |
147 | xfs_daddr_t offset = blk_no & ((xfs_daddr_t)log->l_sectBBsize - 1); | 146 | xfs_daddr_t offset = blk_no & ((xfs_daddr_t)log->l_sectBBsize - 1); |
148 | 147 | ||
149 | ASSERT(BBTOB(offset + nbblks) <= XFS_BUF_SIZE(bp)); | 148 | ASSERT(offset + nbblks <= bp->b_length); |
150 | return bp->b_addr + BBTOB(offset); | 149 | return bp->b_addr + BBTOB(offset); |
151 | } | 150 | } |
152 | 151 | ||
@@ -174,11 +173,12 @@ xlog_bread_noalign( | |||
174 | nbblks = round_up(nbblks, log->l_sectBBsize); | 173 | nbblks = round_up(nbblks, log->l_sectBBsize); |
175 | 174 | ||
176 | ASSERT(nbblks > 0); | 175 | ASSERT(nbblks > 0); |
177 | ASSERT(BBTOB(nbblks) <= XFS_BUF_SIZE(bp)); | 176 | ASSERT(nbblks <= bp->b_length); |
178 | 177 | ||
179 | XFS_BUF_SET_ADDR(bp, log->l_logBBstart + blk_no); | 178 | XFS_BUF_SET_ADDR(bp, log->l_logBBstart + blk_no); |
180 | XFS_BUF_READ(bp); | 179 | XFS_BUF_READ(bp); |
181 | XFS_BUF_SET_COUNT(bp, BBTOB(nbblks)); | 180 | bp->b_io_length = nbblks; |
181 | bp->b_error = 0; | ||
182 | 182 | ||
183 | xfsbdstrat(log->l_mp, bp); | 183 | xfsbdstrat(log->l_mp, bp); |
184 | error = xfs_buf_iowait(bp); | 184 | error = xfs_buf_iowait(bp); |
@@ -218,7 +218,7 @@ xlog_bread_offset( | |||
218 | xfs_caddr_t offset) | 218 | xfs_caddr_t offset) |
219 | { | 219 | { |
220 | xfs_caddr_t orig_offset = bp->b_addr; | 220 | xfs_caddr_t orig_offset = bp->b_addr; |
221 | int orig_len = bp->b_buffer_length; | 221 | int orig_len = BBTOB(bp->b_length); |
222 | int error, error2; | 222 | int error, error2; |
223 | 223 | ||
224 | error = xfs_buf_associate_memory(bp, offset, BBTOB(nbblks)); | 224 | error = xfs_buf_associate_memory(bp, offset, BBTOB(nbblks)); |
@@ -259,13 +259,14 @@ xlog_bwrite( | |||
259 | nbblks = round_up(nbblks, log->l_sectBBsize); | 259 | nbblks = round_up(nbblks, log->l_sectBBsize); |
260 | 260 | ||
261 | ASSERT(nbblks > 0); | 261 | ASSERT(nbblks > 0); |
262 | ASSERT(BBTOB(nbblks) <= XFS_BUF_SIZE(bp)); | 262 | ASSERT(nbblks <= bp->b_length); |
263 | 263 | ||
264 | XFS_BUF_SET_ADDR(bp, log->l_logBBstart + blk_no); | 264 | XFS_BUF_SET_ADDR(bp, log->l_logBBstart + blk_no); |
265 | XFS_BUF_ZEROFLAGS(bp); | 265 | XFS_BUF_ZEROFLAGS(bp); |
266 | xfs_buf_hold(bp); | 266 | xfs_buf_hold(bp); |
267 | xfs_buf_lock(bp); | 267 | xfs_buf_lock(bp); |
268 | XFS_BUF_SET_COUNT(bp, BBTOB(nbblks)); | 268 | bp->b_io_length = nbblks; |
269 | bp->b_error = 0; | ||
269 | 270 | ||
270 | error = xfs_bwrite(bp); | 271 | error = xfs_bwrite(bp); |
271 | if (error) | 272 | if (error) |
@@ -440,6 +441,8 @@ xlog_find_verify_cycle( | |||
440 | * a log sector, or we're out of luck. | 441 | * a log sector, or we're out of luck. |
441 | */ | 442 | */ |
442 | bufblks = 1 << ffs(nbblks); | 443 | bufblks = 1 << ffs(nbblks); |
444 | while (bufblks > log->l_logBBsize) | ||
445 | bufblks >>= 1; | ||
443 | while (!(bp = xlog_get_bp(log, bufblks))) { | 446 | while (!(bp = xlog_get_bp(log, bufblks))) { |
444 | bufblks >>= 1; | 447 | bufblks >>= 1; |
445 | if (bufblks < log->l_sectBBsize) | 448 | if (bufblks < log->l_sectBBsize) |
@@ -1225,6 +1228,8 @@ xlog_write_log_records( | |||
1225 | * log sector, or we're out of luck. | 1228 | * log sector, or we're out of luck. |
1226 | */ | 1229 | */ |
1227 | bufblks = 1 << ffs(blocks); | 1230 | bufblks = 1 << ffs(blocks); |
1231 | while (bufblks > log->l_logBBsize) | ||
1232 | bufblks >>= 1; | ||
1228 | while (!(bp = xlog_get_bp(log, bufblks))) { | 1233 | while (!(bp = xlog_get_bp(log, bufblks))) { |
1229 | bufblks >>= 1; | 1234 | bufblks >>= 1; |
1230 | if (bufblks < sectbb) | 1235 | if (bufblks < sectbb) |
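Both hunks above add the same guard: the power-of-two buffer-size heuristic can overshoot the log itself, so the result is halved until it fits. A worked sketch, with values in the comments made up for illustration:

/* Sketch: sizing the recovery buffer with the new clamp. */
static uint example_clamp_bufblks(uint nbblks, uint logBBsize)
{
	uint	bufblks = 1 << ffs(nbblks);	/* nbblks = 64: ffs() = 7, so 128 */

	while (bufblks > logBBsize)		/* logBBsize = 32: 128 -> 64 -> 32 */
		bufblks >>= 1;
	return bufblks;
}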
@@ -1772,7 +1777,7 @@ xlog_recover_do_inode_buffer( | |||
1772 | 1777 | ||
1773 | trace_xfs_log_recover_buf_inode_buf(mp->m_log, buf_f); | 1778 | trace_xfs_log_recover_buf_inode_buf(mp->m_log, buf_f); |
1774 | 1779 | ||
1775 | inodes_per_buf = XFS_BUF_COUNT(bp) >> mp->m_sb.sb_inodelog; | 1780 | inodes_per_buf = BBTOB(bp->b_io_length) >> mp->m_sb.sb_inodelog; |
1776 | for (i = 0; i < inodes_per_buf; i++) { | 1781 | for (i = 0; i < inodes_per_buf; i++) { |
1777 | next_unlinked_offset = (i * mp->m_sb.sb_inodesize) + | 1782 | next_unlinked_offset = (i * mp->m_sb.sb_inodesize) + |
1778 | offsetof(xfs_dinode_t, di_next_unlinked); | 1783 | offsetof(xfs_dinode_t, di_next_unlinked); |
@@ -1814,7 +1819,8 @@ xlog_recover_do_inode_buffer( | |||
1814 | 1819 | ||
1815 | ASSERT(item->ri_buf[item_index].i_addr != NULL); | 1820 | ASSERT(item->ri_buf[item_index].i_addr != NULL); |
1816 | ASSERT((item->ri_buf[item_index].i_len % XFS_BLF_CHUNK) == 0); | 1821 | ASSERT((item->ri_buf[item_index].i_len % XFS_BLF_CHUNK) == 0); |
1817 | ASSERT((reg_buf_offset + reg_buf_bytes) <= XFS_BUF_COUNT(bp)); | 1822 | ASSERT((reg_buf_offset + reg_buf_bytes) <= |
1823 | BBTOB(bp->b_io_length)); | ||
1818 | 1824 | ||
1819 | /* | 1825 | /* |
1820 | * The current logged region contains a copy of the | 1826 | * The current logged region contains a copy of the |
@@ -1873,8 +1879,8 @@ xlog_recover_do_reg_buffer( | |||
1873 | ASSERT(nbits > 0); | 1879 | ASSERT(nbits > 0); |
1874 | ASSERT(item->ri_buf[i].i_addr != NULL); | 1880 | ASSERT(item->ri_buf[i].i_addr != NULL); |
1875 | ASSERT(item->ri_buf[i].i_len % XFS_BLF_CHUNK == 0); | 1881 | ASSERT(item->ri_buf[i].i_len % XFS_BLF_CHUNK == 0); |
1876 | ASSERT(XFS_BUF_COUNT(bp) >= | 1882 | ASSERT(BBTOB(bp->b_io_length) >= |
1877 | ((uint)bit << XFS_BLF_SHIFT)+(nbits<<XFS_BLF_SHIFT)); | 1883 | ((uint)bit << XFS_BLF_SHIFT) + (nbits << XFS_BLF_SHIFT)); |
1878 | 1884 | ||
1879 | /* | 1885 | /* |
1880 | * Do a sanity check if this is a dquot buffer. Just checking | 1886 | * Do a sanity check if this is a dquot buffer. Just checking |
@@ -2103,6 +2109,7 @@ xlog_recover_do_dquot_buffer( | |||
2103 | STATIC int | 2109 | STATIC int |
2104 | xlog_recover_buffer_pass2( | 2110 | xlog_recover_buffer_pass2( |
2105 | xlog_t *log, | 2111 | xlog_t *log, |
2112 | struct list_head *buffer_list, | ||
2106 | xlog_recover_item_t *item) | 2113 | xlog_recover_item_t *item) |
2107 | { | 2114 | { |
2108 | xfs_buf_log_format_t *buf_f = item->ri_buf[0].i_addr; | 2115 | xfs_buf_log_format_t *buf_f = item->ri_buf[0].i_addr; |
@@ -2123,9 +2130,9 @@ xlog_recover_buffer_pass2( | |||
2123 | 2130 | ||
2124 | trace_xfs_log_recover_buf_recover(log, buf_f); | 2131 | trace_xfs_log_recover_buf_recover(log, buf_f); |
2125 | 2132 | ||
2126 | buf_flags = XBF_LOCK; | 2133 | buf_flags = 0; |
2127 | if (!(buf_f->blf_flags & XFS_BLF_INODE_BUF)) | 2134 | if (buf_f->blf_flags & XFS_BLF_INODE_BUF) |
2128 | buf_flags |= XBF_MAPPED; | 2135 | buf_flags |= XBF_UNMAPPED; |
2129 | 2136 | ||
2130 | bp = xfs_buf_read(mp->m_ddev_targp, buf_f->blf_blkno, buf_f->blf_len, | 2137 | bp = xfs_buf_read(mp->m_ddev_targp, buf_f->blf_blkno, buf_f->blf_len, |
2131 | buf_flags); | 2138 | buf_flags); |
@@ -2166,14 +2173,14 @@ xlog_recover_buffer_pass2( | |||
2166 | */ | 2173 | */ |
2167 | if (XFS_DINODE_MAGIC == | 2174 | if (XFS_DINODE_MAGIC == |
2168 | be16_to_cpu(*((__be16 *)xfs_buf_offset(bp, 0))) && | 2175 | be16_to_cpu(*((__be16 *)xfs_buf_offset(bp, 0))) && |
2169 | (XFS_BUF_COUNT(bp) != MAX(log->l_mp->m_sb.sb_blocksize, | 2176 | (BBTOB(bp->b_io_length) != MAX(log->l_mp->m_sb.sb_blocksize, |
2170 | (__uint32_t)XFS_INODE_CLUSTER_SIZE(log->l_mp)))) { | 2177 | (__uint32_t)XFS_INODE_CLUSTER_SIZE(log->l_mp)))) { |
2171 | xfs_buf_stale(bp); | 2178 | xfs_buf_stale(bp); |
2172 | error = xfs_bwrite(bp); | 2179 | error = xfs_bwrite(bp); |
2173 | } else { | 2180 | } else { |
2174 | ASSERT(bp->b_target->bt_mount == mp); | 2181 | ASSERT(bp->b_target->bt_mount == mp); |
2175 | bp->b_iodone = xlog_recover_iodone; | 2182 | bp->b_iodone = xlog_recover_iodone; |
2176 | xfs_buf_delwri_queue(bp); | 2183 | xfs_buf_delwri_queue(bp, buffer_list); |
2177 | } | 2184 | } |
2178 | 2185 | ||
2179 | xfs_buf_relse(bp); | 2186 | xfs_buf_relse(bp); |
@@ -2183,6 +2190,7 @@ xlog_recover_buffer_pass2( | |||
2183 | STATIC int | 2190 | STATIC int |
2184 | xlog_recover_inode_pass2( | 2191 | xlog_recover_inode_pass2( |
2185 | xlog_t *log, | 2192 | xlog_t *log, |
2193 | struct list_head *buffer_list, | ||
2186 | xlog_recover_item_t *item) | 2194 | xlog_recover_item_t *item) |
2187 | { | 2195 | { |
2188 | xfs_inode_log_format_t *in_f; | 2196 | xfs_inode_log_format_t *in_f; |
@@ -2220,8 +2228,7 @@ xlog_recover_inode_pass2( | |||
2220 | } | 2228 | } |
2221 | trace_xfs_log_recover_inode_recover(log, in_f); | 2229 | trace_xfs_log_recover_inode_recover(log, in_f); |
2222 | 2230 | ||
2223 | bp = xfs_buf_read(mp->m_ddev_targp, in_f->ilf_blkno, in_f->ilf_len, | 2231 | bp = xfs_buf_read(mp->m_ddev_targp, in_f->ilf_blkno, in_f->ilf_len, 0); |
2224 | XBF_LOCK); | ||
2225 | if (!bp) { | 2232 | if (!bp) { |
2226 | error = ENOMEM; | 2233 | error = ENOMEM; |
2227 | goto error; | 2234 | goto error; |
@@ -2436,7 +2443,7 @@ xlog_recover_inode_pass2( | |||
2436 | write_inode_buffer: | 2443 | write_inode_buffer: |
2437 | ASSERT(bp->b_target->bt_mount == mp); | 2444 | ASSERT(bp->b_target->bt_mount == mp); |
2438 | bp->b_iodone = xlog_recover_iodone; | 2445 | bp->b_iodone = xlog_recover_iodone; |
2439 | xfs_buf_delwri_queue(bp); | 2446 | xfs_buf_delwri_queue(bp, buffer_list); |
2440 | xfs_buf_relse(bp); | 2447 | xfs_buf_relse(bp); |
2441 | error: | 2448 | error: |
2442 | if (need_free) | 2449 | if (need_free) |
@@ -2477,6 +2484,7 @@ xlog_recover_quotaoff_pass1( | |||
2477 | STATIC int | 2484 | STATIC int |
2478 | xlog_recover_dquot_pass2( | 2485 | xlog_recover_dquot_pass2( |
2479 | xlog_t *log, | 2486 | xlog_t *log, |
2487 | struct list_head *buffer_list, | ||
2480 | xlog_recover_item_t *item) | 2488 | xlog_recover_item_t *item) |
2481 | { | 2489 | { |
2482 | xfs_mount_t *mp = log->l_mp; | 2490 | xfs_mount_t *mp = log->l_mp; |
@@ -2530,14 +2538,11 @@ xlog_recover_dquot_pass2( | |||
2530 | return XFS_ERROR(EIO); | 2538 | return XFS_ERROR(EIO); |
2531 | ASSERT(dq_f->qlf_len == 1); | 2539 | ASSERT(dq_f->qlf_len == 1); |
2532 | 2540 | ||
2533 | error = xfs_read_buf(mp, mp->m_ddev_targp, | 2541 | error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, dq_f->qlf_blkno, |
2534 | dq_f->qlf_blkno, | 2542 | XFS_FSB_TO_BB(mp, dq_f->qlf_len), 0, &bp); |
2535 | XFS_FSB_TO_BB(mp, dq_f->qlf_len), | 2543 | if (error) |
2536 | 0, &bp); | ||
2537 | if (error) { | ||
2538 | xfs_buf_ioerror_alert(bp, "xlog_recover_do..(read#3)"); | ||
2539 | return error; | 2544 | return error; |
2540 | } | 2545 | |
2541 | ASSERT(bp); | 2546 | ASSERT(bp); |
2542 | ddq = (xfs_disk_dquot_t *)xfs_buf_offset(bp, dq_f->qlf_boffset); | 2547 | ddq = (xfs_disk_dquot_t *)xfs_buf_offset(bp, dq_f->qlf_boffset); |
2543 | 2548 | ||
@@ -2558,7 +2563,7 @@ xlog_recover_dquot_pass2( | |||
2558 | ASSERT(dq_f->qlf_size == 2); | 2563 | ASSERT(dq_f->qlf_size == 2); |
2559 | ASSERT(bp->b_target->bt_mount == mp); | 2564 | ASSERT(bp->b_target->bt_mount == mp); |
2560 | bp->b_iodone = xlog_recover_iodone; | 2565 | bp->b_iodone = xlog_recover_iodone; |
2561 | xfs_buf_delwri_queue(bp); | 2566 | xfs_buf_delwri_queue(bp, buffer_list); |
2562 | xfs_buf_relse(bp); | 2567 | xfs_buf_relse(bp); |
2563 | 2568 | ||
2564 | return (0); | 2569 | return (0); |
@@ -2642,7 +2647,8 @@ xlog_recover_efd_pass2( | |||
2642 | * xfs_trans_ail_delete() drops the | 2647 | * xfs_trans_ail_delete() drops the |
2643 | * AIL lock. | 2648 | * AIL lock. |
2644 | */ | 2649 | */ |
2645 | xfs_trans_ail_delete(ailp, lip); | 2650 | xfs_trans_ail_delete(ailp, lip, |
2651 | SHUTDOWN_CORRUPT_INCORE); | ||
2646 | xfs_efi_item_free(efip); | 2652 | xfs_efi_item_free(efip); |
2647 | spin_lock(&ailp->xa_lock); | 2653 | spin_lock(&ailp->xa_lock); |
2648 | break; | 2654 | break; |
@@ -2712,21 +2718,22 @@ STATIC int | |||
2712 | xlog_recover_commit_pass2( | 2718 | xlog_recover_commit_pass2( |
2713 | struct log *log, | 2719 | struct log *log, |
2714 | struct xlog_recover *trans, | 2720 | struct xlog_recover *trans, |
2721 | struct list_head *buffer_list, | ||
2715 | xlog_recover_item_t *item) | 2722 | xlog_recover_item_t *item) |
2716 | { | 2723 | { |
2717 | trace_xfs_log_recover_item_recover(log, trans, item, XLOG_RECOVER_PASS2); | 2724 | trace_xfs_log_recover_item_recover(log, trans, item, XLOG_RECOVER_PASS2); |
2718 | 2725 | ||
2719 | switch (ITEM_TYPE(item)) { | 2726 | switch (ITEM_TYPE(item)) { |
2720 | case XFS_LI_BUF: | 2727 | case XFS_LI_BUF: |
2721 | return xlog_recover_buffer_pass2(log, item); | 2728 | return xlog_recover_buffer_pass2(log, buffer_list, item); |
2722 | case XFS_LI_INODE: | 2729 | case XFS_LI_INODE: |
2723 | return xlog_recover_inode_pass2(log, item); | 2730 | return xlog_recover_inode_pass2(log, buffer_list, item); |
2724 | case XFS_LI_EFI: | 2731 | case XFS_LI_EFI: |
2725 | return xlog_recover_efi_pass2(log, item, trans->r_lsn); | 2732 | return xlog_recover_efi_pass2(log, item, trans->r_lsn); |
2726 | case XFS_LI_EFD: | 2733 | case XFS_LI_EFD: |
2727 | return xlog_recover_efd_pass2(log, item); | 2734 | return xlog_recover_efd_pass2(log, item); |
2728 | case XFS_LI_DQUOT: | 2735 | case XFS_LI_DQUOT: |
2729 | return xlog_recover_dquot_pass2(log, item); | 2736 | return xlog_recover_dquot_pass2(log, buffer_list, item); |
2730 | case XFS_LI_QUOTAOFF: | 2737 | case XFS_LI_QUOTAOFF: |
2731 | /* nothing to do in pass2 */ | 2738 | /* nothing to do in pass2 */ |
2732 | return 0; | 2739 | return 0; |
@@ -2750,8 +2757,9 @@ xlog_recover_commit_trans( | |||
2750 | struct xlog_recover *trans, | 2757 | struct xlog_recover *trans, |
2751 | int pass) | 2758 | int pass) |
2752 | { | 2759 | { |
2753 | int error = 0; | 2760 | int error = 0, error2; |
2754 | xlog_recover_item_t *item; | 2761 | xlog_recover_item_t *item; |
2762 | LIST_HEAD (buffer_list); | ||
2755 | 2763 | ||
2756 | hlist_del(&trans->r_list); | 2764 | hlist_del(&trans->r_list); |
2757 | 2765 | ||
@@ -2760,16 +2768,27 @@ xlog_recover_commit_trans( | |||
2760 | return error; | 2768 | return error; |
2761 | 2769 | ||
2762 | list_for_each_entry(item, &trans->r_itemq, ri_list) { | 2770 | list_for_each_entry(item, &trans->r_itemq, ri_list) { |
2763 | if (pass == XLOG_RECOVER_PASS1) | 2771 | switch (pass) { |
2772 | case XLOG_RECOVER_PASS1: | ||
2764 | error = xlog_recover_commit_pass1(log, trans, item); | 2773 | error = xlog_recover_commit_pass1(log, trans, item); |
2765 | else | 2774 | break; |
2766 | error = xlog_recover_commit_pass2(log, trans, item); | 2775 | case XLOG_RECOVER_PASS2: |
2776 | error = xlog_recover_commit_pass2(log, trans, | ||
2777 | &buffer_list, item); | ||
2778 | break; | ||
2779 | default: | ||
2780 | ASSERT(0); | ||
2781 | } | ||
2782 | |||
2767 | if (error) | 2783 | if (error) |
2768 | return error; | 2784 | goto out; |
2769 | } | 2785 | } |
2770 | 2786 | ||
2771 | xlog_recover_free_trans(trans); | 2787 | xlog_recover_free_trans(trans); |
2772 | return 0; | 2788 | |
2789 | out: | ||
2790 | error2 = xfs_buf_delwri_submit(&buffer_list); | ||
2791 | return error ? error : error2; | ||
2773 | } | 2792 | } |
2774 | 2793 | ||
2775 | STATIC int | 2794 | STATIC int |
@@ -3079,7 +3098,7 @@ xlog_recover_process_one_iunlink( | |||
3079 | /* | 3098 | /* |
3080 | * Get the on disk inode to find the next inode in the bucket. | 3099 | * Get the on disk inode to find the next inode in the bucket. |
3081 | */ | 3100 | */ |
3082 | error = xfs_itobp(mp, NULL, ip, &dip, &ibp, XBF_LOCK); | 3101 | error = xfs_itobp(mp, NULL, ip, &dip, &ibp, 0); |
3083 | if (error) | 3102 | if (error) |
3084 | goto fail_iput; | 3103 | goto fail_iput; |
3085 | 3104 | ||
@@ -3639,11 +3658,8 @@ xlog_do_recover( | |||
3639 | * First replay the images in the log. | 3658 | * First replay the images in the log. |
3640 | */ | 3659 | */ |
3641 | error = xlog_do_log_recovery(log, head_blk, tail_blk); | 3660 | error = xlog_do_log_recovery(log, head_blk, tail_blk); |
3642 | if (error) { | 3661 | if (error) |
3643 | return error; | 3662 | return error; |
3644 | } | ||
3645 | |||
3646 | xfs_flush_buftarg(log->l_mp->m_ddev_targp, 1); | ||
3647 | 3663 | ||
3648 | /* | 3664 | /* |
3649 | * If IO errors happened during recovery, bail out. | 3665 | * If IO errors happened during recovery, bail out. |
@@ -3670,7 +3686,6 @@ xlog_do_recover( | |||
3670 | bp = xfs_getsb(log->l_mp, 0); | 3686 | bp = xfs_getsb(log->l_mp, 0); |
3671 | XFS_BUF_UNDONE(bp); | 3687 | XFS_BUF_UNDONE(bp); |
3672 | ASSERT(!(XFS_BUF_ISWRITE(bp))); | 3688 | ASSERT(!(XFS_BUF_ISWRITE(bp))); |
3673 | ASSERT(!(XFS_BUF_ISDELAYWRITE(bp))); | ||
3674 | XFS_BUF_READ(bp); | 3689 | XFS_BUF_READ(bp); |
3675 | XFS_BUF_UNASYNC(bp); | 3690 | XFS_BUF_UNASYNC(bp); |
3676 | xfsbdstrat(log->l_mp, bp); | 3691 | xfsbdstrat(log->l_mp, bp); |
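The recurring change in this file is the writeback model: instead of queueing each replayed buffer to the global delayed-write state, pass 2 threads a local buffer_list through every handler and submits the whole list once per transaction. A condensed sketch of the per-buffer side, with error handling trimmed and the example_* name purely illustrative:

/* Sketch: how a pass-2 handler now completes a replayed buffer. */
static void example_queue_replayed_buf(struct xfs_buf *bp,
				       struct list_head *buffer_list)
{
	bp->b_iodone = xlog_recover_iodone;	/* recovery I/O completion */
	xfs_buf_delwri_queue(bp, buffer_list);	/* queue only; no I/O yet */
	xfs_buf_relse(bp);
}

xlog_recover_commit_trans() then issues everything in one pass with xfs_buf_delwri_submit(&buffer_list) and folds any I/O error into its return value, which is why the old xfs_flush_buftarg() call in xlog_do_recover() can go away.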
diff --git a/fs/xfs/xfs_message.c b/fs/xfs/xfs_message.c index bd672def95ac..331cd9f83a7f 100644 --- a/fs/xfs/xfs_message.c +++ b/fs/xfs/xfs_message.c | |||
@@ -19,7 +19,6 @@ | |||
19 | #include "xfs_fs.h" | 19 | #include "xfs_fs.h" |
20 | #include "xfs_types.h" | 20 | #include "xfs_types.h" |
21 | #include "xfs_log.h" | 21 | #include "xfs_log.h" |
22 | #include "xfs_inum.h" | ||
23 | #include "xfs_trans.h" | 22 | #include "xfs_trans.h" |
24 | #include "xfs_sb.h" | 23 | #include "xfs_sb.h" |
25 | #include "xfs_ag.h" | 24 | #include "xfs_ag.h" |
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 1ffead4b2296..536021fb3d4e 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c | |||
@@ -22,6 +22,7 @@ | |||
22 | #include "xfs_log.h" | 22 | #include "xfs_log.h" |
23 | #include "xfs_inum.h" | 23 | #include "xfs_inum.h" |
24 | #include "xfs_trans.h" | 24 | #include "xfs_trans.h" |
25 | #include "xfs_trans_priv.h" | ||
25 | #include "xfs_sb.h" | 26 | #include "xfs_sb.h" |
26 | #include "xfs_ag.h" | 27 | #include "xfs_ag.h" |
27 | #include "xfs_dir2.h" | 28 | #include "xfs_dir2.h" |
@@ -37,7 +38,6 @@ | |||
37 | #include "xfs_rtalloc.h" | 38 | #include "xfs_rtalloc.h" |
38 | #include "xfs_bmap.h" | 39 | #include "xfs_bmap.h" |
39 | #include "xfs_error.h" | 40 | #include "xfs_error.h" |
40 | #include "xfs_rw.h" | ||
41 | #include "xfs_quota.h" | 41 | #include "xfs_quota.h" |
42 | #include "xfs_fsops.h" | 42 | #include "xfs_fsops.h" |
43 | #include "xfs_utils.h" | 43 | #include "xfs_utils.h" |
@@ -683,8 +683,8 @@ xfs_readsb(xfs_mount_t *mp, int flags) | |||
683 | sector_size = xfs_getsize_buftarg(mp->m_ddev_targp); | 683 | sector_size = xfs_getsize_buftarg(mp->m_ddev_targp); |
684 | 684 | ||
685 | reread: | 685 | reread: |
686 | bp = xfs_buf_read_uncached(mp, mp->m_ddev_targp, | 686 | bp = xfs_buf_read_uncached(mp->m_ddev_targp, XFS_SB_DADDR, |
687 | XFS_SB_DADDR, sector_size, 0); | 687 | BTOBB(sector_size), 0); |
688 | if (!bp) { | 688 | if (!bp) { |
689 | if (loud) | 689 | if (loud) |
690 | xfs_warn(mp, "SB buffer read failed"); | 690 | xfs_warn(mp, "SB buffer read failed"); |
@@ -1032,9 +1032,9 @@ xfs_check_sizes(xfs_mount_t *mp) | |||
1032 | xfs_warn(mp, "filesystem size mismatch detected"); | 1032 | xfs_warn(mp, "filesystem size mismatch detected"); |
1033 | return XFS_ERROR(EFBIG); | 1033 | return XFS_ERROR(EFBIG); |
1034 | } | 1034 | } |
1035 | bp = xfs_buf_read_uncached(mp, mp->m_ddev_targp, | 1035 | bp = xfs_buf_read_uncached(mp->m_ddev_targp, |
1036 | d - XFS_FSS_TO_BB(mp, 1), | 1036 | d - XFS_FSS_TO_BB(mp, 1), |
1037 | BBTOB(XFS_FSS_TO_BB(mp, 1)), 0); | 1037 | XFS_FSS_TO_BB(mp, 1), 0); |
1038 | if (!bp) { | 1038 | if (!bp) { |
1039 | xfs_warn(mp, "last sector read failed"); | 1039 | xfs_warn(mp, "last sector read failed"); |
1040 | return EIO; | 1040 | return EIO; |
@@ -1047,9 +1047,9 @@ xfs_check_sizes(xfs_mount_t *mp) | |||
1047 | xfs_warn(mp, "log size mismatch detected"); | 1047 | xfs_warn(mp, "log size mismatch detected"); |
1048 | return XFS_ERROR(EFBIG); | 1048 | return XFS_ERROR(EFBIG); |
1049 | } | 1049 | } |
1050 | bp = xfs_buf_read_uncached(mp, mp->m_logdev_targp, | 1050 | bp = xfs_buf_read_uncached(mp->m_logdev_targp, |
1051 | d - XFS_FSB_TO_BB(mp, 1), | 1051 | d - XFS_FSB_TO_BB(mp, 1), |
1052 | XFS_FSB_TO_B(mp, 1), 0); | 1052 | XFS_FSB_TO_BB(mp, 1), 0); |
1053 | if (!bp) { | 1053 | if (!bp) { |
1054 | xfs_warn(mp, "log device read failed"); | 1054 | xfs_warn(mp, "log device read failed"); |
1055 | return EIO; | 1055 | return EIO; |
@@ -1288,7 +1288,7 @@ xfs_mountfs( | |||
1288 | XFS_FSB_TO_BB(mp, sbp->sb_logblocks)); | 1288 | XFS_FSB_TO_BB(mp, sbp->sb_logblocks)); |
1289 | if (error) { | 1289 | if (error) { |
1290 | xfs_warn(mp, "log mount failed"); | 1290 | xfs_warn(mp, "log mount failed"); |
1291 | goto out_free_perag; | 1291 | goto out_fail_wait; |
1292 | } | 1292 | } |
1293 | 1293 | ||
1294 | /* | 1294 | /* |
@@ -1315,7 +1315,7 @@ xfs_mountfs( | |||
1315 | !mp->m_sb.sb_inprogress) { | 1315 | !mp->m_sb.sb_inprogress) { |
1316 | error = xfs_initialize_perag_data(mp, sbp->sb_agcount); | 1316 | error = xfs_initialize_perag_data(mp, sbp->sb_agcount); |
1317 | if (error) | 1317 | if (error) |
1318 | goto out_free_perag; | 1318 | goto out_fail_wait; |
1319 | } | 1319 | } |
1320 | 1320 | ||
1321 | /* | 1321 | /* |
@@ -1439,6 +1439,10 @@ xfs_mountfs( | |||
1439 | IRELE(rip); | 1439 | IRELE(rip); |
1440 | out_log_dealloc: | 1440 | out_log_dealloc: |
1441 | xfs_log_unmount(mp); | 1441 | xfs_log_unmount(mp); |
1442 | out_fail_wait: | ||
1443 | if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) | ||
1444 | xfs_wait_buftarg(mp->m_logdev_targp); | ||
1445 | xfs_wait_buftarg(mp->m_ddev_targp); | ||
1442 | out_free_perag: | 1446 | out_free_perag: |
1443 | xfs_free_perag(mp); | 1447 | xfs_free_perag(mp); |
1444 | out_remove_uuid: | 1448 | out_remove_uuid: |
@@ -1475,15 +1479,15 @@ xfs_unmountfs( | |||
1475 | xfs_log_force(mp, XFS_LOG_SYNC); | 1479 | xfs_log_force(mp, XFS_LOG_SYNC); |
1476 | 1480 | ||
1477 | /* | 1481 | /* |
1478 | * Do a delwri reclaim pass first so that as many dirty inodes are | 1482 | * Flush all pending changes from the AIL. |
1479 | * queued up for IO as possible. Then flush the buffers before making | 1483 | */ |
1480 | * a synchronous path to catch all the remaining inodes are reclaimed. | 1484 | xfs_ail_push_all_sync(mp->m_ail); |
1481 | * This makes the reclaim process as quick as possible by avoiding | 1485 | |
1482 | * synchronous writeout and blocking on inodes already in the delwri | 1486 | /* |
1483 | * state as much as possible. | 1487 | * And reclaim all inodes. At this point there should be no dirty |
1488 | * inode, and none should be pinned or locked, but use synchronous | ||
1489 | * reclaim just to be sure. | ||
1484 | */ | 1490 | */ |
1485 | xfs_reclaim_inodes(mp, 0); | ||
1486 | xfs_flush_buftarg(mp->m_ddev_targp, 1); | ||
1487 | xfs_reclaim_inodes(mp, SYNC_WAIT); | 1491 | xfs_reclaim_inodes(mp, SYNC_WAIT); |
1488 | 1492 | ||
1489 | xfs_qm_unmount(mp); | 1493 | xfs_qm_unmount(mp); |
@@ -1519,15 +1523,12 @@ xfs_unmountfs( | |||
1519 | if (error) | 1523 | if (error) |
1520 | xfs_warn(mp, "Unable to update superblock counters. " | 1524 | xfs_warn(mp, "Unable to update superblock counters. " |
1521 | "Freespace may not be correct on next mount."); | 1525 | "Freespace may not be correct on next mount."); |
1522 | xfs_unmountfs_writesb(mp); | ||
1523 | 1526 | ||
1524 | /* | 1527 | /* |
1525 | * Make sure all buffers have been flushed and completed before | 1528 | * At this point we might have modified the superblock again and thus |
1526 | * unmounting the log. | 1529 | * added an item to the AIL, so flush it again. |
1527 | */ | 1530 | */ |
1528 | error = xfs_flush_buftarg(mp->m_ddev_targp, 1); | 1531 | xfs_ail_push_all_sync(mp->m_ail); |
1529 | if (error) | ||
1530 | xfs_warn(mp, "%d busy buffers during unmount.", error); | ||
1531 | xfs_wait_buftarg(mp->m_ddev_targp); | 1532 | xfs_wait_buftarg(mp->m_ddev_targp); |
1532 | 1533 | ||
1533 | xfs_log_unmount_write(mp); | 1534 | xfs_log_unmount_write(mp); |
@@ -1588,36 +1589,6 @@ xfs_log_sbcount(xfs_mount_t *mp) | |||
1588 | return error; | 1589 | return error; |
1589 | } | 1590 | } |
1590 | 1591 | ||
1591 | int | ||
1592 | xfs_unmountfs_writesb(xfs_mount_t *mp) | ||
1593 | { | ||
1594 | xfs_buf_t *sbp; | ||
1595 | int error = 0; | ||
1596 | |||
1597 | /* | ||
1598 | * skip superblock write if fs is read-only, or | ||
1599 | * if we are doing a forced umount. | ||
1600 | */ | ||
1601 | if (!((mp->m_flags & XFS_MOUNT_RDONLY) || | ||
1602 | XFS_FORCED_SHUTDOWN(mp))) { | ||
1603 | |||
1604 | sbp = xfs_getsb(mp, 0); | ||
1605 | |||
1606 | XFS_BUF_UNDONE(sbp); | ||
1607 | XFS_BUF_UNREAD(sbp); | ||
1608 | xfs_buf_delwri_dequeue(sbp); | ||
1609 | XFS_BUF_WRITE(sbp); | ||
1610 | XFS_BUF_UNASYNC(sbp); | ||
1611 | ASSERT(sbp->b_target == mp->m_ddev_targp); | ||
1612 | xfsbdstrat(mp, sbp); | ||
1613 | error = xfs_buf_iowait(sbp); | ||
1614 | if (error) | ||
1615 | xfs_buf_ioerror_alert(sbp, __func__); | ||
1616 | xfs_buf_relse(sbp); | ||
1617 | } | ||
1618 | return error; | ||
1619 | } | ||
1620 | |||
1621 | /* | 1592 | /* |
1622 | * xfs_mod_sb() can be used to copy arbitrary changes to the | 1593 | * xfs_mod_sb() can be used to copy arbitrary changes to the |
1623 | * in-core superblock into the superblock buffer to be logged. | 1594 | * in-core superblock into the superblock buffer to be logged. |
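Taken together, the xfs_unmountfs() hunks replace the delwri-reclaim/flush_buftarg dance (and the now-deleted xfs_unmountfs_writesb()) with AIL-driven writeback. A condensed sketch of the resulting ordering, assuming the surrounding steps such as quota and log teardown stay where the diff places them:

/* Sketch: unmount writeback ordering after this change (condensed). */
static void example_unmount_flush(struct xfs_mount *mp)
{
	xfs_ail_push_all_sync(mp->m_ail);	/* write back all AIL items */
	xfs_reclaim_inodes(mp, SYNC_WAIT);	/* inodes are clean; reclaim them */

	/* ... xfs_log_sbcount() may dirty the superblock again ... */

	xfs_ail_push_all_sync(mp->m_ail);	/* flush that last item too */
	xfs_wait_buftarg(mp->m_ddev_targp);	/* drain outstanding buffer I/O */
}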
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 9eba73887829..8b89c5ac72d9 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h | |||
@@ -214,6 +214,7 @@ typedef struct xfs_mount { | |||
214 | 214 | ||
215 | struct workqueue_struct *m_data_workqueue; | 215 | struct workqueue_struct *m_data_workqueue; |
216 | struct workqueue_struct *m_unwritten_workqueue; | 216 | struct workqueue_struct *m_unwritten_workqueue; |
217 | struct workqueue_struct *m_cil_workqueue; | ||
217 | } xfs_mount_t; | 218 | } xfs_mount_t; |
218 | 219 | ||
219 | /* | 220 | /* |
@@ -378,7 +379,6 @@ extern __uint64_t xfs_default_resblks(xfs_mount_t *mp); | |||
378 | extern int xfs_mountfs(xfs_mount_t *mp); | 379 | extern int xfs_mountfs(xfs_mount_t *mp); |
379 | 380 | ||
380 | extern void xfs_unmountfs(xfs_mount_t *); | 381 | extern void xfs_unmountfs(xfs_mount_t *); |
381 | extern int xfs_unmountfs_writesb(xfs_mount_t *); | ||
382 | extern int xfs_mod_incore_sb(xfs_mount_t *, xfs_sb_field_t, int64_t, int); | 382 | extern int xfs_mod_incore_sb(xfs_mount_t *, xfs_sb_field_t, int64_t, int); |
383 | extern int xfs_mod_incore_sb_batch(xfs_mount_t *, xfs_mod_sb_t *, | 383 | extern int xfs_mod_incore_sb_batch(xfs_mount_t *, xfs_mod_sb_t *, |
384 | uint, int); | 384 | uint, int); |
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index 55c6afedc879..249db1987764 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c | |||
@@ -19,7 +19,6 @@ | |||
19 | #include "xfs_fs.h" | 19 | #include "xfs_fs.h" |
20 | #include "xfs_bit.h" | 20 | #include "xfs_bit.h" |
21 | #include "xfs_log.h" | 21 | #include "xfs_log.h" |
22 | #include "xfs_inum.h" | ||
23 | #include "xfs_trans.h" | 22 | #include "xfs_trans.h" |
24 | #include "xfs_sb.h" | 23 | #include "xfs_sb.h" |
25 | #include "xfs_ag.h" | 24 | #include "xfs_ag.h" |
@@ -65,7 +64,8 @@ STATIC int | |||
65 | xfs_qm_dquot_walk( | 64 | xfs_qm_dquot_walk( |
66 | struct xfs_mount *mp, | 65 | struct xfs_mount *mp, |
67 | int type, | 66 | int type, |
68 | int (*execute)(struct xfs_dquot *dqp)) | 67 | int (*execute)(struct xfs_dquot *dqp, void *data), |
68 | void *data) | ||
69 | { | 69 | { |
70 | struct xfs_quotainfo *qi = mp->m_quotainfo; | 70 | struct xfs_quotainfo *qi = mp->m_quotainfo; |
71 | struct radix_tree_root *tree = XFS_DQUOT_TREE(qi, type); | 71 | struct radix_tree_root *tree = XFS_DQUOT_TREE(qi, type); |
@@ -97,7 +97,7 @@ restart: | |||
97 | 97 | ||
98 | next_index = be32_to_cpu(dqp->q_core.d_id) + 1; | 98 | next_index = be32_to_cpu(dqp->q_core.d_id) + 1; |
99 | 99 | ||
100 | error = execute(batch[i]); | 100 | error = execute(batch[i], data); |
101 | if (error == EAGAIN) { | 101 | if (error == EAGAIN) { |
102 | skipped++; | 102 | skipped++; |
103 | continue; | 103 | continue; |
@@ -129,7 +129,8 @@ restart: | |||
129 | */ | 129 | */ |
130 | STATIC int | 130 | STATIC int |
131 | xfs_qm_dqpurge( | 131 | xfs_qm_dqpurge( |
132 | struct xfs_dquot *dqp) | 132 | struct xfs_dquot *dqp, |
133 | void *data) | ||
133 | { | 134 | { |
134 | struct xfs_mount *mp = dqp->q_mount; | 135 | struct xfs_mount *mp = dqp->q_mount; |
135 | struct xfs_quotainfo *qi = mp->m_quotainfo; | 136 | struct xfs_quotainfo *qi = mp->m_quotainfo; |
@@ -153,21 +154,7 @@ xfs_qm_dqpurge( | |||
153 | 154 | ||
154 | dqp->dq_flags |= XFS_DQ_FREEING; | 155 | dqp->dq_flags |= XFS_DQ_FREEING; |
155 | 156 | ||
156 | /* | 157 | xfs_dqflock(dqp); |
157 | * If we're turning off quotas, we have to make sure that, for | ||
158 | * example, we don't delete quota disk blocks while dquots are | ||
159 | * in the process of getting written to those disk blocks. | ||
160 | * This dquot might well be on AIL, and we can't leave it there | ||
161 | * if we're turning off quotas. Basically, we need this flush | ||
162 | * lock, and are willing to block on it. | ||
163 | */ | ||
164 | if (!xfs_dqflock_nowait(dqp)) { | ||
165 | /* | ||
166 | * Block on the flush lock after nudging dquot buffer, | ||
167 | * if it is incore. | ||
168 | */ | ||
169 | xfs_dqflock_pushbuf_wait(dqp); | ||
170 | } | ||
171 | 158 | ||
172 | /* | 159 | /* |
173 | * If we are turning this type of quotas off, we don't care | 160 | * If we are turning this type of quotas off, we don't care |
@@ -175,16 +162,21 @@ xfs_qm_dqpurge( | |||
175 | * we're unmounting, we do care, so we flush it and wait. | 162 | * we're unmounting, we do care, so we flush it and wait. |
176 | */ | 163 | */ |
177 | if (XFS_DQ_IS_DIRTY(dqp)) { | 164 | if (XFS_DQ_IS_DIRTY(dqp)) { |
178 | int error; | 165 | struct xfs_buf *bp = NULL; |
166 | int error; | ||
179 | 167 | ||
180 | /* | 168 | /* |
181 | * We don't care about getting disk errors here. We need | 169 | * We don't care about getting disk errors here. We need |
182 | * to purge this dquot anyway, so we go ahead regardless. | 170 | * to purge this dquot anyway, so we go ahead regardless. |
183 | */ | 171 | */ |
184 | error = xfs_qm_dqflush(dqp, SYNC_WAIT); | 172 | error = xfs_qm_dqflush(dqp, &bp); |
185 | if (error) | 173 | if (error) { |
186 | xfs_warn(mp, "%s: dquot %p flush failed", | 174 | xfs_warn(mp, "%s: dquot %p flush failed", |
187 | __func__, dqp); | 175 | __func__, dqp); |
176 | } else { | ||
177 | error = xfs_bwrite(bp); | ||
178 | xfs_buf_relse(bp); | ||
179 | } | ||
188 | xfs_dqflock(dqp); | 180 | xfs_dqflock(dqp); |
189 | } | 181 | } |
190 | 182 | ||
@@ -226,11 +218,11 @@ xfs_qm_dqpurge_all( | |||
226 | uint flags) | 218 | uint flags) |
227 | { | 219 | { |
228 | if (flags & XFS_QMOPT_UQUOTA) | 220 | if (flags & XFS_QMOPT_UQUOTA) |
229 | xfs_qm_dquot_walk(mp, XFS_DQ_USER, xfs_qm_dqpurge); | 221 | xfs_qm_dquot_walk(mp, XFS_DQ_USER, xfs_qm_dqpurge, NULL); |
230 | if (flags & XFS_QMOPT_GQUOTA) | 222 | if (flags & XFS_QMOPT_GQUOTA) |
231 | xfs_qm_dquot_walk(mp, XFS_DQ_GROUP, xfs_qm_dqpurge); | 223 | xfs_qm_dquot_walk(mp, XFS_DQ_GROUP, xfs_qm_dqpurge, NULL); |
232 | if (flags & XFS_QMOPT_PQUOTA) | 224 | if (flags & XFS_QMOPT_PQUOTA) |
233 | xfs_qm_dquot_walk(mp, XFS_DQ_PROJ, xfs_qm_dqpurge); | 225 | xfs_qm_dquot_walk(mp, XFS_DQ_PROJ, xfs_qm_dqpurge, NULL); |
234 | } | 226 | } |
235 | 227 | ||
236 | /* | 228 | /* |
@@ -483,6 +475,23 @@ done: | |||
483 | xfs_dqunlock(udq); | 475 | xfs_dqunlock(udq); |
484 | } | 476 | } |
485 | 477 | ||
478 | static bool | ||
479 | xfs_qm_need_dqattach( | ||
480 | struct xfs_inode *ip) | ||
481 | { | ||
482 | struct xfs_mount *mp = ip->i_mount; | ||
483 | |||
484 | if (!XFS_IS_QUOTA_RUNNING(mp)) | ||
485 | return false; | ||
486 | if (!XFS_IS_QUOTA_ON(mp)) | ||
487 | return false; | ||
488 | if (!XFS_NOT_DQATTACHED(mp, ip)) | ||
489 | return false; | ||
490 | if (ip->i_ino == mp->m_sb.sb_uquotino || | ||
491 | ip->i_ino == mp->m_sb.sb_gquotino) | ||
492 | return false; | ||
493 | return true; | ||
494 | } | ||
486 | 495 | ||
487 | /* | 496 | /* |
488 | * Given a locked inode, attach dquot(s) to it, taking U/G/P-QUOTAON | 497 | * Given a locked inode, attach dquot(s) to it, taking U/G/P-QUOTAON |
@@ -500,11 +509,7 @@ xfs_qm_dqattach_locked( | |||
500 | uint nquotas = 0; | 509 | uint nquotas = 0; |
501 | int error = 0; | 510 | int error = 0; |
502 | 511 | ||
503 | if (!XFS_IS_QUOTA_RUNNING(mp) || | 512 | if (!xfs_qm_need_dqattach(ip)) |
504 | !XFS_IS_QUOTA_ON(mp) || | ||
505 | !XFS_NOT_DQATTACHED(mp, ip) || | ||
506 | ip->i_ino == mp->m_sb.sb_uquotino || | ||
507 | ip->i_ino == mp->m_sb.sb_gquotino) | ||
508 | return 0; | 513 | return 0; |
509 | 514 | ||
510 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); | 515 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); |
@@ -575,6 +580,9 @@ xfs_qm_dqattach( | |||
575 | { | 580 | { |
576 | int error; | 581 | int error; |
577 | 582 | ||
583 | if (!xfs_qm_need_dqattach(ip)) | ||
584 | return 0; | ||
585 | |||
578 | xfs_ilock(ip, XFS_ILOCK_EXCL); | 586 | xfs_ilock(ip, XFS_ILOCK_EXCL); |
579 | error = xfs_qm_dqattach_locked(ip, flags); | 587 | error = xfs_qm_dqattach_locked(ip, flags); |
580 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | 588 | xfs_iunlock(ip, XFS_ILOCK_EXCL); |
@@ -855,15 +863,16 @@ xfs_qm_reset_dqcounts( | |||
855 | 863 | ||
856 | STATIC int | 864 | STATIC int |
857 | xfs_qm_dqiter_bufs( | 865 | xfs_qm_dqiter_bufs( |
858 | xfs_mount_t *mp, | 866 | struct xfs_mount *mp, |
859 | xfs_dqid_t firstid, | 867 | xfs_dqid_t firstid, |
860 | xfs_fsblock_t bno, | 868 | xfs_fsblock_t bno, |
861 | xfs_filblks_t blkcnt, | 869 | xfs_filblks_t blkcnt, |
862 | uint flags) | 870 | uint flags, |
871 | struct list_head *buffer_list) | ||
863 | { | 872 | { |
864 | xfs_buf_t *bp; | 873 | struct xfs_buf *bp; |
865 | int error; | 874 | int error; |
866 | int type; | 875 | int type; |
867 | 876 | ||
868 | ASSERT(blkcnt > 0); | 877 | ASSERT(blkcnt > 0); |
869 | type = flags & XFS_QMOPT_UQUOTA ? XFS_DQ_USER : | 878 | type = flags & XFS_QMOPT_UQUOTA ? XFS_DQ_USER : |
@@ -887,7 +896,7 @@ xfs_qm_dqiter_bufs( | |||
887 | break; | 896 | break; |
888 | 897 | ||
889 | xfs_qm_reset_dqcounts(mp, bp, firstid, type); | 898 | xfs_qm_reset_dqcounts(mp, bp, firstid, type); |
890 | xfs_buf_delwri_queue(bp); | 899 | xfs_buf_delwri_queue(bp, buffer_list); |
891 | xfs_buf_relse(bp); | 900 | xfs_buf_relse(bp); |
892 | /* | 901 | /* |
893 | * goto the next block. | 902 | * goto the next block. |
@@ -895,6 +904,7 @@ xfs_qm_dqiter_bufs( | |||
895 | bno++; | 904 | bno++; |
896 | firstid += mp->m_quotainfo->qi_dqperchunk; | 905 | firstid += mp->m_quotainfo->qi_dqperchunk; |
897 | } | 906 | } |
907 | |||
898 | return error; | 908 | return error; |
899 | } | 909 | } |
900 | 910 | ||
@@ -904,11 +914,12 @@ xfs_qm_dqiter_bufs( | |||
904 | */ | 914 | */ |
905 | STATIC int | 915 | STATIC int |
906 | xfs_qm_dqiterate( | 916 | xfs_qm_dqiterate( |
907 | xfs_mount_t *mp, | 917 | struct xfs_mount *mp, |
908 | xfs_inode_t *qip, | 918 | struct xfs_inode *qip, |
909 | uint flags) | 919 | uint flags, |
920 | struct list_head *buffer_list) | ||
910 | { | 921 | { |
911 | xfs_bmbt_irec_t *map; | 922 | struct xfs_bmbt_irec *map; |
912 | int i, nmaps; /* number of map entries */ | 923 | int i, nmaps; /* number of map entries */ |
913 | int error; /* return value */ | 924 | int error; /* return value */ |
914 | xfs_fileoff_t lblkno; | 925 | xfs_fileoff_t lblkno; |
@@ -975,21 +986,17 @@ xfs_qm_dqiterate( | |||
975 | * Iterate thru all the blks in the extent and | 986 | * Iterate thru all the blks in the extent and |
976 | * reset the counters of all the dquots inside them. | 987 | * reset the counters of all the dquots inside them. |
977 | */ | 988 | */ |
978 | if ((error = xfs_qm_dqiter_bufs(mp, | 989 | error = xfs_qm_dqiter_bufs(mp, firstid, |
979 | firstid, | 990 | map[i].br_startblock, |
980 | map[i].br_startblock, | 991 | map[i].br_blockcount, |
981 | map[i].br_blockcount, | 992 | flags, buffer_list); |
982 | flags))) { | 993 | if (error) |
983 | break; | 994 | goto out; |
984 | } | ||
985 | } | 995 | } |
986 | |||
987 | if (error) | ||
988 | break; | ||
989 | } while (nmaps > 0); | 996 | } while (nmaps > 0); |
990 | 997 | ||
998 | out: | ||
991 | kmem_free(map); | 999 | kmem_free(map); |
992 | |||
993 | return error; | 1000 | return error; |
994 | } | 1001 | } |
995 | 1002 | ||
@@ -1182,8 +1189,11 @@ error0: | |||
1182 | 1189 | ||
1183 | STATIC int | 1190 | STATIC int |
1184 | xfs_qm_flush_one( | 1191 | xfs_qm_flush_one( |
1185 | struct xfs_dquot *dqp) | 1192 | struct xfs_dquot *dqp, |
1193 | void *data) | ||
1186 | { | 1194 | { |
1195 | struct list_head *buffer_list = data; | ||
1196 | struct xfs_buf *bp = NULL; | ||
1187 | int error = 0; | 1197 | int error = 0; |
1188 | 1198 | ||
1189 | xfs_dqlock(dqp); | 1199 | xfs_dqlock(dqp); |
@@ -1192,11 +1202,13 @@ xfs_qm_flush_one( | |||
1192 | if (!XFS_DQ_IS_DIRTY(dqp)) | 1202 | if (!XFS_DQ_IS_DIRTY(dqp)) |
1193 | goto out_unlock; | 1203 | goto out_unlock; |
1194 | 1204 | ||
1195 | if (!xfs_dqflock_nowait(dqp)) | 1205 | xfs_dqflock(dqp); |
1196 | xfs_dqflock_pushbuf_wait(dqp); | 1206 | error = xfs_qm_dqflush(dqp, &bp); |
1197 | 1207 | if (error) | |
1198 | error = xfs_qm_dqflush(dqp, 0); | 1208 | goto out_unlock; |
1199 | 1209 | ||
1210 | xfs_buf_delwri_queue(bp, buffer_list); | ||
1211 | xfs_buf_relse(bp); | ||
1200 | out_unlock: | 1212 | out_unlock: |
1201 | xfs_dqunlock(dqp); | 1213 | xfs_dqunlock(dqp); |
1202 | return error; | 1214 | return error; |
@@ -1215,6 +1227,7 @@ xfs_qm_quotacheck( | |||
1215 | size_t structsz; | 1227 | size_t structsz; |
1216 | xfs_inode_t *uip, *gip; | 1228 | xfs_inode_t *uip, *gip; |
1217 | uint flags; | 1229 | uint flags; |
1230 | LIST_HEAD (buffer_list); | ||
1218 | 1231 | ||
1219 | count = INT_MAX; | 1232 | count = INT_MAX; |
1220 | structsz = 1; | 1233 | structsz = 1; |
@@ -1233,7 +1246,8 @@ xfs_qm_quotacheck( | |||
1233 | */ | 1246 | */ |
1234 | uip = mp->m_quotainfo->qi_uquotaip; | 1247 | uip = mp->m_quotainfo->qi_uquotaip; |
1235 | if (uip) { | 1248 | if (uip) { |
1236 | error = xfs_qm_dqiterate(mp, uip, XFS_QMOPT_UQUOTA); | 1249 | error = xfs_qm_dqiterate(mp, uip, XFS_QMOPT_UQUOTA, |
1250 | &buffer_list); | ||
1237 | if (error) | 1251 | if (error) |
1238 | goto error_return; | 1252 | goto error_return; |
1239 | flags |= XFS_UQUOTA_CHKD; | 1253 | flags |= XFS_UQUOTA_CHKD; |
@@ -1242,7 +1256,8 @@ xfs_qm_quotacheck( | |||
1242 | gip = mp->m_quotainfo->qi_gquotaip; | 1256 | gip = mp->m_quotainfo->qi_gquotaip; |
1243 | if (gip) { | 1257 | if (gip) { |
1244 | error = xfs_qm_dqiterate(mp, gip, XFS_IS_GQUOTA_ON(mp) ? | 1258 | error = xfs_qm_dqiterate(mp, gip, XFS_IS_GQUOTA_ON(mp) ? |
1245 | XFS_QMOPT_GQUOTA : XFS_QMOPT_PQUOTA); | 1259 | XFS_QMOPT_GQUOTA : XFS_QMOPT_PQUOTA, |
1260 | &buffer_list); | ||
1246 | if (error) | 1261 | if (error) |
1247 | goto error_return; | 1262 | goto error_return; |
1248 | flags |= XFS_OQUOTA_CHKD; | 1263 | flags |= XFS_OQUOTA_CHKD; |
@@ -1265,19 +1280,27 @@ xfs_qm_quotacheck( | |||
1265 | * We've made all the changes that we need to make incore. Flush them | 1280 | * We've made all the changes that we need to make incore. Flush them |
1266 | * down to disk buffers if everything was updated successfully. | 1281 | * down to disk buffers if everything was updated successfully. |
1267 | */ | 1282 | */ |
1268 | if (XFS_IS_UQUOTA_ON(mp)) | 1283 | if (XFS_IS_UQUOTA_ON(mp)) { |
1269 | error = xfs_qm_dquot_walk(mp, XFS_DQ_USER, xfs_qm_flush_one); | 1284 | error = xfs_qm_dquot_walk(mp, XFS_DQ_USER, xfs_qm_flush_one, |
1285 | &buffer_list); | ||
1286 | } | ||
1270 | if (XFS_IS_GQUOTA_ON(mp)) { | 1287 | if (XFS_IS_GQUOTA_ON(mp)) { |
1271 | error2 = xfs_qm_dquot_walk(mp, XFS_DQ_GROUP, xfs_qm_flush_one); | 1288 | error2 = xfs_qm_dquot_walk(mp, XFS_DQ_GROUP, xfs_qm_flush_one, |
1289 | &buffer_list); | ||
1272 | if (!error) | 1290 | if (!error) |
1273 | error = error2; | 1291 | error = error2; |
1274 | } | 1292 | } |
1275 | if (XFS_IS_PQUOTA_ON(mp)) { | 1293 | if (XFS_IS_PQUOTA_ON(mp)) { |
1276 | error2 = xfs_qm_dquot_walk(mp, XFS_DQ_PROJ, xfs_qm_flush_one); | 1294 | error2 = xfs_qm_dquot_walk(mp, XFS_DQ_PROJ, xfs_qm_flush_one, |
1295 | &buffer_list); | ||
1277 | if (!error) | 1296 | if (!error) |
1278 | error = error2; | 1297 | error = error2; |
1279 | } | 1298 | } |
1280 | 1299 | ||
1300 | error2 = xfs_buf_delwri_submit(&buffer_list); | ||
1301 | if (!error) | ||
1302 | error = error2; | ||
1303 | |||
1281 | /* | 1304 | /* |
1282 | * We can get this error if we couldn't do a dquot allocation inside | 1305 | * We can get this error if we couldn't do a dquot allocation inside |
1283 | * xfs_qm_dqusage_adjust (via bulkstat). We don't care about the | 1306 | * xfs_qm_dqusage_adjust (via bulkstat). We don't care about the |
@@ -1291,15 +1314,6 @@ xfs_qm_quotacheck( | |||
1291 | } | 1314 | } |
1292 | 1315 | ||
1293 | /* | 1316 | /* |
1294 | * We didn't log anything, because if we crashed, we'll have to | ||
1295 | * start the quotacheck from scratch anyway. However, we must make | ||
1296 | * sure that our dquot changes are secure before we put the | ||
1297 | * quotacheck'd stamp on the superblock. So, here we do a synchronous | ||
1298 | * flush. | ||
1299 | */ | ||
1300 | xfs_flush_buftarg(mp->m_ddev_targp, 1); | ||
1301 | |||
1302 | /* | ||
1303 | * If one type of quotas is off, then it will lose its | 1317 | * If one type of quotas is off, then it will lose its |
1304 | * quotachecked status, since we won't be doing accounting for | 1318 | * quotachecked status, since we won't be doing accounting for |
1305 | * that type anymore. | 1319 | * that type anymore. |
@@ -1308,6 +1322,13 @@ xfs_qm_quotacheck( | |||
1308 | mp->m_qflags |= flags; | 1322 | mp->m_qflags |= flags; |
1309 | 1323 | ||
1310 | error_return: | 1324 | error_return: |
1325 | while (!list_empty(&buffer_list)) { | ||
1326 | struct xfs_buf *bp = | ||
1327 | list_first_entry(&buffer_list, struct xfs_buf, b_list); | ||
1328 | list_del_init(&bp->b_list); | ||
1329 | xfs_buf_relse(bp); | ||
1330 | } | ||
1331 | |||
1311 | if (error) { | 1332 | if (error) { |
1312 | xfs_warn(mp, | 1333 | xfs_warn(mp, |
1313 | "Quotacheck: Unsuccessful (Error %d): Disabling quotas.", | 1334 | "Quotacheck: Unsuccessful (Error %d): Disabling quotas.", |
@@ -1424,6 +1445,7 @@ xfs_qm_dqfree_one( | |||
1424 | STATIC void | 1445 | STATIC void |
1425 | xfs_qm_dqreclaim_one( | 1446 | xfs_qm_dqreclaim_one( |
1426 | struct xfs_dquot *dqp, | 1447 | struct xfs_dquot *dqp, |
1448 | struct list_head *buffer_list, | ||
1427 | struct list_head *dispose_list) | 1449 | struct list_head *dispose_list) |
1428 | { | 1450 | { |
1429 | struct xfs_mount *mp = dqp->q_mount; | 1451 | struct xfs_mount *mp = dqp->q_mount; |
@@ -1456,25 +1478,20 @@ xfs_qm_dqreclaim_one( | |||
1456 | if (!xfs_dqflock_nowait(dqp)) | 1478 | if (!xfs_dqflock_nowait(dqp)) |
1457 | goto out_busy; | 1479 | goto out_busy; |
1458 | 1480 | ||
1459 | /* | ||
1460 | * We have the flush lock so we know that this is not in the | ||
1461 | * process of being flushed. So, if this is dirty, flush it | ||
1462 | * DELWRI so that we don't get a freelist infested with | ||
1463 | * dirty dquots. | ||
1464 | */ | ||
1465 | if (XFS_DQ_IS_DIRTY(dqp)) { | 1481 | if (XFS_DQ_IS_DIRTY(dqp)) { |
1482 | struct xfs_buf *bp = NULL; | ||
1483 | |||
1466 | trace_xfs_dqreclaim_dirty(dqp); | 1484 | trace_xfs_dqreclaim_dirty(dqp); |
1467 | 1485 | ||
1468 | /* | 1486 | error = xfs_qm_dqflush(dqp, &bp); |
1469 | * We flush it delayed write, so don't bother releasing the | ||
1470 | * freelist lock. | ||
1471 | */ | ||
1472 | error = xfs_qm_dqflush(dqp, 0); | ||
1473 | if (error) { | 1487 | if (error) { |
1474 | xfs_warn(mp, "%s: dquot %p flush failed", | 1488 | xfs_warn(mp, "%s: dquot %p flush failed", |
1475 | __func__, dqp); | 1489 | __func__, dqp); |
1490 | goto out_busy; | ||
1476 | } | 1491 | } |
1477 | 1492 | ||
1493 | xfs_buf_delwri_queue(bp, buffer_list); | ||
1494 | xfs_buf_relse(bp); | ||
1478 | /* | 1495 | /* |
1479 | * Give the dquot another try on the freelist, as the | 1496 | * Give the dquot another try on the freelist, as the |
1480 | * flushing will take some time. | 1497 | * flushing will take some time. |
@@ -1518,8 +1535,10 @@ xfs_qm_shake( | |||
1518 | struct xfs_quotainfo *qi = | 1535 | struct xfs_quotainfo *qi = |
1519 | container_of(shrink, struct xfs_quotainfo, qi_shrinker); | 1536 | container_of(shrink, struct xfs_quotainfo, qi_shrinker); |
1520 | int nr_to_scan = sc->nr_to_scan; | 1537 | int nr_to_scan = sc->nr_to_scan; |
1538 | LIST_HEAD (buffer_list); | ||
1521 | LIST_HEAD (dispose_list); | 1539 | LIST_HEAD (dispose_list); |
1522 | struct xfs_dquot *dqp; | 1540 | struct xfs_dquot *dqp; |
1541 | int error; | ||
1523 | 1542 | ||
1524 | if ((sc->gfp_mask & (__GFP_FS|__GFP_WAIT)) != (__GFP_FS|__GFP_WAIT)) | 1543 | if ((sc->gfp_mask & (__GFP_FS|__GFP_WAIT)) != (__GFP_FS|__GFP_WAIT)) |
1525 | return 0; | 1544 | return 0; |
@@ -1532,15 +1551,20 @@ xfs_qm_shake( | |||
1532 | break; | 1551 | break; |
1533 | dqp = list_first_entry(&qi->qi_lru_list, struct xfs_dquot, | 1552 | dqp = list_first_entry(&qi->qi_lru_list, struct xfs_dquot, |
1534 | q_lru); | 1553 | q_lru); |
1535 | xfs_qm_dqreclaim_one(dqp, &dispose_list); | 1554 | xfs_qm_dqreclaim_one(dqp, &buffer_list, &dispose_list); |
1536 | } | 1555 | } |
1537 | mutex_unlock(&qi->qi_lru_lock); | 1556 | mutex_unlock(&qi->qi_lru_lock); |
1538 | 1557 | ||
1558 | error = xfs_buf_delwri_submit(&buffer_list); | ||
1559 | if (error) | ||
1560 | xfs_warn(NULL, "%s: dquot reclaim failed", __func__); | ||
1561 | |||
1539 | while (!list_empty(&dispose_list)) { | 1562 | while (!list_empty(&dispose_list)) { |
1540 | dqp = list_first_entry(&dispose_list, struct xfs_dquot, q_lru); | 1563 | dqp = list_first_entry(&dispose_list, struct xfs_dquot, q_lru); |
1541 | list_del_init(&dqp->q_lru); | 1564 | list_del_init(&dqp->q_lru); |
1542 | xfs_qm_dqfree_one(dqp); | 1565 | xfs_qm_dqfree_one(dqp); |
1543 | } | 1566 | } |
1567 | |||
1544 | out: | 1568 | out: |
1545 | return (qi->qi_lru_count / 100) * sysctl_vfs_cache_pressure; | 1569 | return (qi->qi_lru_count / 100) * sysctl_vfs_cache_pressure; |
1546 | } | 1570 | } |
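The common thread in the quota changes above is the new xfs_qm_dqflush() contract: the caller takes the flush lock, the function hands back the locked backing buffer, and the caller decides how it gets written. A sketch covering both completions seen in this file; purge writes synchronously, while quotacheck and reclaim queue for a later delwri submit. The combined wrapper is illustrative, not from the patch.

/* Sketch: completing a dquot flush under the new two-step API. */
static int example_flush_dquot(struct xfs_dquot *dqp,
			       struct list_head *buffer_list)
{
	struct xfs_buf	*bp = NULL;
	int		error;

	xfs_dqflock(dqp);			/* caller owns the flush lock */
	error = xfs_qm_dqflush(dqp, &bp);	/* returns the backing buffer */
	if (error)
		return error;

	if (buffer_list)			/* batched path (quotacheck/reclaim) */
		xfs_buf_delwri_queue(bp, buffer_list);
	else					/* synchronous path (purge) */
		error = xfs_bwrite(bp);
	xfs_buf_relse(bp);
	return error;
}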
diff --git a/fs/xfs/xfs_qm_bhv.c b/fs/xfs/xfs_qm_bhv.c index e6986b5d80d8..6b39115bf145 100644 --- a/fs/xfs/xfs_qm_bhv.c +++ b/fs/xfs/xfs_qm_bhv.c | |||
@@ -17,9 +17,7 @@ | |||
17 | */ | 17 | */ |
18 | #include "xfs.h" | 18 | #include "xfs.h" |
19 | #include "xfs_fs.h" | 19 | #include "xfs_fs.h" |
20 | #include "xfs_bit.h" | ||
21 | #include "xfs_log.h" | 20 | #include "xfs_log.h" |
22 | #include "xfs_inum.h" | ||
23 | #include "xfs_trans.h" | 21 | #include "xfs_trans.h" |
24 | #include "xfs_sb.h" | 22 | #include "xfs_sb.h" |
25 | #include "xfs_ag.h" | 23 | #include "xfs_ag.h" |
diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c index c4f396e437a8..858a3b186110 100644 --- a/fs/xfs/xfs_qm_syscalls.c +++ b/fs/xfs/xfs_qm_syscalls.c | |||
@@ -22,7 +22,6 @@ | |||
22 | #include "xfs_fs.h" | 22 | #include "xfs_fs.h" |
23 | #include "xfs_bit.h" | 23 | #include "xfs_bit.h" |
24 | #include "xfs_log.h" | 24 | #include "xfs_log.h" |
25 | #include "xfs_inum.h" | ||
26 | #include "xfs_trans.h" | 25 | #include "xfs_trans.h" |
27 | #include "xfs_sb.h" | 26 | #include "xfs_sb.h" |
28 | #include "xfs_ag.h" | 27 | #include "xfs_ag.h" |
diff --git a/fs/xfs/xfs_quotaops.c b/fs/xfs/xfs_quotaops.c index 7e76f537abb7..fed504fc2999 100644 --- a/fs/xfs/xfs_quotaops.c +++ b/fs/xfs/xfs_quotaops.c | |||
@@ -17,7 +17,6 @@ | |||
17 | */ | 17 | */ |
18 | #include "xfs.h" | 18 | #include "xfs.h" |
19 | #include "xfs_sb.h" | 19 | #include "xfs_sb.h" |
20 | #include "xfs_inum.h" | ||
21 | #include "xfs_log.h" | 20 | #include "xfs_log.h" |
22 | #include "xfs_ag.h" | 21 | #include "xfs_ag.h" |
23 | #include "xfs_mount.h" | 22 | #include "xfs_mount.h" |
diff --git a/fs/xfs/xfs_rename.c b/fs/xfs/xfs_rename.c index e44ef7ee8ce8..30ff5f401d28 100644 --- a/fs/xfs/xfs_rename.c +++ b/fs/xfs/xfs_rename.c | |||
@@ -19,7 +19,6 @@ | |||
19 | #include "xfs_fs.h" | 19 | #include "xfs_fs.h" |
20 | #include "xfs_types.h" | 20 | #include "xfs_types.h" |
21 | #include "xfs_log.h" | 21 | #include "xfs_log.h" |
22 | #include "xfs_inum.h" | ||
23 | #include "xfs_trans.h" | 22 | #include "xfs_trans.h" |
24 | #include "xfs_sb.h" | 23 | #include "xfs_sb.h" |
25 | #include "xfs_ag.h" | 24 | #include "xfs_ag.h" |
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index ca4f31534a0a..92d4331cd4f1 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c | |||
@@ -20,7 +20,6 @@ | |||
20 | #include "xfs_types.h" | 20 | #include "xfs_types.h" |
21 | #include "xfs_bit.h" | 21 | #include "xfs_bit.h" |
22 | #include "xfs_log.h" | 22 | #include "xfs_log.h" |
23 | #include "xfs_inum.h" | ||
24 | #include "xfs_trans.h" | 23 | #include "xfs_trans.h" |
25 | #include "xfs_sb.h" | 24 | #include "xfs_sb.h" |
26 | #include "xfs_ag.h" | 25 | #include "xfs_ag.h" |
@@ -34,7 +33,6 @@ | |||
34 | #include "xfs_rtalloc.h" | 33 | #include "xfs_rtalloc.h" |
35 | #include "xfs_fsops.h" | 34 | #include "xfs_fsops.h" |
36 | #include "xfs_error.h" | 35 | #include "xfs_error.h" |
37 | #include "xfs_rw.h" | ||
38 | #include "xfs_inode_item.h" | 36 | #include "xfs_inode_item.h" |
39 | #include "xfs_trans_space.h" | 37 | #include "xfs_trans_space.h" |
40 | #include "xfs_utils.h" | 38 | #include "xfs_utils.h" |
@@ -1872,9 +1870,9 @@ xfs_growfs_rt( | |||
1872 | /* | 1870 | /* |
1873 | * Read in the last block of the device, make sure it exists. | 1871 | * Read in the last block of the device, make sure it exists. |
1874 | */ | 1872 | */ |
1875 | bp = xfs_buf_read_uncached(mp, mp->m_rtdev_targp, | 1873 | bp = xfs_buf_read_uncached(mp->m_rtdev_targp, |
1876 | XFS_FSB_TO_BB(mp, nrblocks - 1), | 1874 | XFS_FSB_TO_BB(mp, nrblocks - 1), |
1877 | XFS_FSB_TO_B(mp, 1), 0); | 1875 | XFS_FSB_TO_BB(mp, 1), 0); |
1878 | if (!bp) | 1876 | if (!bp) |
1879 | return EIO; | 1877 | return EIO; |
1880 | xfs_buf_relse(bp); | 1878 | xfs_buf_relse(bp); |
@@ -2219,9 +2217,9 @@ xfs_rtmount_init( | |||
2219 | (unsigned long long) mp->m_sb.sb_rblocks); | 2217 | (unsigned long long) mp->m_sb.sb_rblocks); |
2220 | return XFS_ERROR(EFBIG); | 2218 | return XFS_ERROR(EFBIG); |
2221 | } | 2219 | } |
2222 | bp = xfs_buf_read_uncached(mp, mp->m_rtdev_targp, | 2220 | bp = xfs_buf_read_uncached(mp->m_rtdev_targp, |
2223 | d - XFS_FSB_TO_BB(mp, 1), | 2221 | d - XFS_FSB_TO_BB(mp, 1), |
2224 | XFS_FSB_TO_B(mp, 1), 0); | 2222 | XFS_FSB_TO_BB(mp, 1), 0); |
2225 | if (!bp) { | 2223 | if (!bp) { |
2226 | xfs_warn(mp, "realtime device size check failed"); | 2224 | xfs_warn(mp, "realtime device size check failed"); |
2227 | return EIO; | 2225 | return EIO; |
diff --git a/fs/xfs/xfs_rw.c b/fs/xfs/xfs_rw.c deleted file mode 100644 index 597d044a09a1..000000000000 --- a/fs/xfs/xfs_rw.c +++ /dev/null | |||
@@ -1,156 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2000-2006 Silicon Graphics, Inc. | ||
3 | * All Rights Reserved. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or | ||
6 | * modify it under the terms of the GNU General Public License as | ||
7 | * published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it would be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write the Free Software Foundation, | ||
16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
17 | */ | ||
18 | #include "xfs.h" | ||
19 | #include "xfs_fs.h" | ||
20 | #include "xfs_types.h" | ||
21 | #include "xfs_bit.h" | ||
22 | #include "xfs_log.h" | ||
23 | #include "xfs_inum.h" | ||
24 | #include "xfs_trans.h" | ||
25 | #include "xfs_sb.h" | ||
26 | #include "xfs_ag.h" | ||
27 | #include "xfs_mount.h" | ||
28 | #include "xfs_bmap_btree.h" | ||
29 | #include "xfs_dinode.h" | ||
30 | #include "xfs_inode.h" | ||
31 | #include "xfs_error.h" | ||
32 | #include "xfs_rw.h" | ||
33 | |||
34 | /* | ||
35 | * Force a shutdown of the filesystem instantly while keeping | ||
36 | * the filesystem consistent. We don't do an unmount here; just shutdown | ||
37 | * the shop, make sure that absolutely nothing persistent happens to | ||
38 | * this filesystem after this point. | ||
39 | */ | ||
40 | void | ||
41 | xfs_do_force_shutdown( | ||
42 | xfs_mount_t *mp, | ||
43 | int flags, | ||
44 | char *fname, | ||
45 | int lnnum) | ||
46 | { | ||
47 | int logerror; | ||
48 | |||
49 | logerror = flags & SHUTDOWN_LOG_IO_ERROR; | ||
50 | |||
51 | if (!(flags & SHUTDOWN_FORCE_UMOUNT)) { | ||
52 | xfs_notice(mp, | ||
53 | "%s(0x%x) called from line %d of file %s. Return address = 0x%p", | ||
54 | __func__, flags, lnnum, fname, __return_address); | ||
55 | } | ||
56 | /* | ||
57 | * No need to duplicate efforts. | ||
58 | */ | ||
59 | if (XFS_FORCED_SHUTDOWN(mp) && !logerror) | ||
60 | return; | ||
61 | |||
62 | /* | ||
63 | * This flags XFS_MOUNT_FS_SHUTDOWN, makes sure that we don't | ||
64 | * queue up anybody new on the log reservations, and wakes up | ||
65 | * everybody who's sleeping on log reservations to tell them | ||
66 | * the bad news. | ||
67 | */ | ||
68 | if (xfs_log_force_umount(mp, logerror)) | ||
69 | return; | ||
70 | |||
71 | if (flags & SHUTDOWN_CORRUPT_INCORE) { | ||
72 | xfs_alert_tag(mp, XFS_PTAG_SHUTDOWN_CORRUPT, | ||
73 | "Corruption of in-memory data detected. Shutting down filesystem"); | ||
74 | if (XFS_ERRLEVEL_HIGH <= xfs_error_level) | ||
75 | xfs_stack_trace(); | ||
76 | } else if (!(flags & SHUTDOWN_FORCE_UMOUNT)) { | ||
77 | if (logerror) { | ||
78 | xfs_alert_tag(mp, XFS_PTAG_SHUTDOWN_LOGERROR, | ||
79 | "Log I/O Error Detected. Shutting down filesystem"); | ||
80 | } else if (flags & SHUTDOWN_DEVICE_REQ) { | ||
81 | xfs_alert_tag(mp, XFS_PTAG_SHUTDOWN_IOERROR, | ||
82 | "All device paths lost. Shutting down filesystem"); | ||
83 | } else if (!(flags & SHUTDOWN_REMOTE_REQ)) { | ||
84 | xfs_alert_tag(mp, XFS_PTAG_SHUTDOWN_IOERROR, | ||
85 | "I/O Error Detected. Shutting down filesystem"); | ||
86 | } | ||
87 | } | ||
88 | if (!(flags & SHUTDOWN_FORCE_UMOUNT)) { | ||
89 | xfs_alert(mp, | ||
90 | "Please umount the filesystem and rectify the problem(s)"); | ||
91 | } | ||
92 | } | ||
93 | |||
94 | /* | ||
95 | * This isn't an absolute requirement, but it is | ||
96 | * just a good idea to call xfs_read_buf instead of | ||
97 | * directly doing a read_buf call. For one, we shouldn't | ||
98 | * be doing this disk read if we are in SHUTDOWN state anyway, | ||
99 | * so this stops that from happening. Secondly, this does all | ||
100 | * the error checking stuff and the brelse if appropriate for | ||
101 | * the caller, so the code can be a little leaner. | ||
102 | */ | ||
103 | |||
104 | int | ||
105 | xfs_read_buf( | ||
106 | struct xfs_mount *mp, | ||
107 | xfs_buftarg_t *target, | ||
108 | xfs_daddr_t blkno, | ||
109 | int len, | ||
110 | uint flags, | ||
111 | xfs_buf_t **bpp) | ||
112 | { | ||
113 | xfs_buf_t *bp; | ||
114 | int error; | ||
115 | |||
116 | if (!flags) | ||
117 | flags = XBF_LOCK | XBF_MAPPED; | ||
118 | |||
119 | bp = xfs_buf_read(target, blkno, len, flags); | ||
120 | if (!bp) | ||
121 | return XFS_ERROR(EIO); | ||
122 | error = bp->b_error; | ||
123 | if (!error && !XFS_FORCED_SHUTDOWN(mp)) { | ||
124 | *bpp = bp; | ||
125 | } else { | ||
126 | *bpp = NULL; | ||
127 | if (error) { | ||
128 | xfs_buf_ioerror_alert(bp, __func__); | ||
129 | } else { | ||
130 | error = XFS_ERROR(EIO); | ||
131 | } | ||
132 | if (bp) { | ||
133 | XFS_BUF_UNDONE(bp); | ||
134 | xfs_buf_stale(bp); | ||
135 | /* | ||
136 | * brelse clears B_ERROR and b_error | ||
137 | */ | ||
138 | xfs_buf_relse(bp); | ||
139 | } | ||
140 | } | ||
141 | return (error); | ||
142 | } | ||
143 | |||
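A hedged usage sketch for xfs_read_buf() (variable names hypothetical; the length is assumed to be in basic blocks). Passing 0 for flags selects the XBF_LOCK | XBF_MAPPED default shown above:

	struct xfs_buf	*bp;
	int		error;

	/* read one fs block at daddr blkno from the data device */
	error = xfs_read_buf(mp, mp->m_ddev_targp, blkno,
			     BTOBB(mp->m_sb.sb_blocksize), 0, &bp);
	if (error)
		return error;
	/* ... inspect bp->b_addr ... */
	xfs_buf_relse(bp);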
144 | /* | ||
146 | * Helper function to extract the extent size hint from an inode. | ||
146 | */ | ||
147 | xfs_extlen_t | ||
148 | xfs_get_extsz_hint( | ||
149 | struct xfs_inode *ip) | ||
150 | { | ||
151 | if ((ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE) && ip->i_d.di_extsize) | ||
152 | return ip->i_d.di_extsize; | ||
153 | if (XFS_IS_REALTIME_INODE(ip)) | ||
154 | return ip->i_mount->m_sb.sb_rextsize; | ||
155 | return 0; | ||
156 | } | ||
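A minimal sketch of how the hint is typically consumed, assuming a hypothetical requested allocation length alen; the rounding itself is plain arithmetic via the kernel's roundup() macro:

	xfs_extlen_t	extsz = xfs_get_extsz_hint(ip);

	/* round the requested allocation up to the extent size hint */
	if (extsz)
		alen = roundup(alen, extsz);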
diff --git a/fs/xfs/xfs_rw.h b/fs/xfs/xfs_rw.h deleted file mode 100644 index bbdb9ad6a4ba..000000000000 --- a/fs/xfs/xfs_rw.h +++ /dev/null | |||
@@ -1,47 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2000-2006 Silicon Graphics, Inc. | ||
3 | * All Rights Reserved. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or | ||
6 | * modify it under the terms of the GNU General Public License as | ||
7 | * published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it would be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write the Free Software Foundation, | ||
16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
17 | */ | ||
18 | #ifndef __XFS_RW_H__ | ||
19 | #define __XFS_RW_H__ | ||
20 | |||
21 | struct xfs_buf; | ||
22 | struct xfs_inode; | ||
23 | struct xfs_mount; | ||
24 | |||
25 | /* | ||
26 | * Convert the given file system block to a disk block. | ||
27 | * We have to treat it differently based on whether the | ||
28 | * file is a real time file or not, because the bmap code | ||
29 | * does. | ||
30 | */ | ||
31 | static inline xfs_daddr_t | ||
32 | xfs_fsb_to_db(struct xfs_inode *ip, xfs_fsblock_t fsb) | ||
33 | { | ||
34 | return (XFS_IS_REALTIME_INODE(ip) ? \ | ||
35 | (xfs_daddr_t)XFS_FSB_TO_BB((ip)->i_mount, (fsb)) : \ | ||
36 | XFS_FSB_TO_DADDR((ip)->i_mount, (fsb))); | ||
37 | } | ||
38 | |||
39 | /* | ||
40 | * Prototypes for functions in xfs_rw.c. | ||
41 | */ | ||
42 | extern int xfs_read_buf(struct xfs_mount *mp, xfs_buftarg_t *btp, | ||
43 | xfs_daddr_t blkno, int len, uint flags, | ||
44 | struct xfs_buf **bpp); | ||
45 | extern xfs_extlen_t xfs_get_extsz_hint(struct xfs_inode *ip); | ||
46 | |||
47 | #endif /* __XFS_RW_H__ */ | ||
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index dab9a5f6dfd6..2fcfd5b0b046 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c | |||
@@ -17,7 +17,6 @@ | |||
17 | */ | 17 | */ |
18 | 18 | ||
19 | #include "xfs.h" | 19 | #include "xfs.h" |
20 | #include "xfs_bit.h" | ||
21 | #include "xfs_log.h" | 20 | #include "xfs_log.h" |
22 | #include "xfs_inum.h" | 21 | #include "xfs_inum.h" |
23 | #include "xfs_trans.h" | 22 | #include "xfs_trans.h" |
@@ -622,7 +621,7 @@ void | |||
622 | xfs_blkdev_issue_flush( | 621 | xfs_blkdev_issue_flush( |
623 | xfs_buftarg_t *buftarg) | 622 | xfs_buftarg_t *buftarg) |
624 | { | 623 | { |
625 | blkdev_issue_flush(buftarg->bt_bdev, GFP_KERNEL, NULL); | 624 | blkdev_issue_flush(buftarg->bt_bdev, GFP_NOFS, NULL); |
626 | } | 625 | } |
627 | 626 | ||
628 | STATIC void | 627 | STATIC void |
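The GFP_KERNEL to GFP_NOFS switch above follows the general rule for filesystem I/O paths: an allocation that may enter direct reclaim must not recurse back into the filesystem and deadlock. A one-line illustration of the same rule, with a hypothetical buffer:

	/* on fs writeback/flush paths: GFP_NOFS, never GFP_KERNEL */
	ptr = kmalloc(size, GFP_NOFS);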
@@ -773,8 +772,14 @@ xfs_init_mount_workqueues( | |||
773 | if (!mp->m_unwritten_workqueue) | 772 | if (!mp->m_unwritten_workqueue) |
774 | goto out_destroy_data_iodone_queue; | 773 | goto out_destroy_data_iodone_queue; |
775 | 774 | ||
775 | mp->m_cil_workqueue = alloc_workqueue("xfs-cil/%s", | ||
776 | WQ_MEM_RECLAIM, 0, mp->m_fsname); | ||
777 | if (!mp->m_cil_workqueue) | ||
778 | goto out_destroy_unwritten; | ||
776 | return 0; | 779 | return 0; |
777 | 780 | ||
781 | out_destroy_unwritten: | ||
782 | destroy_workqueue(mp->m_unwritten_workqueue); | ||
778 | out_destroy_data_iodone_queue: | 783 | out_destroy_data_iodone_queue: |
779 | destroy_workqueue(mp->m_data_workqueue); | 784 | destroy_workqueue(mp->m_data_workqueue); |
780 | out: | 785 | out: |
@@ -785,6 +790,7 @@ STATIC void | |||
785 | xfs_destroy_mount_workqueues( | 790 | xfs_destroy_mount_workqueues( |
786 | struct xfs_mount *mp) | 791 | struct xfs_mount *mp) |
787 | { | 792 | { |
793 | destroy_workqueue(mp->m_cil_workqueue); | ||
788 | destroy_workqueue(mp->m_data_workqueue); | 794 | destroy_workqueue(mp->m_data_workqueue); |
789 | destroy_workqueue(mp->m_unwritten_workqueue); | 795 | destroy_workqueue(mp->m_unwritten_workqueue); |
790 | } | 796 | } |
@@ -981,18 +987,9 @@ xfs_fs_put_super( | |||
981 | { | 987 | { |
982 | struct xfs_mount *mp = XFS_M(sb); | 988 | struct xfs_mount *mp = XFS_M(sb); |
983 | 989 | ||
984 | xfs_syncd_stop(mp); | ||
985 | |||
986 | /* | ||
987 | * Blow away any referenced inode in the filestreams cache. | ||
988 | * This can and will cause log traffic as inodes go inactive | ||
989 | * here. | ||
990 | */ | ||
991 | xfs_filestream_unmount(mp); | 990 | xfs_filestream_unmount(mp); |
992 | |||
993 | xfs_flush_buftarg(mp->m_ddev_targp, 1); | ||
994 | |||
995 | xfs_unmountfs(mp); | 991 | xfs_unmountfs(mp); |
992 | xfs_syncd_stop(mp); | ||
996 | xfs_freesb(mp); | 993 | xfs_freesb(mp); |
997 | xfs_icsb_destroy_counters(mp); | 994 | xfs_icsb_destroy_counters(mp); |
998 | xfs_destroy_mount_workqueues(mp); | 995 | xfs_destroy_mount_workqueues(mp); |
@@ -1072,7 +1069,7 @@ xfs_fs_statfs( | |||
1072 | 1069 | ||
1073 | spin_unlock(&mp->m_sb_lock); | 1070 | spin_unlock(&mp->m_sb_lock); |
1074 | 1071 | ||
1075 | if ((ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) || | 1072 | if ((ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) && |
1076 | ((mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_OQUOTA_ENFD))) == | 1073 | ((mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_OQUOTA_ENFD))) == |
1077 | (XFS_PQUOTA_ACCT|XFS_OQUOTA_ENFD)) | 1074 | (XFS_PQUOTA_ACCT|XFS_OQUOTA_ENFD)) |
1078 | xfs_qm_statvfs(ip, statp); | 1075 | xfs_qm_statvfs(ip, statp); |
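The operator fix above makes project quota reporting conditional on both tests; a restatement of the corrected condition in isolation:

	/* report project quota limits only if the inode inherits project
	 * quota AND project quota is both accounted and enforced */
	if ((ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) &&
	    (mp->m_qflags & (XFS_PQUOTA_ACCT | XFS_OQUOTA_ENFD)) ==
			    (XFS_PQUOTA_ACCT | XFS_OQUOTA_ENFD))
		xfs_qm_statvfs(ip, statp);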
@@ -1362,31 +1359,32 @@ xfs_fs_fill_super( | |||
1362 | sb->s_time_gran = 1; | 1359 | sb->s_time_gran = 1; |
1363 | set_posix_acl_flag(sb); | 1360 | set_posix_acl_flag(sb); |
1364 | 1361 | ||
1365 | error = xfs_mountfs(mp); | 1362 | error = xfs_syncd_init(mp); |
1366 | if (error) | 1363 | if (error) |
1367 | goto out_filestream_unmount; | 1364 | goto out_filestream_unmount; |
1368 | 1365 | ||
1369 | error = xfs_syncd_init(mp); | 1366 | error = xfs_mountfs(mp); |
1370 | if (error) | 1367 | if (error) |
1371 | goto out_unmount; | 1368 | goto out_syncd_stop; |
1372 | 1369 | ||
1373 | root = igrab(VFS_I(mp->m_rootip)); | 1370 | root = igrab(VFS_I(mp->m_rootip)); |
1374 | if (!root) { | 1371 | if (!root) { |
1375 | error = ENOENT; | 1372 | error = ENOENT; |
1376 | goto out_syncd_stop; | 1373 | goto out_unmount; |
1377 | } | 1374 | } |
1378 | if (is_bad_inode(root)) { | 1375 | if (is_bad_inode(root)) { |
1379 | error = EINVAL; | 1376 | error = EINVAL; |
1380 | goto out_syncd_stop; | 1377 | goto out_unmount; |
1381 | } | 1378 | } |
1382 | sb->s_root = d_make_root(root); | 1379 | sb->s_root = d_make_root(root); |
1383 | if (!sb->s_root) { | 1380 | if (!sb->s_root) { |
1384 | error = ENOMEM; | 1381 | error = ENOMEM; |
1385 | goto out_syncd_stop; | 1382 | goto out_unmount; |
1386 | } | 1383 | } |
1387 | 1384 | ||
1388 | return 0; | 1385 | return 0; |
1389 | 1386 | out_syncd_stop: | |
1387 | xfs_syncd_stop(mp); | ||
1390 | out_filestream_unmount: | 1388 | out_filestream_unmount: |
1391 | xfs_filestream_unmount(mp); | 1389 | xfs_filestream_unmount(mp); |
1392 | out_free_sb: | 1390 | out_free_sb: |
@@ -1403,19 +1401,10 @@ out_destroy_workqueues: | |||
1403 | out: | 1401 | out: |
1404 | return -error; | 1402 | return -error; |
1405 | 1403 | ||
1406 | out_syncd_stop: | ||
1407 | xfs_syncd_stop(mp); | ||
1408 | out_unmount: | 1404 | out_unmount: |
1409 | /* | ||
1410 | * Blow away any referenced inode in the filestreams cache. | ||
1411 | * This can and will cause log traffic as inodes go inactive | ||
1412 | * here. | ||
1413 | */ | ||
1414 | xfs_filestream_unmount(mp); | 1405 | xfs_filestream_unmount(mp); |
1415 | |||
1416 | xfs_flush_buftarg(mp->m_ddev_targp, 1); | ||
1417 | |||
1418 | xfs_unmountfs(mp); | 1406 | xfs_unmountfs(mp); |
1407 | xfs_syncd_stop(mp); | ||
1419 | goto out_free_sb; | 1408 | goto out_free_sb; |
1420 | } | 1409 | } |
1421 | 1410 | ||
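The reordering above restores the rule that teardown mirrors setup in reverse: xfs_syncd_init() now runs before xfs_mountfs(), so xfs_syncd_stop() can run after xfs_unmountfs() and still service reclaim work during unmount. A generic sketch of the pattern, with hypothetical setup_/teardown_ names:

	error = setup_a();		/* here: xfs_syncd_init() */
	if (error)
		goto out;
	error = setup_b();		/* here: xfs_mountfs() */
	if (error)
		goto undo_a;
	return 0;

undo_a:
	teardown_a();			/* here: xfs_syncd_stop() */
out:
	return error;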
diff --git a/fs/xfs/xfs_sync.c b/fs/xfs/xfs_sync.c index 205ebcb34d9e..c9d3409c5ca3 100644 --- a/fs/xfs/xfs_sync.c +++ b/fs/xfs/xfs_sync.c | |||
@@ -18,7 +18,6 @@ | |||
18 | #include "xfs.h" | 18 | #include "xfs.h" |
19 | #include "xfs_fs.h" | 19 | #include "xfs_fs.h" |
20 | #include "xfs_types.h" | 20 | #include "xfs_types.h" |
21 | #include "xfs_bit.h" | ||
22 | #include "xfs_log.h" | 21 | #include "xfs_log.h" |
23 | #include "xfs_inum.h" | 22 | #include "xfs_inum.h" |
24 | #include "xfs_trans.h" | 23 | #include "xfs_trans.h" |
@@ -241,45 +240,6 @@ xfs_sync_inode_data( | |||
241 | return error; | 240 | return error; |
242 | } | 241 | } |
243 | 242 | ||
244 | STATIC int | ||
245 | xfs_sync_inode_attr( | ||
246 | struct xfs_inode *ip, | ||
247 | struct xfs_perag *pag, | ||
248 | int flags) | ||
249 | { | ||
250 | int error = 0; | ||
251 | |||
252 | xfs_ilock(ip, XFS_ILOCK_SHARED); | ||
253 | if (xfs_inode_clean(ip)) | ||
254 | goto out_unlock; | ||
255 | if (!xfs_iflock_nowait(ip)) { | ||
256 | if (!(flags & SYNC_WAIT)) | ||
257 | goto out_unlock; | ||
258 | xfs_iflock(ip); | ||
259 | } | ||
260 | |||
261 | if (xfs_inode_clean(ip)) { | ||
262 | xfs_ifunlock(ip); | ||
263 | goto out_unlock; | ||
264 | } | ||
265 | |||
266 | error = xfs_iflush(ip, flags); | ||
267 | |||
268 | /* | ||
269 | * We don't want to try again on non-blocking flushes that can't run | ||
270 | * again immediately. If an inode really must be written, then that's | ||
271 | * what the SYNC_WAIT flag is for. | ||
272 | */ | ||
273 | if (error == EAGAIN) { | ||
274 | ASSERT(!(flags & SYNC_WAIT)); | ||
275 | error = 0; | ||
276 | } | ||
277 | |||
278 | out_unlock: | ||
279 | xfs_iunlock(ip, XFS_ILOCK_SHARED); | ||
280 | return error; | ||
281 | } | ||
282 | |||
283 | /* | 243 | /* |
284 | * Write out pagecache data for the whole filesystem. | 244 | * Write out pagecache data for the whole filesystem. |
285 | */ | 245 | */ |
@@ -300,19 +260,6 @@ xfs_sync_data( | |||
300 | return 0; | 260 | return 0; |
301 | } | 261 | } |
302 | 262 | ||
303 | /* | ||
304 | * Write out inode metadata (attributes) for the whole filesystem. | ||
305 | */ | ||
306 | STATIC int | ||
307 | xfs_sync_attr( | ||
308 | struct xfs_mount *mp, | ||
309 | int flags) | ||
310 | { | ||
311 | ASSERT((flags & ~SYNC_WAIT) == 0); | ||
312 | |||
313 | return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags); | ||
314 | } | ||
315 | |||
316 | STATIC int | 263 | STATIC int |
317 | xfs_sync_fsdata( | 264 | xfs_sync_fsdata( |
318 | struct xfs_mount *mp) | 265 | struct xfs_mount *mp) |
@@ -350,7 +297,7 @@ xfs_sync_fsdata( | |||
350 | * First stage of freeze - no writers will make progress now we are here, | 297 | * First stage of freeze - no writers will make progress now we are here, |
351 | * so we flush delwri and delalloc buffers here, then wait for all I/O to | 298 | * so we flush delwri and delalloc buffers here, then wait for all I/O to |
352 | * complete. Data is frozen at that point. Metadata is not frozen, | 299 | * complete. Data is frozen at that point. Metadata is not frozen, |
353 | * transactions can still occur here so don't bother flushing the buftarg | 300 | * transactions can still occur here so don't bother emptying the AIL |
354 | * because it'll just get dirty again. | 301 | * because it'll just get dirty again. |
355 | */ | 302 | */ |
356 | int | 303 | int |
@@ -365,47 +312,13 @@ xfs_quiesce_data( | |||
365 | /* write superblock and hoover up shutdown errors */ | 312 | /* write superblock and hoover up shutdown errors */ |
366 | error = xfs_sync_fsdata(mp); | 313 | error = xfs_sync_fsdata(mp); |
367 | 314 | ||
368 | /* make sure all delwri buffers are written out */ | ||
369 | xfs_flush_buftarg(mp->m_ddev_targp, 1); | ||
370 | |||
371 | /* mark the log as covered if needed */ | 315 | /* mark the log as covered if needed */ |
372 | if (xfs_log_need_covered(mp)) | 316 | if (xfs_log_need_covered(mp)) |
373 | error2 = xfs_fs_log_dummy(mp); | 317 | error2 = xfs_fs_log_dummy(mp); |
374 | 318 | ||
375 | /* flush data-only devices */ | ||
376 | if (mp->m_rtdev_targp) | ||
377 | xfs_flush_buftarg(mp->m_rtdev_targp, 1); | ||
378 | |||
379 | return error ? error : error2; | 319 | return error ? error : error2; |
380 | } | 320 | } |
381 | 321 | ||
382 | STATIC void | ||
383 | xfs_quiesce_fs( | ||
384 | struct xfs_mount *mp) | ||
385 | { | ||
386 | int count = 0, pincount; | ||
387 | |||
388 | xfs_reclaim_inodes(mp, 0); | ||
389 | xfs_flush_buftarg(mp->m_ddev_targp, 0); | ||
390 | |||
391 | /* | ||
392 | * This loop must run at least twice. The first instance of the loop | ||
393 | * will flush most metadata, but that will generate more metadata | ||
394 | * (typically directory updates), which then must be flushed and | ||
395 | * logged before we can write the unmount record. We also do sync | ||
396 | * reclaim of inodes to catch any that the above delwri flush skipped. | ||
397 | */ | ||
398 | do { | ||
399 | xfs_reclaim_inodes(mp, SYNC_WAIT); | ||
400 | xfs_sync_attr(mp, SYNC_WAIT); | ||
401 | pincount = xfs_flush_buftarg(mp->m_ddev_targp, 1); | ||
402 | if (!pincount) { | ||
403 | delay(50); | ||
404 | count++; | ||
405 | } | ||
406 | } while (count < 2); | ||
407 | } | ||
408 | |||
409 | /* | 322 | /* |
410 | * Second stage of a quiesce. The data is already synced, now we have to take | 323 | * Second stage of a quiesce. The data is already synced, now we have to take |
411 | * care of the metadata. New transactions are already blocked, so we need to | 324 | * care of the metadata. New transactions are already blocked, so we need to |
@@ -421,8 +334,12 @@ xfs_quiesce_attr( | |||
421 | while (atomic_read(&mp->m_active_trans) > 0) | 334 | while (atomic_read(&mp->m_active_trans) > 0) |
422 | delay(100); | 335 | delay(100); |
423 | 336 | ||
424 | /* flush inodes and push all remaining buffers out to disk */ | 337 | /* reclaim inodes to do any IO before the freeze completes */ |
425 | xfs_quiesce_fs(mp); | 338 | xfs_reclaim_inodes(mp, 0); |
339 | xfs_reclaim_inodes(mp, SYNC_WAIT); | ||
340 | |||
341 | /* flush all pending changes from the AIL */ | ||
342 | xfs_ail_push_all_sync(mp->m_ail); | ||
426 | 343 | ||
427 | /* | 344 | /* |
428 | * Just warn here till VFS can correctly support | 345 | * Just warn here till VFS can correctly support |
@@ -436,7 +353,12 @@ xfs_quiesce_attr( | |||
436 | xfs_warn(mp, "xfs_attr_quiesce: failed to log sb changes. " | 353 | xfs_warn(mp, "xfs_attr_quiesce: failed to log sb changes. " |
437 | "Frozen image may not be consistent."); | 354 | "Frozen image may not be consistent."); |
438 | xfs_log_unmount_write(mp); | 355 | xfs_log_unmount_write(mp); |
439 | xfs_unmountfs_writesb(mp); | 356 | |
357 | /* | ||
358 | * At this point we might have modified the superblock again and thus | ||
359 | * added an item to the AIL, thus flush it again. | ||
360 | */ | ||
361 | xfs_ail_push_all_sync(mp->m_ail); | ||
440 | } | 362 | } |
441 | 363 | ||
442 | static void | 364 | static void |
@@ -460,16 +382,27 @@ xfs_sync_worker( | |||
460 | struct xfs_mount, m_sync_work); | 382 | struct xfs_mount, m_sync_work); |
461 | int error; | 383 | int error; |
462 | 384 | ||
463 | if (!(mp->m_flags & XFS_MOUNT_RDONLY)) { | 385 | /* |
464 | /* dgc: errors ignored here */ | 386 | * We shouldn't write/force the log if we are in the mount/unmount |
465 | if (mp->m_super->s_frozen == SB_UNFROZEN && | 387 | * process or on a read only filesystem. The workqueue still needs to be |
466 | xfs_log_need_covered(mp)) | 388 | * active in both cases, however, because it is used for inode reclaim |
467 | error = xfs_fs_log_dummy(mp); | 389 | * during these times. Use the s_umount semaphore to provide exclusion |
468 | else | 390 | * with unmount. |
469 | xfs_log_force(mp, 0); | 391 | */ |
470 | 392 | if (down_read_trylock(&mp->m_super->s_umount)) { | |
471 | /* start pushing all the metadata that is currently dirty */ | 393 | if (!(mp->m_flags & XFS_MOUNT_RDONLY)) { |
472 | xfs_ail_push_all(mp->m_ail); | 394 | /* dgc: errors ignored here */ |
395 | if (mp->m_super->s_frozen == SB_UNFROZEN && | ||
396 | xfs_log_need_covered(mp)) | ||
397 | error = xfs_fs_log_dummy(mp); | ||
398 | else | ||
399 | xfs_log_force(mp, 0); | ||
400 | |||
401 | /* start pushing all the metadata that is currently | ||
402 | * dirty */ | ||
403 | xfs_ail_push_all(mp->m_ail); | ||
404 | } | ||
405 | up_read(&mp->m_super->s_umount); | ||
473 | } | 406 | } |
474 | 407 | ||
475 | /* queue us up again */ | 408 | /* queue us up again */ |
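The s_umount handling above is a common pattern for periodic workers: take the superblock lock shared with a trylock and simply skip the pass when unmount, which holds it exclusive, is in flight. A hedged sketch (sb hypothetical):

	if (down_read_trylock(&sb->s_umount)) {
		/* work that must not race with unmount */
		up_read(&sb->s_umount);
	}
	/* else: unmount in progress; the next queued run retries */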
@@ -488,14 +421,6 @@ xfs_syncd_queue_reclaim( | |||
488 | struct xfs_mount *mp) | 421 | struct xfs_mount *mp) |
489 | { | 422 | { |
490 | 423 | ||
491 | /* | ||
492 | * We can have inodes enter reclaim after we've shut down the syncd | ||
493 | * workqueue during unmount, so don't allow reclaim work to be queued | ||
494 | * during unmount. | ||
495 | */ | ||
496 | if (!(mp->m_super->s_flags & MS_ACTIVE)) | ||
497 | return; | ||
498 | |||
499 | rcu_read_lock(); | 424 | rcu_read_lock(); |
500 | if (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_RECLAIM_TAG)) { | 425 | if (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_RECLAIM_TAG)) { |
501 | queue_delayed_work(xfs_syncd_wq, &mp->m_reclaim_work, | 426 | queue_delayed_work(xfs_syncd_wq, &mp->m_reclaim_work, |
@@ -564,7 +489,6 @@ xfs_syncd_init( | |||
564 | INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker); | 489 | INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker); |
565 | 490 | ||
566 | xfs_syncd_queue_sync(mp); | 491 | xfs_syncd_queue_sync(mp); |
567 | xfs_syncd_queue_reclaim(mp); | ||
568 | 492 | ||
569 | return 0; | 493 | return 0; |
570 | } | 494 | } |
@@ -702,11 +626,8 @@ xfs_reclaim_inode_grab( | |||
702 | } | 626 | } |
703 | 627 | ||
704 | /* | 628 | /* |
705 | * Inodes in different states need to be treated differently, and the return | 629 | * Inodes in different states need to be treated differently. The following |
706 | * value of xfs_iflush is not sufficient to get this right. The following table | 630 | * table lists the inode states and the reclaim actions necessary: |
707 | * lists the inode states and the reclaim actions necessary for non-blocking | ||
708 | * reclaim: | ||
709 | * | ||
710 | * | 631 | * |
711 | * inode state iflush ret required action | 632 | * inode state iflush ret required action |
712 | * --------------- ---------- --------------- | 633 | * --------------- ---------- --------------- |
@@ -716,39 +637,31 @@ xfs_reclaim_inode_grab( | |||
716 | * stale, unpinned 0 reclaim | 637 | * stale, unpinned 0 reclaim |
717 | * clean, pinned(*) 0 requeue | 638 | * clean, pinned(*) 0 requeue |
718 | * stale, pinned EAGAIN requeue | 639 | * stale, pinned EAGAIN requeue |
719 | * dirty, delwri ok 0 requeue | 640 | * dirty, async - requeue |
720 | * dirty, delwri blocked EAGAIN requeue | 641 | * dirty, sync 0 reclaim |
721 | * dirty, sync flush 0 reclaim | ||
722 | * | 642 | * |
723 | * (*) dgc: I don't think the clean, pinned state is possible but it gets | 643 | * (*) dgc: I don't think the clean, pinned state is possible but it gets |
724 | * handled anyway given the order of checks implemented. | 644 | * handled anyway given the order of checks implemented. |
725 | * | 645 | * |
726 | * As can be seen from the table, the return value of xfs_iflush() is not | ||
727 | * sufficient to correctly decide the reclaim action here. The checks in | ||
728 | * xfs_iflush() might look like duplicates, but they are not. | ||
729 | * | ||
730 | * Also, because we get the flush lock first, we know that any inode that has | 646 | * Also, because we get the flush lock first, we know that any inode that has |
731 | * been flushed delwri has had the flush completed by the time we check that | 647 | * been flushed delwri has had the flush completed by the time we check that |
732 | * the inode is clean. The clean inode check needs to be done before flushing | 648 | * the inode is clean. |
733 | * the inode delwri otherwise we would loop forever requeuing clean inodes as | ||
734 | * we cannot tell apart a successful delwri flush and a clean inode from the | ||
735 | * return value of xfs_iflush(). | ||
736 | * | 649 | * |
737 | * Note that because the inode is flushed delayed write by background | 650 | * Note that because the inode is flushed delayed write by AIL pushing, the |
738 | * writeback, the flush lock may already be held here and waiting on it can | 651 | * flush lock may already be held here and waiting on it can result in very |
739 | * result in very long latencies. Hence for sync reclaims, where we wait on the | 652 | * long latencies. Hence for sync reclaims, where we wait on the flush lock, |
740 | * flush lock, the caller should push out delayed write inodes first before | 653 | * the caller should push the AIL first before trying to reclaim inodes to |
741 | * trying to reclaim them to minimise the amount of time spent waiting. For | 654 | minimise the amount of time spent waiting. For background reclaim, we only |
742 | * background reclaim, we just requeue the inode for the next pass. | 655 | bother to reclaim clean inodes anyway. |
743 | * | 656 | * |
744 | * Hence the order of actions after gaining the locks should be: | 657 | * Hence the order of actions after gaining the locks should be: |
745 | * bad => reclaim | 658 | * bad => reclaim |
746 | * shutdown => unpin and reclaim | 659 | * shutdown => unpin and reclaim |
747 | * pinned, delwri => requeue | 660 | * pinned, async => requeue |
748 | * pinned, sync => unpin | 661 | * pinned, sync => unpin |
749 | * stale => reclaim | 662 | * stale => reclaim |
750 | * clean => reclaim | 663 | * clean => reclaim |
751 | * dirty, delwri => flush and requeue | 664 | * dirty, async => requeue |
752 | * dirty, sync => flush, wait and reclaim | 665 | * dirty, sync => flush, wait and reclaim |
753 | */ | 666 | */ |
754 | STATIC int | 667 | STATIC int |
@@ -757,7 +670,8 @@ xfs_reclaim_inode( | |||
757 | struct xfs_perag *pag, | 670 | struct xfs_perag *pag, |
758 | int sync_mode) | 671 | int sync_mode) |
759 | { | 672 | { |
760 | int error; | 673 | struct xfs_buf *bp = NULL; |
674 | int error; | ||
761 | 675 | ||
762 | restart: | 676 | restart: |
763 | error = 0; | 677 | error = 0; |
@@ -765,17 +679,6 @@ restart: | |||
765 | if (!xfs_iflock_nowait(ip)) { | 679 | if (!xfs_iflock_nowait(ip)) { |
766 | if (!(sync_mode & SYNC_WAIT)) | 680 | if (!(sync_mode & SYNC_WAIT)) |
767 | goto out; | 681 | goto out; |
768 | |||
769 | /* | ||
770 | * If we only have a single dirty inode in a cluster there is | ||
771 | * a fair chance that the AIL push may have pushed it into | ||
772 | * the buffer, but xfsbufd won't touch it until 30 seconds | ||
773 | * from now, and thus we will lock up here. | ||
774 | * | ||
775 | * Promote the inode buffer to the front of the delwri list | ||
776 | * and wake up xfsbufd now. | ||
777 | */ | ||
778 | xfs_promote_inode(ip); | ||
779 | xfs_iflock(ip); | 682 | xfs_iflock(ip); |
780 | } | 683 | } |
781 | 684 | ||
@@ -783,13 +686,12 @@ restart: | |||
783 | goto reclaim; | 686 | goto reclaim; |
784 | if (XFS_FORCED_SHUTDOWN(ip->i_mount)) { | 687 | if (XFS_FORCED_SHUTDOWN(ip->i_mount)) { |
785 | xfs_iunpin_wait(ip); | 688 | xfs_iunpin_wait(ip); |
689 | xfs_iflush_abort(ip, false); | ||
786 | goto reclaim; | 690 | goto reclaim; |
787 | } | 691 | } |
788 | if (xfs_ipincount(ip)) { | 692 | if (xfs_ipincount(ip)) { |
789 | if (!(sync_mode & SYNC_WAIT)) { | 693 | if (!(sync_mode & SYNC_WAIT)) |
790 | xfs_ifunlock(ip); | 694 | goto out_ifunlock; |
791 | goto out; | ||
792 | } | ||
793 | xfs_iunpin_wait(ip); | 695 | xfs_iunpin_wait(ip); |
794 | } | 696 | } |
795 | if (xfs_iflags_test(ip, XFS_ISTALE)) | 697 | if (xfs_iflags_test(ip, XFS_ISTALE)) |
@@ -798,60 +700,42 @@ restart: | |||
798 | goto reclaim; | 700 | goto reclaim; |
799 | 701 | ||
800 | /* | 702 | /* |
703 | * Never flush out dirty data during non-blocking reclaim, as it would | ||
704 | * just contend with AIL pushing trying to do the same job. | ||
705 | */ | ||
706 | if (!(sync_mode & SYNC_WAIT)) | ||
707 | goto out_ifunlock; | ||
708 | |||
709 | /* | ||
801 | * Now we have an inode that needs flushing. | 710 | * Now we have an inode that needs flushing. |
802 | * | 711 | * |
803 | * We do a nonblocking flush here even if we are doing a SYNC_WAIT | 712 | * Note that xfs_iflush will never block on the inode buffer lock, as |
804 | * reclaim as we can deadlock with inode cluster removal. | ||
805 | * xfs_ifree_cluster() can lock the inode buffer before it locks the | 713 | * xfs_ifree_cluster() can lock the inode buffer before it locks the |
806 | * ip->i_lock, and we are doing the exact opposite here. As a result, | 714 | * ip->i_lock, and we are doing the exact opposite here. As a result, |
807 | * doing a blocking xfs_itobp() to get the cluster buffer will result | 715 | * doing a blocking xfs_itobp() to get the cluster buffer would result |
808 | * in an ABBA deadlock with xfs_ifree_cluster(). | 716 | * in an ABBA deadlock with xfs_ifree_cluster(). |
809 | * | 717 | * |
810 | * As xfs_ifree_cluster() must gather all inodes that are active in the | 718 | * As xfs_ifree_cluster() must gather all inodes that are active in the |
811 | * cache to mark them stale, if we hit this case we don't actually want | 719 | * cache to mark them stale, if we hit this case we don't actually want |
812 | * to do IO here - we want the inode marked stale so we can simply | 720 | * to do IO here - we want the inode marked stale so we can simply |
813 | * reclaim it. Hence if we get an EAGAIN error on a SYNC_WAIT flush, | 721 | * reclaim it. Hence if we get an EAGAIN error here, just unlock the |
814 | * just unlock the inode, back off and try again. Hopefully the next | 722 | * inode, back off and try again. Hopefully the next pass through will |
815 | * pass through will see the stale flag set on the inode. | 723 | * see the stale flag set on the inode. |
816 | */ | 724 | */ |
817 | error = xfs_iflush(ip, SYNC_TRYLOCK | sync_mode); | 725 | error = xfs_iflush(ip, &bp); |
818 | if (sync_mode & SYNC_WAIT) { | 726 | if (error == EAGAIN) { |
819 | if (error == EAGAIN) { | 727 | xfs_iunlock(ip, XFS_ILOCK_EXCL); |
820 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | 728 | /* backoff longer than in xfs_ifree_cluster */ |
821 | /* backoff longer than in xfs_ifree_cluster */ | 729 | delay(2); |
822 | delay(2); | 730 | goto restart; |
823 | goto restart; | ||
824 | } | ||
825 | xfs_iflock(ip); | ||
826 | goto reclaim; | ||
827 | } | 731 | } |
828 | 732 | ||
829 | /* | 733 | if (!error) { |
830 | * When we have to flush an inode but don't have SYNC_WAIT set, we | 734 | error = xfs_bwrite(bp); |
831 | * flush the inode out using a delwri buffer and wait for the next | 735 | xfs_buf_relse(bp); |
832 | * call into reclaim to find it in a clean state instead of waiting for | ||
833 | * it now. We also don't return errors here - if the error is transient | ||
834 | * then the next reclaim pass will flush the inode, and if the error | ||
835 | * is permanent then the next sync reclaim will reclaim the inode and | ||
836 | * pass on the error. | ||
837 | */ | ||
838 | if (error && error != EAGAIN && !XFS_FORCED_SHUTDOWN(ip->i_mount)) { | ||
839 | xfs_warn(ip->i_mount, | ||
840 | "inode 0x%llx background reclaim flush failed with %d", | ||
841 | (long long)ip->i_ino, error); | ||
842 | } | 736 | } |
843 | out: | ||
844 | xfs_iflags_clear(ip, XFS_IRECLAIM); | ||
845 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | ||
846 | /* | ||
847 | * We could return EAGAIN here to make reclaim rescan the inode tree in | ||
848 | * a short while. However, this just burns CPU time scanning the tree | ||
849 | * waiting for IO to complete and xfssyncd never goes back to the idle | ||
850 | * state. Instead, return 0 to let the next scheduled background reclaim | ||
851 | * attempt to reclaim the inode again. | ||
852 | */ | ||
853 | return 0; | ||
854 | 737 | ||
738 | xfs_iflock(ip); | ||
855 | reclaim: | 739 | reclaim: |
856 | xfs_ifunlock(ip); | 740 | xfs_ifunlock(ip); |
857 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | 741 | xfs_iunlock(ip, XFS_ILOCK_EXCL); |
@@ -884,8 +768,21 @@ reclaim: | |||
884 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | 768 | xfs_iunlock(ip, XFS_ILOCK_EXCL); |
885 | 769 | ||
886 | xfs_inode_free(ip); | 770 | xfs_inode_free(ip); |
887 | |||
888 | return error; | 771 | return error; |
772 | |||
773 | out_ifunlock: | ||
774 | xfs_ifunlock(ip); | ||
775 | out: | ||
776 | xfs_iflags_clear(ip, XFS_IRECLAIM); | ||
777 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | ||
778 | /* | ||
779 | * We could return EAGAIN here to make reclaim rescan the inode tree in | ||
780 | * a short while. However, this just burns CPU time scanning the tree | ||
781 | * waiting for IO to complete and xfssyncd never goes back to the idle | ||
782 | * state. Instead, return 0 to let the next scheduled background reclaim | ||
783 | * attempt to reclaim the inode again. | ||
784 | */ | ||
785 | return 0; | ||
889 | } | 786 | } |
890 | 787 | ||
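The hunk above reflects the new xfs_iflush() calling convention: the flush no longer issues the write itself but hands back the locked cluster buffer, leaving submission policy to the caller. A hedged sketch of a synchronous caller:

	struct xfs_buf	*bp = NULL;
	int		error;

	error = xfs_iflush(ip, &bp);
	if (!error) {
		error = xfs_bwrite(bp);	/* synchronous submission */
		xfs_buf_relse(bp);
	}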
891 | /* | 788 | /* |
diff --git a/fs/xfs/xfs_trace.c b/fs/xfs/xfs_trace.c index 9010ce885e6a..624bedd81357 100644 --- a/fs/xfs/xfs_trace.c +++ b/fs/xfs/xfs_trace.c | |||
@@ -18,9 +18,7 @@ | |||
18 | #include "xfs.h" | 18 | #include "xfs.h" |
19 | #include "xfs_fs.h" | 19 | #include "xfs_fs.h" |
20 | #include "xfs_types.h" | 20 | #include "xfs_types.h" |
21 | #include "xfs_bit.h" | ||
22 | #include "xfs_log.h" | 21 | #include "xfs_log.h" |
23 | #include "xfs_inum.h" | ||
24 | #include "xfs_trans.h" | 22 | #include "xfs_trans.h" |
25 | #include "xfs_sb.h" | 23 | #include "xfs_sb.h" |
26 | #include "xfs_ag.h" | 24 | #include "xfs_ag.h" |
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 06838c42b2a0..7cf9d3529e51 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h | |||
@@ -281,7 +281,7 @@ DECLARE_EVENT_CLASS(xfs_buf_class, | |||
281 | TP_STRUCT__entry( | 281 | TP_STRUCT__entry( |
282 | __field(dev_t, dev) | 282 | __field(dev_t, dev) |
283 | __field(xfs_daddr_t, bno) | 283 | __field(xfs_daddr_t, bno) |
284 | __field(size_t, buffer_length) | 284 | __field(int, nblks) |
285 | __field(int, hold) | 285 | __field(int, hold) |
286 | __field(int, pincount) | 286 | __field(int, pincount) |
287 | __field(unsigned, lockval) | 287 | __field(unsigned, lockval) |
@@ -291,18 +291,18 @@ DECLARE_EVENT_CLASS(xfs_buf_class, | |||
291 | TP_fast_assign( | 291 | TP_fast_assign( |
292 | __entry->dev = bp->b_target->bt_dev; | 292 | __entry->dev = bp->b_target->bt_dev; |
293 | __entry->bno = bp->b_bn; | 293 | __entry->bno = bp->b_bn; |
294 | __entry->buffer_length = bp->b_buffer_length; | 294 | __entry->nblks = bp->b_length; |
295 | __entry->hold = atomic_read(&bp->b_hold); | 295 | __entry->hold = atomic_read(&bp->b_hold); |
296 | __entry->pincount = atomic_read(&bp->b_pin_count); | 296 | __entry->pincount = atomic_read(&bp->b_pin_count); |
297 | __entry->lockval = bp->b_sema.count; | 297 | __entry->lockval = bp->b_sema.count; |
298 | __entry->flags = bp->b_flags; | 298 | __entry->flags = bp->b_flags; |
299 | __entry->caller_ip = caller_ip; | 299 | __entry->caller_ip = caller_ip; |
300 | ), | 300 | ), |
301 | TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d " | 301 | TP_printk("dev %d:%d bno 0x%llx nblks 0x%x hold %d pincount %d " |
302 | "lock %d flags %s caller %pf", | 302 | "lock %d flags %s caller %pf", |
303 | MAJOR(__entry->dev), MINOR(__entry->dev), | 303 | MAJOR(__entry->dev), MINOR(__entry->dev), |
304 | (unsigned long long)__entry->bno, | 304 | (unsigned long long)__entry->bno, |
305 | __entry->buffer_length, | 305 | __entry->nblks, |
306 | __entry->hold, | 306 | __entry->hold, |
307 | __entry->pincount, | 307 | __entry->pincount, |
308 | __entry->lockval, | 308 | __entry->lockval, |
@@ -328,7 +328,7 @@ DEFINE_BUF_EVENT(xfs_buf_unlock); | |||
328 | DEFINE_BUF_EVENT(xfs_buf_iowait); | 328 | DEFINE_BUF_EVENT(xfs_buf_iowait); |
329 | DEFINE_BUF_EVENT(xfs_buf_iowait_done); | 329 | DEFINE_BUF_EVENT(xfs_buf_iowait_done); |
330 | DEFINE_BUF_EVENT(xfs_buf_delwri_queue); | 330 | DEFINE_BUF_EVENT(xfs_buf_delwri_queue); |
331 | DEFINE_BUF_EVENT(xfs_buf_delwri_dequeue); | 331 | DEFINE_BUF_EVENT(xfs_buf_delwri_queued); |
332 | DEFINE_BUF_EVENT(xfs_buf_delwri_split); | 332 | DEFINE_BUF_EVENT(xfs_buf_delwri_split); |
333 | DEFINE_BUF_EVENT(xfs_buf_get_uncached); | 333 | DEFINE_BUF_EVENT(xfs_buf_get_uncached); |
334 | DEFINE_BUF_EVENT(xfs_bdstrat_shut); | 334 | DEFINE_BUF_EVENT(xfs_bdstrat_shut); |
@@ -362,7 +362,7 @@ DECLARE_EVENT_CLASS(xfs_buf_flags_class, | |||
362 | TP_fast_assign( | 362 | TP_fast_assign( |
363 | __entry->dev = bp->b_target->bt_dev; | 363 | __entry->dev = bp->b_target->bt_dev; |
364 | __entry->bno = bp->b_bn; | 364 | __entry->bno = bp->b_bn; |
365 | __entry->buffer_length = bp->b_buffer_length; | 365 | __entry->buffer_length = BBTOB(bp->b_length); |
366 | __entry->flags = flags; | 366 | __entry->flags = flags; |
367 | __entry->hold = atomic_read(&bp->b_hold); | 367 | __entry->hold = atomic_read(&bp->b_hold); |
368 | __entry->pincount = atomic_read(&bp->b_pin_count); | 368 | __entry->pincount = atomic_read(&bp->b_pin_count); |
@@ -406,7 +406,7 @@ TRACE_EVENT(xfs_buf_ioerror, | |||
406 | TP_fast_assign( | 406 | TP_fast_assign( |
407 | __entry->dev = bp->b_target->bt_dev; | 407 | __entry->dev = bp->b_target->bt_dev; |
408 | __entry->bno = bp->b_bn; | 408 | __entry->bno = bp->b_bn; |
409 | __entry->buffer_length = bp->b_buffer_length; | 409 | __entry->buffer_length = BBTOB(bp->b_length); |
410 | __entry->hold = atomic_read(&bp->b_hold); | 410 | __entry->hold = atomic_read(&bp->b_hold); |
411 | __entry->pincount = atomic_read(&bp->b_pin_count); | 411 | __entry->pincount = atomic_read(&bp->b_pin_count); |
412 | __entry->lockval = bp->b_sema.count; | 412 | __entry->lockval = bp->b_sema.count; |
@@ -450,7 +450,7 @@ DECLARE_EVENT_CLASS(xfs_buf_item_class, | |||
450 | __entry->bli_recur = bip->bli_recur; | 450 | __entry->bli_recur = bip->bli_recur; |
451 | __entry->bli_refcount = atomic_read(&bip->bli_refcount); | 451 | __entry->bli_refcount = atomic_read(&bip->bli_refcount); |
452 | __entry->buf_bno = bip->bli_buf->b_bn; | 452 | __entry->buf_bno = bip->bli_buf->b_bn; |
453 | __entry->buf_len = bip->bli_buf->b_buffer_length; | 453 | __entry->buf_len = BBTOB(bip->bli_buf->b_length); |
454 | __entry->buf_flags = bip->bli_buf->b_flags; | 454 | __entry->buf_flags = bip->bli_buf->b_flags; |
455 | __entry->buf_hold = atomic_read(&bip->bli_buf->b_hold); | 455 | __entry->buf_hold = atomic_read(&bip->bli_buf->b_hold); |
456 | __entry->buf_pincount = atomic_read(&bip->bli_buf->b_pin_count); | 456 | __entry->buf_pincount = atomic_read(&bip->bli_buf->b_pin_count); |
@@ -486,12 +486,10 @@ DEFINE_BUF_ITEM_EVENT(xfs_buf_item_format_stale); | |||
486 | DEFINE_BUF_ITEM_EVENT(xfs_buf_item_pin); | 486 | DEFINE_BUF_ITEM_EVENT(xfs_buf_item_pin); |
487 | DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unpin); | 487 | DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unpin); |
488 | DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unpin_stale); | 488 | DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unpin_stale); |
489 | DEFINE_BUF_ITEM_EVENT(xfs_buf_item_trylock); | ||
490 | DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unlock); | 489 | DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unlock); |
491 | DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unlock_stale); | 490 | DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unlock_stale); |
492 | DEFINE_BUF_ITEM_EVENT(xfs_buf_item_committed); | 491 | DEFINE_BUF_ITEM_EVENT(xfs_buf_item_committed); |
493 | DEFINE_BUF_ITEM_EVENT(xfs_buf_item_push); | 492 | DEFINE_BUF_ITEM_EVENT(xfs_buf_item_push); |
494 | DEFINE_BUF_ITEM_EVENT(xfs_buf_item_pushbuf); | ||
495 | DEFINE_BUF_ITEM_EVENT(xfs_trans_get_buf); | 493 | DEFINE_BUF_ITEM_EVENT(xfs_trans_get_buf); |
496 | DEFINE_BUF_ITEM_EVENT(xfs_trans_get_buf_recur); | 494 | DEFINE_BUF_ITEM_EVENT(xfs_trans_get_buf_recur); |
497 | DEFINE_BUF_ITEM_EVENT(xfs_trans_getsb); | 495 | DEFINE_BUF_ITEM_EVENT(xfs_trans_getsb); |
@@ -876,15 +874,30 @@ DECLARE_EVENT_CLASS(xfs_log_item_class, | |||
876 | __print_flags(__entry->flags, "|", XFS_LI_FLAGS)) | 874 | __print_flags(__entry->flags, "|", XFS_LI_FLAGS)) |
877 | ) | 875 | ) |
878 | 876 | ||
877 | TRACE_EVENT(xfs_log_force, | ||
878 | TP_PROTO(struct xfs_mount *mp, xfs_lsn_t lsn), | ||
879 | TP_ARGS(mp, lsn), | ||
880 | TP_STRUCT__entry( | ||
881 | __field(dev_t, dev) | ||
882 | __field(xfs_lsn_t, lsn) | ||
883 | ), | ||
884 | TP_fast_assign( | ||
885 | __entry->dev = mp->m_super->s_dev; | ||
886 | __entry->lsn = lsn; | ||
887 | ), | ||
888 | TP_printk("dev %d:%d lsn 0x%llx", | ||
889 | MAJOR(__entry->dev), MINOR(__entry->dev), | ||
890 | __entry->lsn) | ||
891 | ) | ||
892 | |||
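The event above would be emitted from the log force paths via the generated tracepoint call; a hedged example (the placement shown is an assumption):

	/* an lsn of 0 denotes an unconditional log force */
	trace_xfs_log_force(mp, 0);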
879 | #define DEFINE_LOG_ITEM_EVENT(name) \ | 893 | #define DEFINE_LOG_ITEM_EVENT(name) \ |
880 | DEFINE_EVENT(xfs_log_item_class, name, \ | 894 | DEFINE_EVENT(xfs_log_item_class, name, \ |
881 | TP_PROTO(struct xfs_log_item *lip), \ | 895 | TP_PROTO(struct xfs_log_item *lip), \ |
882 | TP_ARGS(lip)) | 896 | TP_ARGS(lip)) |
883 | DEFINE_LOG_ITEM_EVENT(xfs_ail_push); | 897 | DEFINE_LOG_ITEM_EVENT(xfs_ail_push); |
884 | DEFINE_LOG_ITEM_EVENT(xfs_ail_pushbuf); | ||
885 | DEFINE_LOG_ITEM_EVENT(xfs_ail_pushbuf_pinned); | ||
886 | DEFINE_LOG_ITEM_EVENT(xfs_ail_pinned); | 898 | DEFINE_LOG_ITEM_EVENT(xfs_ail_pinned); |
887 | DEFINE_LOG_ITEM_EVENT(xfs_ail_locked); | 899 | DEFINE_LOG_ITEM_EVENT(xfs_ail_locked); |
900 | DEFINE_LOG_ITEM_EVENT(xfs_ail_flushing); | ||
888 | 901 | ||
889 | 902 | ||
890 | DECLARE_EVENT_CLASS(xfs_file_class, | 903 | DECLARE_EVENT_CLASS(xfs_file_class, |
@@ -1145,7 +1158,7 @@ TRACE_EVENT(xfs_bunmap, | |||
1145 | 1158 | ||
1146 | ); | 1159 | ); |
1147 | 1160 | ||
1148 | DECLARE_EVENT_CLASS(xfs_busy_class, | 1161 | DECLARE_EVENT_CLASS(xfs_extent_busy_class, |
1149 | TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, | 1162 | TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, |
1150 | xfs_agblock_t agbno, xfs_extlen_t len), | 1163 | xfs_agblock_t agbno, xfs_extlen_t len), |
1151 | TP_ARGS(mp, agno, agbno, len), | 1164 | TP_ARGS(mp, agno, agbno, len), |
@@ -1168,17 +1181,17 @@ DECLARE_EVENT_CLASS(xfs_busy_class, | |||
1168 | __entry->len) | 1181 | __entry->len) |
1169 | ); | 1182 | ); |
1170 | #define DEFINE_BUSY_EVENT(name) \ | 1183 | #define DEFINE_BUSY_EVENT(name) \ |
1171 | DEFINE_EVENT(xfs_busy_class, name, \ | 1184 | DEFINE_EVENT(xfs_extent_busy_class, name, \ |
1172 | TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \ | 1185 | TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \ |
1173 | xfs_agblock_t agbno, xfs_extlen_t len), \ | 1186 | xfs_agblock_t agbno, xfs_extlen_t len), \ |
1174 | TP_ARGS(mp, agno, agbno, len)) | 1187 | TP_ARGS(mp, agno, agbno, len)) |
1175 | DEFINE_BUSY_EVENT(xfs_alloc_busy); | 1188 | DEFINE_BUSY_EVENT(xfs_extent_busy); |
1176 | DEFINE_BUSY_EVENT(xfs_alloc_busy_enomem); | 1189 | DEFINE_BUSY_EVENT(xfs_extent_busy_enomem); |
1177 | DEFINE_BUSY_EVENT(xfs_alloc_busy_force); | 1190 | DEFINE_BUSY_EVENT(xfs_extent_busy_force); |
1178 | DEFINE_BUSY_EVENT(xfs_alloc_busy_reuse); | 1191 | DEFINE_BUSY_EVENT(xfs_extent_busy_reuse); |
1179 | DEFINE_BUSY_EVENT(xfs_alloc_busy_clear); | 1192 | DEFINE_BUSY_EVENT(xfs_extent_busy_clear); |
1180 | 1193 | ||
1181 | TRACE_EVENT(xfs_alloc_busy_trim, | 1194 | TRACE_EVENT(xfs_extent_busy_trim, |
1182 | TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, | 1195 | TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, |
1183 | xfs_agblock_t agbno, xfs_extlen_t len, | 1196 | xfs_agblock_t agbno, xfs_extlen_t len, |
1184 | xfs_agblock_t tbno, xfs_extlen_t tlen), | 1197 | xfs_agblock_t tbno, xfs_extlen_t tlen), |
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index 103b00c90004..cdf896fcbfa4 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c | |||
@@ -19,9 +19,7 @@ | |||
19 | #include "xfs.h" | 19 | #include "xfs.h" |
20 | #include "xfs_fs.h" | 20 | #include "xfs_fs.h" |
21 | #include "xfs_types.h" | 21 | #include "xfs_types.h" |
22 | #include "xfs_bit.h" | ||
23 | #include "xfs_log.h" | 22 | #include "xfs_log.h" |
24 | #include "xfs_inum.h" | ||
25 | #include "xfs_trans.h" | 23 | #include "xfs_trans.h" |
26 | #include "xfs_sb.h" | 24 | #include "xfs_sb.h" |
27 | #include "xfs_ag.h" | 25 | #include "xfs_ag.h" |
@@ -36,6 +34,7 @@ | |||
36 | #include "xfs_btree.h" | 34 | #include "xfs_btree.h" |
37 | #include "xfs_ialloc.h" | 35 | #include "xfs_ialloc.h" |
38 | #include "xfs_alloc.h" | 36 | #include "xfs_alloc.h" |
37 | #include "xfs_extent_busy.h" | ||
39 | #include "xfs_bmap.h" | 38 | #include "xfs_bmap.h" |
40 | #include "xfs_quota.h" | 39 | #include "xfs_quota.h" |
41 | #include "xfs_trans_priv.h" | 40 | #include "xfs_trans_priv.h" |
@@ -608,8 +607,8 @@ STATIC void | |||
608 | xfs_trans_free( | 607 | xfs_trans_free( |
609 | struct xfs_trans *tp) | 608 | struct xfs_trans *tp) |
610 | { | 609 | { |
611 | xfs_alloc_busy_sort(&tp->t_busy); | 610 | xfs_extent_busy_sort(&tp->t_busy); |
612 | xfs_alloc_busy_clear(tp->t_mountp, &tp->t_busy, false); | 611 | xfs_extent_busy_clear(tp->t_mountp, &tp->t_busy, false); |
613 | 612 | ||
614 | atomic_dec(&tp->t_mountp->m_active_trans); | 613 | atomic_dec(&tp->t_mountp->m_active_trans); |
615 | xfs_trans_free_dqinfo(tp); | 614 | xfs_trans_free_dqinfo(tp); |
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index f6118703f20d..7ab99e1898c8 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h | |||
@@ -345,11 +345,9 @@ struct xfs_item_ops { | |||
345 | void (*iop_format)(xfs_log_item_t *, struct xfs_log_iovec *); | 345 | void (*iop_format)(xfs_log_item_t *, struct xfs_log_iovec *); |
346 | void (*iop_pin)(xfs_log_item_t *); | 346 | void (*iop_pin)(xfs_log_item_t *); |
347 | void (*iop_unpin)(xfs_log_item_t *, int remove); | 347 | void (*iop_unpin)(xfs_log_item_t *, int remove); |
348 | uint (*iop_trylock)(xfs_log_item_t *); | 348 | uint (*iop_push)(struct xfs_log_item *, struct list_head *); |
349 | void (*iop_unlock)(xfs_log_item_t *); | 349 | void (*iop_unlock)(xfs_log_item_t *); |
350 | xfs_lsn_t (*iop_committed)(xfs_log_item_t *, xfs_lsn_t); | 350 | xfs_lsn_t (*iop_committed)(xfs_log_item_t *, xfs_lsn_t); |
351 | void (*iop_push)(xfs_log_item_t *); | ||
352 | bool (*iop_pushbuf)(xfs_log_item_t *); | ||
353 | void (*iop_committing)(xfs_log_item_t *, xfs_lsn_t); | 351 | void (*iop_committing)(xfs_log_item_t *, xfs_lsn_t); |
354 | }; | 352 | }; |
355 | 353 | ||
@@ -357,20 +355,18 @@ struct xfs_item_ops { | |||
357 | #define IOP_FORMAT(ip,vp) (*(ip)->li_ops->iop_format)(ip, vp) | 355 | #define IOP_FORMAT(ip,vp) (*(ip)->li_ops->iop_format)(ip, vp) |
358 | #define IOP_PIN(ip) (*(ip)->li_ops->iop_pin)(ip) | 356 | #define IOP_PIN(ip) (*(ip)->li_ops->iop_pin)(ip) |
359 | #define IOP_UNPIN(ip, remove) (*(ip)->li_ops->iop_unpin)(ip, remove) | 357 | #define IOP_UNPIN(ip, remove) (*(ip)->li_ops->iop_unpin)(ip, remove) |
360 | #define IOP_TRYLOCK(ip) (*(ip)->li_ops->iop_trylock)(ip) | 358 | #define IOP_PUSH(ip, list) (*(ip)->li_ops->iop_push)(ip, list) |
361 | #define IOP_UNLOCK(ip) (*(ip)->li_ops->iop_unlock)(ip) | 359 | #define IOP_UNLOCK(ip) (*(ip)->li_ops->iop_unlock)(ip) |
362 | #define IOP_COMMITTED(ip, lsn) (*(ip)->li_ops->iop_committed)(ip, lsn) | 360 | #define IOP_COMMITTED(ip, lsn) (*(ip)->li_ops->iop_committed)(ip, lsn) |
363 | #define IOP_PUSH(ip) (*(ip)->li_ops->iop_push)(ip) | ||
364 | #define IOP_PUSHBUF(ip) (*(ip)->li_ops->iop_pushbuf)(ip) | ||
365 | #define IOP_COMMITTING(ip, lsn) (*(ip)->li_ops->iop_committing)(ip, lsn) | 361 | #define IOP_COMMITTING(ip, lsn) (*(ip)->li_ops->iop_committing)(ip, lsn) |
366 | 362 | ||
367 | /* | 363 | /* |
368 | * Return values for the IOP_TRYLOCK() routines. | 364 | * Return values for the IOP_PUSH() routines. |
369 | */ | 365 | */ |
370 | #define XFS_ITEM_SUCCESS 0 | 366 | #define XFS_ITEM_SUCCESS 0 |
371 | #define XFS_ITEM_PINNED 1 | 367 | #define XFS_ITEM_PINNED 1 |
372 | #define XFS_ITEM_LOCKED 2 | 368 | #define XFS_ITEM_LOCKED 2 |
373 | #define XFS_ITEM_PUSHBUF 3 | 369 | #define XFS_ITEM_FLUSHING 3 |
374 | 370 | ||
375 | /* | 371 | /* |
376 | * This is the type of function which can be given to xfs_trans_callback() | 372 | * This is the type of function which can be given to xfs_trans_callback() |
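To make the interface change concrete, a hedged skeleton of a ->iop_push method under the new contract: the method either queues its backing buffer on the caller-supplied delwri list or reports why it cannot. All names other than the return codes are hypothetical, and the exact signature of the delwri queueing call is an assumption:

	STATIC uint
	xfs_foo_item_push(
		struct xfs_log_item	*lip,
		struct list_head	*buffer_list)
	{
		struct xfs_foo_item	*fip = FOO_ITEM(lip);

		if (foo_item_pinned(fip))
			return XFS_ITEM_PINNED;
		if (!foo_item_trylock(fip))
			return XFS_ITEM_LOCKED;

		/* queue the backing buffer for delayed-write submission */
		if (!xfs_buf_delwri_queue(fip->fi_buf, buffer_list))
			return XFS_ITEM_FLUSHING;	/* already queued */
		return XFS_ITEM_SUCCESS;
	}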
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index 1dead07f092c..9c514483e599 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c | |||
@@ -20,7 +20,6 @@ | |||
20 | #include "xfs_fs.h" | 20 | #include "xfs_fs.h" |
21 | #include "xfs_types.h" | 21 | #include "xfs_types.h" |
22 | #include "xfs_log.h" | 22 | #include "xfs_log.h" |
23 | #include "xfs_inum.h" | ||
24 | #include "xfs_trans.h" | 23 | #include "xfs_trans.h" |
25 | #include "xfs_sb.h" | 24 | #include "xfs_sb.h" |
26 | #include "xfs_ag.h" | 25 | #include "xfs_ag.h" |
@@ -79,7 +78,7 @@ xfs_ail_check( | |||
79 | * Return a pointer to the first item in the AIL. If the AIL is empty, then | 78 | * Return a pointer to the first item in the AIL. If the AIL is empty, then |
80 | * return NULL. | 79 | * return NULL. |
81 | */ | 80 | */ |
82 | static xfs_log_item_t * | 81 | xfs_log_item_t * |
83 | xfs_ail_min( | 82 | xfs_ail_min( |
84 | struct xfs_ail *ailp) | 83 | struct xfs_ail *ailp) |
85 | { | 84 | { |
@@ -364,30 +363,31 @@ xfsaild_push( | |||
364 | xfs_log_item_t *lip; | 363 | xfs_log_item_t *lip; |
365 | xfs_lsn_t lsn; | 364 | xfs_lsn_t lsn; |
366 | xfs_lsn_t target; | 365 | xfs_lsn_t target; |
367 | long tout = 10; | 366 | long tout; |
368 | int stuck = 0; | 367 | int stuck = 0; |
368 | int flushing = 0; | ||
369 | int count = 0; | 369 | int count = 0; |
370 | int push_xfsbufd = 0; | ||
371 | 370 | ||
372 | /* | 371 | /* |
373 | * If last time we ran we encountered pinned items, force the log first | 372 | * If we encountered pinned items or did not finish writing out all |
374 | * and wait for it before pushing again. | 373 | * buffers the last time we ran, force the log first and wait for it |
374 | * before pushing again. | ||
375 | */ | 375 | */ |
376 | spin_lock(&ailp->xa_lock); | 376 | if (ailp->xa_log_flush && ailp->xa_last_pushed_lsn == 0 && |
377 | if (ailp->xa_last_pushed_lsn == 0 && ailp->xa_log_flush && | 377 | (!list_empty_careful(&ailp->xa_buf_list) || |
378 | !list_empty(&ailp->xa_ail)) { | 378 | xfs_ail_min_lsn(ailp))) { |
379 | ailp->xa_log_flush = 0; | 379 | ailp->xa_log_flush = 0; |
380 | spin_unlock(&ailp->xa_lock); | 380 | |
381 | XFS_STATS_INC(xs_push_ail_flush); | 381 | XFS_STATS_INC(xs_push_ail_flush); |
382 | xfs_log_force(mp, XFS_LOG_SYNC); | 382 | xfs_log_force(mp, XFS_LOG_SYNC); |
383 | spin_lock(&ailp->xa_lock); | ||
384 | } | 383 | } |
385 | 384 | ||
386 | target = ailp->xa_target; | 385 | spin_lock(&ailp->xa_lock); |
387 | lip = xfs_trans_ail_cursor_first(ailp, &cur, ailp->xa_last_pushed_lsn); | 386 | lip = xfs_trans_ail_cursor_first(ailp, &cur, ailp->xa_last_pushed_lsn); |
388 | if (!lip || XFS_FORCED_SHUTDOWN(mp)) { | 387 | if (!lip) { |
389 | /* | 388 | /* |
390 | * AIL is empty or our push has reached the end. | 389 | * If the AIL is empty or our push has reached the end we are |
390 | * done now. | ||
391 | */ | 391 | */ |
392 | xfs_trans_ail_cursor_done(ailp, &cur); | 392 | xfs_trans_ail_cursor_done(ailp, &cur); |
393 | spin_unlock(&ailp->xa_lock); | 393 | spin_unlock(&ailp->xa_lock); |
@@ -396,54 +396,42 @@ xfsaild_push( | |||
396 | 396 | ||
397 | XFS_STATS_INC(xs_push_ail); | 397 | XFS_STATS_INC(xs_push_ail); |
398 | 398 | ||
399 | /* | ||
400 | * While the item we are looking at is below the given threshold | ||
401 | * try to flush it out. We'd like not to stop until we've at least | ||
402 | * tried to push on everything in the AIL with an LSN less than | ||
403 | * the given threshold. | ||
404 | * | ||
405 | * However, we will stop after a certain number of pushes and wait | ||
406 | * for a reduced timeout to fire before pushing further. This | ||
407 | * prevents us from spinning when we can't do anything or there is | ||
408 | * lots of contention on the AIL lists. | ||
409 | */ | ||
410 | lsn = lip->li_lsn; | 399 | lsn = lip->li_lsn; |
400 | target = ailp->xa_target; | ||
411 | while ((XFS_LSN_CMP(lip->li_lsn, target) <= 0)) { | 401 | while ((XFS_LSN_CMP(lip->li_lsn, target) <= 0)) { |
412 | int lock_result; | 402 | int lock_result; |
403 | |||
413 | /* | 404 | /* |
414 | * If we can lock the item without sleeping, unlock the AIL | 405 | * Note that IOP_PUSH may unlock and reacquire the AIL lock. We |
415 | * lock and flush the item. Then re-grab the AIL lock so we | 406 | * rely on the AIL cursor implementation to be able to deal with |
416 | * can look for the next item on the AIL. List changes are | 407 | * the dropped lock. |
417 | * handled by the AIL lookup functions internally | ||
418 | * | ||
419 | * If we can't lock the item, either its holder will flush it | ||
420 | * or it is already being flushed or it is being relogged. In | ||
421 | * any of these case it is being taken care of and we can just | ||
422 | * skip to the next item in the list. | ||
423 | */ | 408 | */ |
424 | lock_result = IOP_TRYLOCK(lip); | 409 | lock_result = IOP_PUSH(lip, &ailp->xa_buf_list); |
425 | spin_unlock(&ailp->xa_lock); | ||
426 | switch (lock_result) { | 410 | switch (lock_result) { |
427 | case XFS_ITEM_SUCCESS: | 411 | case XFS_ITEM_SUCCESS: |
428 | XFS_STATS_INC(xs_push_ail_success); | 412 | XFS_STATS_INC(xs_push_ail_success); |
429 | trace_xfs_ail_push(lip); | 413 | trace_xfs_ail_push(lip); |
430 | 414 | ||
431 | IOP_PUSH(lip); | ||
432 | ailp->xa_last_pushed_lsn = lsn; | 415 | ailp->xa_last_pushed_lsn = lsn; |
433 | break; | 416 | break; |
434 | 417 | ||
435 | case XFS_ITEM_PUSHBUF: | 418 | case XFS_ITEM_FLUSHING: |
436 | XFS_STATS_INC(xs_push_ail_pushbuf); | 419 | /* |
437 | trace_xfs_ail_pushbuf(lip); | 420 | * The item or its backing buffer is already being |
438 | 421 | * flushed. The typical reason for that is that an | |
439 | if (!IOP_PUSHBUF(lip)) { | 422 | * inode buffer is locked because we already pushed the |
440 | trace_xfs_ail_pushbuf_pinned(lip); | 423 | * updates to it as part of inode clustering. |
441 | stuck++; | 424 | * |
442 | ailp->xa_log_flush++; | 425 | * We do not want to stop flushing just because lots |
443 | } else { | 426 | * of items are already being flushed, but we need to |
444 | ailp->xa_last_pushed_lsn = lsn; | 427 | * re-try the flushing relatively soon if most of the |
445 | } | 428 | * AIL is being flushed. |
446 | push_xfsbufd = 1; | 429 | */ |
430 | XFS_STATS_INC(xs_push_ail_flushing); | ||
431 | trace_xfs_ail_flushing(lip); | ||
432 | |||
433 | flushing++; | ||
434 | ailp->xa_last_pushed_lsn = lsn; | ||
447 | break; | 435 | break; |
448 | 436 | ||
449 | case XFS_ITEM_PINNED: | 437 | case XFS_ITEM_PINNED: |
@@ -453,28 +441,22 @@ xfsaild_push( | |||
453 | stuck++; | 441 | stuck++; |
454 | ailp->xa_log_flush++; | 442 | ailp->xa_log_flush++; |
455 | break; | 443 | break; |
456 | |||
457 | case XFS_ITEM_LOCKED: | 444 | case XFS_ITEM_LOCKED: |
458 | XFS_STATS_INC(xs_push_ail_locked); | 445 | XFS_STATS_INC(xs_push_ail_locked); |
459 | trace_xfs_ail_locked(lip); | 446 | trace_xfs_ail_locked(lip); |
447 | |||
460 | stuck++; | 448 | stuck++; |
461 | break; | 449 | break; |
462 | |||
463 | default: | 450 | default: |
464 | ASSERT(0); | 451 | ASSERT(0); |
465 | break; | 452 | break; |
466 | } | 453 | } |
467 | 454 | ||
468 | spin_lock(&ailp->xa_lock); | ||
469 | /* should we bother continuing? */ | ||
470 | if (XFS_FORCED_SHUTDOWN(mp)) | ||
471 | break; | ||
472 | ASSERT(mp->m_log); | ||
473 | |||
474 | count++; | 455 | count++; |
475 | 456 | ||
476 | /* | 457 | /* |
477 | * Are there too many items we can't do anything with? | 458 | * Are there too many items we can't do anything with? |
459 | * | ||
478 | * If we are skipping too many items because we can't flush | 460 | * If we are skipping too many items because we can't flush |
479 | * them or they are already being flushed, we back off and | 461 | * them or they are already being flushed, we back off and |
480 | * give them time to complete whatever operation is being | 462 | * give them time to complete whatever operation is being |
@@ -496,42 +478,36 @@ xfsaild_push( | |||
496 | xfs_trans_ail_cursor_done(ailp, &cur); | 478 | xfs_trans_ail_cursor_done(ailp, &cur); |
497 | spin_unlock(&ailp->xa_lock); | 479 | spin_unlock(&ailp->xa_lock); |
498 | 480 | ||
499 | if (push_xfsbufd) { | 481 | if (xfs_buf_delwri_submit_nowait(&ailp->xa_buf_list)) |
500 | /* we've got delayed write buffers to flush */ | 482 | ailp->xa_log_flush++; |
501 | wake_up_process(mp->m_ddev_targp->bt_task); | ||
502 | } | ||
503 | 483 | ||
504 | /* assume we have more work to do in a short while */ | 484 | if (!count || XFS_LSN_CMP(lsn, target) >= 0) { |
505 | out_done: | 485 | out_done: |
506 | if (!count) { | ||
507 | /* We're past our target or empty, so idle */ | ||
508 | ailp->xa_last_pushed_lsn = 0; | ||
509 | ailp->xa_log_flush = 0; | ||
510 | |||
511 | tout = 50; | ||
512 | } else if (XFS_LSN_CMP(lsn, target) >= 0) { | ||
513 | /* | 486 | /* |
514 | * We reached the target so wait a bit longer for I/O to | 487 | * We reached the target or the AIL is empty, so wait a bit |
515 | * complete and remove pushed items from the AIL before we | 488 | * longer for I/O to complete and remove pushed items from the |
516 | * start the next scan from the start of the AIL. | 489 | * AIL before we start the next scan from the start of the AIL. |
517 | */ | 490 | */ |
518 | tout = 50; | 491 | tout = 50; |
519 | ailp->xa_last_pushed_lsn = 0; | 492 | ailp->xa_last_pushed_lsn = 0; |
520 | } else if ((stuck * 100) / count > 90) { | 493 | } else if (((stuck + flushing) * 100) / count > 90) { |
521 | /* | 494 | /* |
522 | * Either there is a lot of contention on the AIL or we | 495 | * Either there is a lot of contention on the AIL or we are |
523 | * are stuck due to operations in progress. "Stuck" in this | 496 | * stuck due to operations in progress. "Stuck" in this case |
524 | * case is defined as >90% of the items we tried to push | 497 | * is defined as >90% of the items we tried to push were stuck. |
525 | * were stuck. | ||
526 | * | 498 | * |
527 | * Backoff a bit more to allow some I/O to complete before | 499 | * Backoff a bit more to allow some I/O to complete before |
528 | * restarting from the start of the AIL. This prevents us | 500 | * restarting from the start of the AIL. This prevents us from |
529 | * from spinning on the same items, and if they are pinned will | 501 | spinning on the same items, and if they are pinned will allow |
530 | * allow the restart to issue a log force to unpin the stuck | 502 | the restart to issue a log force to unpin the stuck items. |
531 | * items. | ||
532 | */ | 503 | */ |
533 | tout = 20; | 504 | tout = 20; |
534 | ailp->xa_last_pushed_lsn = 0; | 505 | ailp->xa_last_pushed_lsn = 0; |
506 | } else { | ||
507 | /* | ||
508 | * Assume we have more work to do in a short while. | ||
509 | */ | ||
510 | tout = 10; | ||
535 | } | 511 | } |
536 | 512 | ||
537 | return tout; | 513 | return tout; |
@@ -544,6 +520,8 @@ xfsaild( | |||
544 | struct xfs_ail *ailp = data; | 520 | struct xfs_ail *ailp = data; |
545 | long tout = 0; /* milliseconds */ | 521 | long tout = 0; /* milliseconds */ |
546 | 522 | ||
523 | current->flags |= PF_MEMALLOC; | ||
524 | |||
547 | while (!kthread_should_stop()) { | 525 | while (!kthread_should_stop()) { |
548 | if (tout && tout <= 20) | 526 | if (tout && tout <= 20) |
549 | __set_current_state(TASK_KILLABLE); | 527 | __set_current_state(TASK_KILLABLE); |
@@ -611,6 +589,30 @@ xfs_ail_push_all( | |||
611 | } | 589 | } |
612 | 590 | ||
613 | /* | 591 | /* |
592 | * Push out all items in the AIL immediately and wait until the AIL is empty. | ||
593 | */ | ||
594 | void | ||
595 | xfs_ail_push_all_sync( | ||
596 | struct xfs_ail *ailp) | ||
597 | { | ||
598 | struct xfs_log_item *lip; | ||
599 | DEFINE_WAIT(wait); | ||
600 | |||
601 | spin_lock(&ailp->xa_lock); | ||
602 | while ((lip = xfs_ail_max(ailp)) != NULL) { | ||
603 | prepare_to_wait(&ailp->xa_empty, &wait, TASK_UNINTERRUPTIBLE); | ||
604 | ailp->xa_target = lip->li_lsn; | ||
605 | wake_up_process(ailp->xa_task); | ||
606 | spin_unlock(&ailp->xa_lock); | ||
607 | schedule(); | ||
608 | spin_lock(&ailp->xa_lock); | ||
609 | } | ||
610 | spin_unlock(&ailp->xa_lock); | ||
611 | |||
612 | finish_wait(&ailp->xa_empty, &wait); | ||
613 | } | ||
614 | |||
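Reduced to its essentials, the function above uses the classic wait-queue pattern: re-arm the wait entry and re-check the condition under the lock on each pass, then schedule() with the lock dropped. A generic hedged sketch (names hypothetical):

	DEFINE_WAIT(wait);

	spin_lock(&lock);
	while (!done) {
		prepare_to_wait(&wq, &wait, TASK_UNINTERRUPTIBLE);
		spin_unlock(&lock);
		schedule();		/* woken by wake_up_all(&wq) */
		spin_lock(&lock);
	}
	spin_unlock(&lock);
	finish_wait(&wq, &wait);	/* tidies up even if we never slept */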
615 | /* | ||
614 | * xfs_trans_ail_update - bulk AIL insertion operation. | 616 | * xfs_trans_ail_update - bulk AIL insertion operation. |
615 | * | 617 | * |
616 | * @xfs_trans_ail_update takes an array of log items that all need to be | 618 | * @xfs_trans_ail_update takes an array of log items that all need to be |
@@ -667,11 +669,15 @@ xfs_trans_ail_update_bulk( | |||
667 | 669 | ||
668 | if (!list_empty(&tmp)) | 670 | if (!list_empty(&tmp)) |
669 | xfs_ail_splice(ailp, cur, &tmp, lsn); | 671 | xfs_ail_splice(ailp, cur, &tmp, lsn); |
670 | spin_unlock(&ailp->xa_lock); | ||
671 | 672 | ||
672 | if (mlip_changed && !XFS_FORCED_SHUTDOWN(ailp->xa_mount)) { | 673 | if (mlip_changed) { |
673 | xlog_assign_tail_lsn(ailp->xa_mount); | 674 | if (!XFS_FORCED_SHUTDOWN(ailp->xa_mount)) |
675 | xlog_assign_tail_lsn_locked(ailp->xa_mount); | ||
676 | spin_unlock(&ailp->xa_lock); | ||
677 | |||
674 | xfs_log_space_wake(ailp->xa_mount); | 678 | xfs_log_space_wake(ailp->xa_mount); |
679 | } else { | ||
680 | spin_unlock(&ailp->xa_lock); | ||
675 | } | 681 | } |
676 | } | 682 | } |
677 | 683 | ||
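
The reshuffled unlock above establishes one ordering rule: recompute the tail while the lock still stabilises the list, then drop the lock before waking waiters, since the wakeup may sleep. A hedged user-space sketch of that ordering, all names invented:

#include <pthread.h>

struct tail_state {
	pthread_mutex_t	lock;
	long		tail_lsn;
	long		min_lsn;
};

static void recompute_tail_locked(struct tail_state *t)
{
	t->tail_lsn = t->min_lsn;	/* reads list state: needs the lock */
}

static void wake_space_waiters(struct tail_state *t)
{
	(void)t;			/* may sleep: call without the lock */
}

static void update_tail_and_wake(struct tail_state *t, int min_changed)
{
	/* caller holds t->lock, mirroring xa_lock in the hunk above */
	if (min_changed) {
		recompute_tail_locked(t);
		pthread_mutex_unlock(&t->lock);
		wake_space_waiters(t);
	} else {
		pthread_mutex_unlock(&t->lock);
	}
}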
@@ -700,7 +706,8 @@ void | |||
700 | xfs_trans_ail_delete_bulk( | 706 | xfs_trans_ail_delete_bulk( |
701 | struct xfs_ail *ailp, | 707 | struct xfs_ail *ailp, |
702 | struct xfs_log_item **log_items, | 708 | struct xfs_log_item **log_items, |
703 | int nr_items) __releases(ailp->xa_lock) | 709 | int nr_items, |
710 | int shutdown_type) __releases(ailp->xa_lock) | ||
704 | { | 711 | { |
705 | xfs_log_item_t *mlip; | 712 | xfs_log_item_t *mlip; |
706 | int mlip_changed = 0; | 713 | int mlip_changed = 0; |
@@ -718,7 +725,7 @@ xfs_trans_ail_delete_bulk( | |||
718 | xfs_alert_tag(mp, XFS_PTAG_AILDELETE, | 725 | xfs_alert_tag(mp, XFS_PTAG_AILDELETE, |
719 | "%s: attempting to delete a log item that is not in the AIL", | 726 | "%s: attempting to delete a log item that is not in the AIL", |
720 | __func__); | 727 | __func__); |
721 | xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); | 728 | xfs_force_shutdown(mp, shutdown_type); |
722 | } | 729 | } |
723 | return; | 730 | return; |
724 | } | 731 | } |
@@ -729,28 +736,20 @@ xfs_trans_ail_delete_bulk( | |||
729 | if (mlip == lip) | 736 | if (mlip == lip) |
730 | mlip_changed = 1; | 737 | mlip_changed = 1; |
731 | } | 738 | } |
732 | spin_unlock(&ailp->xa_lock); | ||
733 | 739 | ||
734 | if (mlip_changed && !XFS_FORCED_SHUTDOWN(ailp->xa_mount)) { | 740 | if (mlip_changed) { |
735 | xlog_assign_tail_lsn(ailp->xa_mount); | 741 | if (!XFS_FORCED_SHUTDOWN(ailp->xa_mount)) |
742 | xlog_assign_tail_lsn_locked(ailp->xa_mount); | ||
743 | if (list_empty(&ailp->xa_ail)) | ||
744 | wake_up_all(&ailp->xa_empty); | ||
745 | spin_unlock(&ailp->xa_lock); | ||
746 | |||
736 | xfs_log_space_wake(ailp->xa_mount); | 747 | xfs_log_space_wake(ailp->xa_mount); |
748 | } else { | ||
749 | spin_unlock(&ailp->xa_lock); | ||
737 | } | 750 | } |
738 | } | 751 | } |
739 | 752 | ||
740 | /* | ||
741 | * The active item list (AIL) is a doubly linked list of log | ||
742 | * items sorted by ascending lsn. The base of the list is | ||
743 | * a forw/back pointer pair embedded in the xfs mount structure. | ||
744 | * The base is initialized with both pointers pointing to the | ||
745 | * base. This case always needs to be distinguished, because | ||
746 | * the base has no lsn to look at. We almost always insert | ||
747 | * at the end of the list, so on inserts we search from the | ||
748 | * end of the list to find where the new item belongs. | ||
749 | */ | ||
750 | |||
751 | /* | ||
752 | * Initialize the doubly linked list to point only to itself. | ||
753 | */ | ||
754 | int | 753 | int |
755 | xfs_trans_ail_init( | 754 | xfs_trans_ail_init( |
756 | xfs_mount_t *mp) | 755 | xfs_mount_t *mp) |
@@ -765,6 +764,8 @@ xfs_trans_ail_init( | |||
765 | INIT_LIST_HEAD(&ailp->xa_ail); | 764 | INIT_LIST_HEAD(&ailp->xa_ail); |
766 | INIT_LIST_HEAD(&ailp->xa_cursors); | 765 | INIT_LIST_HEAD(&ailp->xa_cursors); |
767 | spin_lock_init(&ailp->xa_lock); | 766 | spin_lock_init(&ailp->xa_lock); |
767 | INIT_LIST_HEAD(&ailp->xa_buf_list); | ||
768 | init_waitqueue_head(&ailp->xa_empty); | ||
768 | 769 | ||
769 | ailp->xa_task = kthread_run(xfsaild, ailp, "xfsaild/%s", | 770 | ailp->xa_task = kthread_run(xfsaild, ailp, "xfsaild/%s", |
770 | ailp->xa_mount->m_fsname); | 771 | ailp->xa_mount->m_fsname); |
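
With the shutdown type now caller-supplied, a hypothetical call site for the widened interface; only xfs_trans_ail_delete(), SHUTDOWN_CORRUPT_INCORE and the xa_lock spinlock appear in this diff, the wrapper itself is invented for illustration:

/* Invented example: drop one item, choosing the shutdown reason used
 * if the item is unexpectedly missing from the AIL. */
static void drop_item_from_ail(struct xfs_ail *ailp, xfs_log_item_t *lip)
{
	spin_lock(&ailp->xa_lock);
	/* releases xa_lock on return */
	xfs_trans_ail_delete(ailp, lip, SHUTDOWN_CORRUPT_INCORE);
}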
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c index 1302d1d95a58..21c5a5e3700d 100644 --- a/fs/xfs/xfs_trans_buf.c +++ b/fs/xfs/xfs_trans_buf.c | |||
@@ -18,9 +18,7 @@ | |||
18 | #include "xfs.h" | 18 | #include "xfs.h" |
19 | #include "xfs_fs.h" | 19 | #include "xfs_fs.h" |
20 | #include "xfs_types.h" | 20 | #include "xfs_types.h" |
21 | #include "xfs_bit.h" | ||
22 | #include "xfs_log.h" | 21 | #include "xfs_log.h" |
23 | #include "xfs_inum.h" | ||
24 | #include "xfs_trans.h" | 22 | #include "xfs_trans.h" |
25 | #include "xfs_sb.h" | 23 | #include "xfs_sb.h" |
26 | #include "xfs_ag.h" | 24 | #include "xfs_ag.h" |
@@ -33,7 +31,6 @@ | |||
33 | #include "xfs_buf_item.h" | 31 | #include "xfs_buf_item.h" |
34 | #include "xfs_trans_priv.h" | 32 | #include "xfs_trans_priv.h" |
35 | #include "xfs_error.h" | 33 | #include "xfs_error.h" |
36 | #include "xfs_rw.h" | ||
37 | #include "xfs_trace.h" | 34 | #include "xfs_trace.h" |
38 | 35 | ||
39 | /* | 36 | /* |
@@ -56,7 +53,7 @@ xfs_trans_buf_item_match( | |||
56 | if (blip->bli_item.li_type == XFS_LI_BUF && | 53 | if (blip->bli_item.li_type == XFS_LI_BUF && |
57 | blip->bli_buf->b_target == target && | 54 | blip->bli_buf->b_target == target && |
58 | XFS_BUF_ADDR(blip->bli_buf) == blkno && | 55 | XFS_BUF_ADDR(blip->bli_buf) == blkno && |
59 | XFS_BUF_COUNT(blip->bli_buf) == len) | 56 | BBTOB(blip->bli_buf->b_length) == len) |
60 | return blip->bli_buf; | 57 | return blip->bli_buf; |
61 | } | 58 | } |
62 | 59 | ||
@@ -141,15 +138,11 @@ xfs_trans_get_buf(xfs_trans_t *tp, | |||
141 | xfs_buf_t *bp; | 138 | xfs_buf_t *bp; |
142 | xfs_buf_log_item_t *bip; | 139 | xfs_buf_log_item_t *bip; |
143 | 140 | ||
144 | if (flags == 0) | ||
145 | flags = XBF_LOCK | XBF_MAPPED; | ||
146 | |||
147 | /* | 141 | /* |
148 | * Default to a normal get_buf() call if the tp is NULL. | 142 | * Default to a normal get_buf() call if the tp is NULL. |
149 | */ | 143 | */ |
150 | if (tp == NULL) | 144 | if (tp == NULL) |
151 | return xfs_buf_get(target_dev, blkno, len, | 145 | return xfs_buf_get(target_dev, blkno, len, flags); |
152 | flags | XBF_DONT_BLOCK); | ||
153 | 146 | ||
154 | /* | 147 | /* |
155 | * If we find the buffer in the cache with this transaction | 148 | * If we find the buffer in the cache with this transaction |
@@ -165,14 +158,6 @@ xfs_trans_get_buf(xfs_trans_t *tp, | |||
165 | XFS_BUF_DONE(bp); | 158 | XFS_BUF_DONE(bp); |
166 | } | 159 | } |
167 | 160 | ||
168 | /* | ||
169 | * If the buffer is stale then it was binval'ed | ||
170 | * since last read. This doesn't matter since the | ||
171 | * caller isn't allowed to use the data anyway. | ||
172 | */ | ||
173 | else if (XFS_BUF_ISSTALE(bp)) | ||
174 | ASSERT(!XFS_BUF_ISDELAYWRITE(bp)); | ||
175 | |||
176 | ASSERT(bp->b_transp == tp); | 161 | ASSERT(bp->b_transp == tp); |
177 | bip = bp->b_fspriv; | 162 | bip = bp->b_fspriv; |
178 | ASSERT(bip != NULL); | 163 | ASSERT(bip != NULL); |
@@ -182,15 +167,7 @@ xfs_trans_get_buf(xfs_trans_t *tp, | |||
182 | return (bp); | 167 | return (bp); |
183 | } | 168 | } |
184 | 169 | ||
185 | /* | 170 | bp = xfs_buf_get(target_dev, blkno, len, flags); |
186 | * We always specify the XBF_DONT_BLOCK flag within a transaction | ||
187 | * so that get_buf does not try to push out a delayed write buffer | ||
188 | * which might cause another transaction to take place (if the | ||
189 | * buffer was delayed alloc). Such recursive transactions can | ||
190 | * easily deadlock with our current transaction as well as cause | ||
191 | * us to run out of stack space. | ||
192 | */ | ||
193 | bp = xfs_buf_get(target_dev, blkno, len, flags | XBF_DONT_BLOCK); | ||
194 | if (bp == NULL) { | 171 | if (bp == NULL) { |
195 | return NULL; | 172 | return NULL; |
196 | } | 173 | } |
@@ -282,14 +259,13 @@ xfs_trans_read_buf( | |||
282 | xfs_buf_log_item_t *bip; | 259 | xfs_buf_log_item_t *bip; |
283 | int error; | 260 | int error; |
284 | 261 | ||
285 | if (flags == 0) | 262 | *bpp = NULL; |
286 | flags = XBF_LOCK | XBF_MAPPED; | ||
287 | 263 | ||
288 | /* | 264 | /* |
289 | * Default to a normal xfs_buf_read() call if the tp is NULL. | 265 | * Default to a normal xfs_buf_read() call if the tp is NULL. |
290 | */ | 266 | */ |
291 | if (tp == NULL) { | 267 | if (tp == NULL) { |
292 | bp = xfs_buf_read(target, blkno, len, flags | XBF_DONT_BLOCK); | 268 | bp = xfs_buf_read(target, blkno, len, flags); |
293 | if (!bp) | 269 | if (!bp) |
294 | return (flags & XBF_TRYLOCK) ? | 270 | return (flags & XBF_TRYLOCK) ? |
295 | EAGAIN : XFS_ERROR(ENOMEM); | 271 | EAGAIN : XFS_ERROR(ENOMEM); |
@@ -297,6 +273,8 @@ xfs_trans_read_buf( | |||
297 | if (bp->b_error) { | 273 | if (bp->b_error) { |
298 | error = bp->b_error; | 274 | error = bp->b_error; |
299 | xfs_buf_ioerror_alert(bp, __func__); | 275 | xfs_buf_ioerror_alert(bp, __func__); |
276 | XFS_BUF_UNDONE(bp); | ||
277 | xfs_buf_stale(bp); | ||
300 | xfs_buf_relse(bp); | 278 | xfs_buf_relse(bp); |
301 | return error; | 279 | return error; |
302 | } | 280 | } |
@@ -371,15 +349,7 @@ xfs_trans_read_buf( | |||
371 | return 0; | 349 | return 0; |
372 | } | 350 | } |
373 | 351 | ||
374 | /* | 352 | bp = xfs_buf_read(target, blkno, len, flags); |
375 | * We always specify the XBF_DONT_BLOCK flag within a transaction | ||
376 | * so that get_buf does not try to push out a delayed write buffer | ||
377 | * which might cause another transaction to take place (if the | ||
378 | * buffer was delayed alloc). Such recursive transactions can | ||
379 | * easily deadlock with our current transaction as well as cause | ||
380 | * us to run out of stack space. | ||
381 | */ | ||
382 | bp = xfs_buf_read(target, blkno, len, flags | XBF_DONT_BLOCK); | ||
383 | if (bp == NULL) { | 353 | if (bp == NULL) { |
384 | *bpp = NULL; | 354 | *bpp = NULL; |
385 | return (flags & XBF_TRYLOCK) ? | 355 | return (flags & XBF_TRYLOCK) ? |
@@ -418,19 +388,6 @@ xfs_trans_read_buf( | |||
418 | return 0; | 388 | return 0; |
419 | 389 | ||
420 | shutdown_abort: | 390 | shutdown_abort: |
421 | /* | ||
422 | * the theory here is that buffer is good but we're | ||
423 | * bailing out because the filesystem is being forcibly | ||
424 | * shut down. So we should leave the b_flags alone since | ||
425 | * the buffer's not staled and just get out. | ||
426 | */ | ||
427 | #if defined(DEBUG) | ||
428 | if (XFS_BUF_ISSTALE(bp) && XFS_BUF_ISDELAYWRITE(bp)) | ||
429 | xfs_notice(mp, "about to pop assert, bp == 0x%p", bp); | ||
430 | #endif | ||
431 | ASSERT((bp->b_flags & (XBF_STALE|XBF_DELWRI)) != | ||
432 | (XBF_STALE|XBF_DELWRI)); | ||
433 | |||
434 | trace_xfs_trans_read_buf_shut(bp, _RET_IP_); | 391 | trace_xfs_trans_read_buf_shut(bp, _RET_IP_); |
435 | xfs_buf_relse(bp); | 392 | xfs_buf_relse(bp); |
436 | *bpp = NULL; | 393 | *bpp = NULL; |
@@ -606,7 +563,7 @@ xfs_trans_log_buf(xfs_trans_t *tp, | |||
606 | 563 | ||
607 | ASSERT(bp->b_transp == tp); | 564 | ASSERT(bp->b_transp == tp); |
608 | ASSERT(bip != NULL); | 565 | ASSERT(bip != NULL); |
609 | ASSERT((first <= last) && (last < XFS_BUF_COUNT(bp))); | 566 | ASSERT(first <= last && last < BBTOB(bp->b_length)); |
610 | ASSERT(bp->b_iodone == NULL || | 567 | ASSERT(bp->b_iodone == NULL || |
611 | bp->b_iodone == xfs_buf_iodone_callbacks); | 568 | bp->b_iodone == xfs_buf_iodone_callbacks); |
612 | 569 | ||
@@ -626,8 +583,6 @@ xfs_trans_log_buf(xfs_trans_t *tp, | |||
626 | bp->b_iodone = xfs_buf_iodone_callbacks; | 583 | bp->b_iodone = xfs_buf_iodone_callbacks; |
627 | bip->bli_item.li_cb = xfs_buf_iodone; | 584 | bip->bli_item.li_cb = xfs_buf_iodone; |
628 | 585 | ||
629 | xfs_buf_delwri_queue(bp); | ||
630 | |||
631 | trace_xfs_trans_log_buf(bip); | 586 | trace_xfs_trans_log_buf(bip); |
632 | 587 | ||
633 | /* | 588 | /* |
@@ -651,22 +606,33 @@ xfs_trans_log_buf(xfs_trans_t *tp, | |||
651 | 606 | ||
652 | 607 | ||
653 | /* | 608 | /* |
654 | * This called to invalidate a buffer that is being used within | 609 | * Invalidate a buffer that is being used within a transaction. |
655 | * a transaction. Typically this is because the blocks in the | 610 | * |
656 | * buffer are being freed, so we need to prevent it from being | 611 | * Typically this is because the blocks in the buffer are being freed, so we |
657 | * written out when we're done. Allowing it to be written again | 612 | * need to prevent it from being written out when we're done. Allowing it |
658 | * might overwrite data in the free blocks if they are reallocated | 613 | * to be written again might overwrite data in the free blocks if they are |
659 | * to a file. | 614 | * reallocated to a file. |
615 | * | ||
616 | * We prevent the buffer from being written out by marking it stale. We can't | ||
617 | * get rid of the buf log item at this point because the buffer may still be | ||
618 | * pinned by another transaction. If that is the case, then we'll wait until | ||
619 | * the buffer is committed to disk for the last time (we can tell by the ref | ||
620 | * count) and free it in xfs_buf_item_unpin(). Until that happens we will | ||
621 | * keep the buffer locked so that the buffer and buf log item are not reused. | ||
622 | * | ||
623 | * We also set the XFS_BLF_CANCEL flag in the buf log format structure and log | ||
624 | * the buf item. This will be used at recovery time to determine that copies | ||
625 | * of the buffer in the log before this should not be replayed. | ||
660 | * | 626 | * |
661 | * We prevent the buffer from being written out by clearing the | 627 | * We mark the item descriptor and the transaction dirty so that we'll hold |
662 | * B_DELWRI flag. We can't always | 628 | * the buffer until after the commit. |
663 | * get rid of the buf log item at this point, though, because | 629 | * |
664 | * the buffer may still be pinned by another transaction. If that | 630 | * Since we're invalidating the buffer, we also clear the state about which |
665 | * is the case, then we'll wait until the buffer is committed to | 631 | * parts of the buffer have been logged. We also clear the flag indicating |
666 | * disk for the last time (we can tell by the ref count) and | 632 | * that this is an inode buffer since the data in the buffer will no longer |
667 | * free it in xfs_buf_item_unpin(). Until it is cleaned up we | 633 | * be valid. |
668 | * will keep the buffer locked so that the buffer and buf log item | 634 | * |
669 | * are not reused. | 635 | * We set the stale bit in the buffer as well since we're getting rid of it. |
670 | */ | 636 | */ |
671 | void | 637 | void |
672 | xfs_trans_binval( | 638 | xfs_trans_binval( |
@@ -686,7 +652,6 @@ xfs_trans_binval( | |||
686 | * If the buffer is already invalidated, then | 652 | * If the buffer is already invalidated, then |
687 | * just return. | 653 | * just return. |
688 | */ | 654 | */ |
689 | ASSERT(!(XFS_BUF_ISDELAYWRITE(bp))); | ||
690 | ASSERT(XFS_BUF_ISSTALE(bp)); | 655 | ASSERT(XFS_BUF_ISSTALE(bp)); |
691 | ASSERT(!(bip->bli_flags & (XFS_BLI_LOGGED | XFS_BLI_DIRTY))); | 656 | ASSERT(!(bip->bli_flags & (XFS_BLI_LOGGED | XFS_BLI_DIRTY))); |
692 | ASSERT(!(bip->bli_format.blf_flags & XFS_BLF_INODE_BUF)); | 657 | ASSERT(!(bip->bli_format.blf_flags & XFS_BLF_INODE_BUF)); |
@@ -696,27 +661,8 @@ xfs_trans_binval( | |||
696 | return; | 661 | return; |
697 | } | 662 | } |
698 | 663 | ||
699 | /* | ||
700 | * Clear the dirty bit in the buffer and set the STALE flag | ||
701 | * in the buf log item. The STALE flag will be used in | ||
702 | * xfs_buf_item_unpin() to determine if it should clean up | ||
703 | * when the last reference to the buf item is given up. | ||
704 | * We set the XFS_BLF_CANCEL flag in the buf log format structure | ||
705 | * and log the buf item. This will be used at recovery time | ||
706 | * to determine that copies of the buffer in the log before | ||
707 | * this should not be replayed. | ||
708 | * We mark the item descriptor and the transaction dirty so | ||
709 | * that we'll hold the buffer until after the commit. | ||
710 | * | ||
711 | * Since we're invalidating the buffer, we also clear the state | ||
712 | * about which parts of the buffer have been logged. We also | ||
713 | * clear the flag indicating that this is an inode buffer since | ||
714 | * the data in the buffer will no longer be valid. | ||
715 | * | ||
716 | * We set the stale bit in the buffer as well since we're getting | ||
717 | * rid of it. | ||
718 | */ | ||
719 | xfs_buf_stale(bp); | 664 | xfs_buf_stale(bp); |
665 | |||
720 | bip->bli_flags |= XFS_BLI_STALE; | 666 | bip->bli_flags |= XFS_BLI_STALE; |
721 | bip->bli_flags &= ~(XFS_BLI_INODE_BUF | XFS_BLI_LOGGED | XFS_BLI_DIRTY); | 667 | bip->bli_flags &= ~(XFS_BLI_INODE_BUF | XFS_BLI_LOGGED | XFS_BLI_DIRTY); |
722 | bip->bli_format.blf_flags &= ~XFS_BLF_INODE_BUF; | 668 | bip->bli_format.blf_flags &= ~XFS_BLF_INODE_BUF; |
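
The rewritten xfs_trans_binval() comment boils down to one rule: a buffer over freed blocks must never be written back, so mark it stale and let the last reference free it. A minimal user-space analogue of that rule, all names invented:

struct cached_buf {
	int	refcount;	/* may still be pinned by another user */
	int	dirty;
	int	stale;
};

/* Never write freed blocks back; reclaim only after the last put. */
static void buf_invalidate(struct cached_buf *b)
{
	b->dirty = 0;
	b->stale = 1;
}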
diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c index 279099717ed2..bcb60542fcf1 100644 --- a/fs/xfs/xfs_trans_dquot.c +++ b/fs/xfs/xfs_trans_dquot.c | |||
@@ -17,9 +17,7 @@ | |||
17 | */ | 17 | */ |
18 | #include "xfs.h" | 18 | #include "xfs.h" |
19 | #include "xfs_fs.h" | 19 | #include "xfs_fs.h" |
20 | #include "xfs_bit.h" | ||
21 | #include "xfs_log.h" | 20 | #include "xfs_log.h" |
22 | #include "xfs_inum.h" | ||
23 | #include "xfs_trans.h" | 21 | #include "xfs_trans.h" |
24 | #include "xfs_sb.h" | 22 | #include "xfs_sb.h" |
25 | #include "xfs_ag.h" | 23 | #include "xfs_ag.h" |
diff --git a/fs/xfs/xfs_trans_extfree.c b/fs/xfs/xfs_trans_extfree.c index f7590f5badea..8d71b16eccae 100644 --- a/fs/xfs/xfs_trans_extfree.c +++ b/fs/xfs/xfs_trans_extfree.c | |||
@@ -19,7 +19,6 @@ | |||
19 | #include "xfs_fs.h" | 19 | #include "xfs_fs.h" |
20 | #include "xfs_types.h" | 20 | #include "xfs_types.h" |
21 | #include "xfs_log.h" | 21 | #include "xfs_log.h" |
22 | #include "xfs_inum.h" | ||
23 | #include "xfs_trans.h" | 22 | #include "xfs_trans.h" |
24 | #include "xfs_sb.h" | 23 | #include "xfs_sb.h" |
25 | #include "xfs_ag.h" | 24 | #include "xfs_ag.h" |
diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c index 7a7442c03f2b..d2eee20d5f5b 100644 --- a/fs/xfs/xfs_trans_inode.c +++ b/fs/xfs/xfs_trans_inode.c | |||
@@ -18,9 +18,7 @@ | |||
18 | #include "xfs.h" | 18 | #include "xfs.h" |
19 | #include "xfs_fs.h" | 19 | #include "xfs_fs.h" |
20 | #include "xfs_types.h" | 20 | #include "xfs_types.h" |
21 | #include "xfs_bit.h" | ||
22 | #include "xfs_log.h" | 21 | #include "xfs_log.h" |
23 | #include "xfs_inum.h" | ||
24 | #include "xfs_trans.h" | 22 | #include "xfs_trans.h" |
25 | #include "xfs_sb.h" | 23 | #include "xfs_sb.h" |
26 | #include "xfs_ag.h" | 24 | #include "xfs_ag.h" |
diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h index 8ab2ced415f1..fb62377d1cbc 100644 --- a/fs/xfs/xfs_trans_priv.h +++ b/fs/xfs/xfs_trans_priv.h | |||
@@ -71,6 +71,8 @@ struct xfs_ail { | |||
71 | spinlock_t xa_lock; | 71 | spinlock_t xa_lock; |
72 | xfs_lsn_t xa_last_pushed_lsn; | 72 | xfs_lsn_t xa_last_pushed_lsn; |
73 | int xa_log_flush; | 73 | int xa_log_flush; |
74 | struct list_head xa_buf_list; | ||
75 | wait_queue_head_t xa_empty; | ||
74 | }; | 76 | }; |
75 | 77 | ||
76 | /* | 78 | /* |
@@ -90,18 +92,22 @@ xfs_trans_ail_update( | |||
90 | } | 92 | } |
91 | 93 | ||
92 | void xfs_trans_ail_delete_bulk(struct xfs_ail *ailp, | 94 | void xfs_trans_ail_delete_bulk(struct xfs_ail *ailp, |
93 | struct xfs_log_item **log_items, int nr_items) | 95 | struct xfs_log_item **log_items, int nr_items, |
96 | int shutdown_type) | ||
94 | __releases(ailp->xa_lock); | 97 | __releases(ailp->xa_lock); |
95 | static inline void | 98 | static inline void |
96 | xfs_trans_ail_delete( | 99 | xfs_trans_ail_delete( |
97 | struct xfs_ail *ailp, | 100 | struct xfs_ail *ailp, |
98 | xfs_log_item_t *lip) __releases(ailp->xa_lock) | 101 | xfs_log_item_t *lip, |
102 | int shutdown_type) __releases(ailp->xa_lock) | ||
99 | { | 103 | { |
100 | xfs_trans_ail_delete_bulk(ailp, &lip, 1); | 104 | xfs_trans_ail_delete_bulk(ailp, &lip, 1, shutdown_type); |
101 | } | 105 | } |
102 | 106 | ||
103 | void xfs_ail_push(struct xfs_ail *, xfs_lsn_t); | 107 | void xfs_ail_push(struct xfs_ail *, xfs_lsn_t); |
104 | void xfs_ail_push_all(struct xfs_ail *); | 108 | void xfs_ail_push_all(struct xfs_ail *); |
109 | void xfs_ail_push_all_sync(struct xfs_ail *); | ||
110 | struct xfs_log_item *xfs_ail_min(struct xfs_ail *ailp); | ||
105 | xfs_lsn_t xfs_ail_min_lsn(struct xfs_ail *ailp); | 111 | xfs_lsn_t xfs_ail_min_lsn(struct xfs_ail *ailp); |
106 | 112 | ||
107 | struct xfs_log_item * xfs_trans_ail_cursor_first(struct xfs_ail *ailp, | 113 | struct xfs_log_item * xfs_trans_ail_cursor_first(struct xfs_ail *ailp, |
diff --git a/fs/xfs/xfs_types.h b/fs/xfs/xfs_types.h index 65584b55607d..398cf681d025 100644 --- a/fs/xfs/xfs_types.h +++ b/fs/xfs/xfs_types.h | |||
@@ -57,6 +57,7 @@ typedef __uint64_t __psunsigned_t; | |||
57 | #endif /* __KERNEL__ */ | 57 | #endif /* __KERNEL__ */ |
58 | 58 | ||
59 | typedef __uint32_t xfs_agblock_t; /* blockno in alloc. group */ | 59 | typedef __uint32_t xfs_agblock_t; /* blockno in alloc. group */ |
60 | typedef __uint32_t xfs_agino_t; /* inode # within allocation grp */ | ||
60 | typedef __uint32_t xfs_extlen_t; /* extent length in blocks */ | 61 | typedef __uint32_t xfs_extlen_t; /* extent length in blocks */ |
61 | typedef __uint32_t xfs_agnumber_t; /* allocation group number */ | 62 | typedef __uint32_t xfs_agnumber_t; /* allocation group number */ |
62 | typedef __int32_t xfs_extnum_t; /* # of extents in a file */ | 63 | typedef __int32_t xfs_extnum_t; /* # of extents in a file */ |
@@ -101,6 +102,7 @@ typedef __uint64_t xfs_fileoff_t; /* block number in a file */ | |||
101 | typedef __int64_t xfs_sfiloff_t; /* signed block number in a file */ | 102 | typedef __int64_t xfs_sfiloff_t; /* signed block number in a file */ |
102 | typedef __uint64_t xfs_filblks_t; /* number of blocks in a file */ | 103 | typedef __uint64_t xfs_filblks_t; /* number of blocks in a file */ |
103 | 104 | ||
105 | |||
104 | /* | 106 | /* |
105 | * Null values for the types. | 107 | * Null values for the types. |
106 | */ | 108 | */ |
@@ -120,6 +122,9 @@ typedef __uint64_t xfs_filblks_t; /* number of blocks in a file */ | |||
120 | 122 | ||
121 | #define NULLCOMMITLSN ((xfs_lsn_t)-1) | 123 | #define NULLCOMMITLSN ((xfs_lsn_t)-1) |
122 | 124 | ||
125 | #define NULLFSINO ((xfs_ino_t)-1) | ||
126 | #define NULLAGINO ((xfs_agino_t)-1) | ||
127 | |||
123 | /* | 128 | /* |
124 | * Max values for extlen, extnum, aextnum. | 129 | * Max values for extlen, extnum, aextnum. |
125 | */ | 130 | */ |
diff --git a/fs/xfs/xfs_utils.c b/fs/xfs/xfs_utils.c index 79c05ac85bfe..4e5b9ad5cb97 100644 --- a/fs/xfs/xfs_utils.c +++ b/fs/xfs/xfs_utils.c | |||
@@ -18,9 +18,7 @@ | |||
18 | #include "xfs.h" | 18 | #include "xfs.h" |
19 | #include "xfs_fs.h" | 19 | #include "xfs_fs.h" |
20 | #include "xfs_types.h" | 20 | #include "xfs_types.h" |
21 | #include "xfs_bit.h" | ||
22 | #include "xfs_log.h" | 21 | #include "xfs_log.h" |
23 | #include "xfs_inum.h" | ||
24 | #include "xfs_trans.h" | 22 | #include "xfs_trans.h" |
25 | #include "xfs_sb.h" | 23 | #include "xfs_sb.h" |
26 | #include "xfs_ag.h" | 24 | #include "xfs_ag.h" |
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index 64981d7e7375..b6a82d817a82 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c | |||
@@ -21,7 +21,6 @@ | |||
21 | #include "xfs_types.h" | 21 | #include "xfs_types.h" |
22 | #include "xfs_bit.h" | 22 | #include "xfs_bit.h" |
23 | #include "xfs_log.h" | 23 | #include "xfs_log.h" |
24 | #include "xfs_inum.h" | ||
25 | #include "xfs_trans.h" | 24 | #include "xfs_trans.h" |
26 | #include "xfs_sb.h" | 25 | #include "xfs_sb.h" |
27 | #include "xfs_ag.h" | 26 | #include "xfs_ag.h" |
@@ -39,7 +38,6 @@ | |||
39 | #include "xfs_bmap.h" | 38 | #include "xfs_bmap.h" |
40 | #include "xfs_acl.h" | 39 | #include "xfs_acl.h" |
41 | #include "xfs_attr.h" | 40 | #include "xfs_attr.h" |
42 | #include "xfs_rw.h" | ||
43 | #include "xfs_error.h" | 41 | #include "xfs_error.h" |
44 | #include "xfs_quota.h" | 42 | #include "xfs_quota.h" |
45 | #include "xfs_utils.h" | 43 | #include "xfs_utils.h" |
@@ -81,8 +79,7 @@ xfs_readlink_bmap( | |||
81 | d = XFS_FSB_TO_DADDR(mp, mval[n].br_startblock); | 79 | d = XFS_FSB_TO_DADDR(mp, mval[n].br_startblock); |
82 | byte_cnt = XFS_FSB_TO_B(mp, mval[n].br_blockcount); | 80 | byte_cnt = XFS_FSB_TO_B(mp, mval[n].br_blockcount); |
83 | 81 | ||
84 | bp = xfs_buf_read(mp->m_ddev_targp, d, BTOBB(byte_cnt), | 82 | bp = xfs_buf_read(mp->m_ddev_targp, d, BTOBB(byte_cnt), 0); |
85 | XBF_LOCK | XBF_MAPPED | XBF_DONT_BLOCK); | ||
86 | if (!bp) | 83 | if (!bp) |
87 | return XFS_ERROR(ENOMEM); | 84 | return XFS_ERROR(ENOMEM); |
88 | error = bp->b_error; | 85 | error = bp->b_error; |
@@ -1919,7 +1916,7 @@ xfs_alloc_file_space( | |||
1919 | 1916 | ||
1920 | error0: /* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */ | 1917 | error0: /* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */ |
1921 | xfs_bmap_cancel(&free_list); | 1918 | xfs_bmap_cancel(&free_list); |
1922 | xfs_trans_unreserve_quota_nblks(tp, ip, qblocks, 0, quota_flag); | 1919 | xfs_trans_unreserve_quota_nblks(tp, ip, (long)qblocks, 0, quota_flag); |
1923 | 1920 | ||
1924 | error1: /* Just cancel transaction */ | 1921 | error1: /* Just cancel transaction */ |
1925 | xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); | 1922 | xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); |
@@ -1966,7 +1963,7 @@ xfs_zero_remaining_bytes( | |||
1966 | 1963 | ||
1967 | bp = xfs_buf_get_uncached(XFS_IS_REALTIME_INODE(ip) ? | 1964 | bp = xfs_buf_get_uncached(XFS_IS_REALTIME_INODE(ip) ? |
1968 | mp->m_rtdev_targp : mp->m_ddev_targp, | 1965 | mp->m_rtdev_targp : mp->m_ddev_targp, |
1969 | mp->m_sb.sb_blocksize, XBF_DONT_BLOCK); | 1966 | BTOBB(mp->m_sb.sb_blocksize), 0); |
1970 | if (!bp) | 1967 | if (!bp) |
1971 | return XFS_ERROR(ENOMEM); | 1968 | return XFS_ERROR(ENOMEM); |
1972 | 1969 | ||
@@ -2315,17 +2312,33 @@ xfs_change_file_space( | |||
2315 | case XFS_IOC_ALLOCSP64: | 2312 | case XFS_IOC_ALLOCSP64: |
2316 | case XFS_IOC_FREESP: | 2313 | case XFS_IOC_FREESP: |
2317 | case XFS_IOC_FREESP64: | 2314 | case XFS_IOC_FREESP64: |
2315 | /* | ||
2316 | * These operations actually do IO when extending the file, but | ||
2317 | * the allocation is done separately from the zeroing that is | ||
2318 | * done. This set of operations needs to be serialised against | ||
2319 | * other IO operations, such as truncate and buffered IO. We | ||
2320 | * need to take the IOLOCK here to serialise the allocation and | ||
2321 | * zeroing IO to prevent other IOLOCK holders (e.g. getbmap, | ||
2322 | * truncate, direct IO) from racing against the transient | ||
2323 | * allocated but not written state we can have here. | ||
2324 | */ | ||
2325 | xfs_ilock(ip, XFS_IOLOCK_EXCL); | ||
2318 | if (startoffset > fsize) { | 2326 | if (startoffset > fsize) { |
2319 | error = xfs_alloc_file_space(ip, fsize, | 2327 | error = xfs_alloc_file_space(ip, fsize, |
2320 | startoffset - fsize, 0, attr_flags); | 2328 | startoffset - fsize, 0, |
2321 | if (error) | 2329 | attr_flags | XFS_ATTR_NOLOCK); |
2330 | if (error) { | ||
2331 | xfs_iunlock(ip, XFS_IOLOCK_EXCL); | ||
2322 | break; | 2332 | break; |
2333 | } | ||
2323 | } | 2334 | } |
2324 | 2335 | ||
2325 | iattr.ia_valid = ATTR_SIZE; | 2336 | iattr.ia_valid = ATTR_SIZE; |
2326 | iattr.ia_size = startoffset; | 2337 | iattr.ia_size = startoffset; |
2327 | 2338 | ||
2328 | error = xfs_setattr_size(ip, &iattr, attr_flags); | 2339 | error = xfs_setattr_size(ip, &iattr, |
2340 | attr_flags | XFS_ATTR_NOLOCK); | ||
2341 | xfs_iunlock(ip, XFS_IOLOCK_EXCL); | ||
2329 | 2342 | ||
2330 | if (error) | 2343 | if (error) |
2331 | return error; | 2344 | return error; |
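
The ALLOCSP comment above describes one locking rule: hold the file's I/O lock exclusively across both the allocation and the size update, so concurrent I/O never observes allocated-but-unwritten space. A user-space analogue of that rule, all names invented:

#include <pthread.h>

struct file_like {
	pthread_rwlock_t iolock;	/* analogue of XFS_IOLOCK_EXCL */
	long		 size;
};

static int allocate_range(struct file_like *f, long from, long to)
{
	(void)f; (void)from; (void)to;
	return 0;			/* stand-in for block allocation */
}

/* Extend the file: allocation and size change under one exclusive lock. */
static int preallocate_to(struct file_like *f, long new_size)
{
	int error = 0;

	pthread_rwlock_wrlock(&f->iolock);
	if (new_size > f->size)
		error = allocate_range(f, f->size, new_size);
	if (!error)
		f->size = new_size;
	pthread_rwlock_unlock(&f->iolock);
	return error;
}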