Diffstat (limited to 'fs/xfs/linux-2.6/xfs_buf.c')
-rw-r--r--  fs/xfs/linux-2.6/xfs_buf.c | 542
1 file changed, 333 insertions, 209 deletions
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 965df1227d64..44c2b0ef9a41 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -18,7 +18,7 @@
18#include "xfs.h" 18#include "xfs.h"
19#include <linux/stddef.h> 19#include <linux/stddef.h>
20#include <linux/errno.h> 20#include <linux/errno.h>
21#include <linux/slab.h> 21#include <linux/gfp.h>
22#include <linux/pagemap.h> 22#include <linux/pagemap.h>
23#include <linux/init.h> 23#include <linux/init.h>
24#include <linux/vmalloc.h> 24#include <linux/vmalloc.h>
@@ -33,12 +33,14 @@
33#include <linux/migrate.h> 33#include <linux/migrate.h>
34#include <linux/backing-dev.h> 34#include <linux/backing-dev.h>
35#include <linux/freezer.h> 35#include <linux/freezer.h>
36#include <linux/list_sort.h>
36 37
37#include "xfs_sb.h" 38#include "xfs_sb.h"
38#include "xfs_inum.h" 39#include "xfs_inum.h"
39#include "xfs_ag.h" 40#include "xfs_ag.h"
40#include "xfs_dmapi.h" 41#include "xfs_dmapi.h"
41#include "xfs_mount.h" 42#include "xfs_mount.h"
43#include "xfs_trace.h"
42 44
43static kmem_zone_t *xfs_buf_zone; 45static kmem_zone_t *xfs_buf_zone;
44STATIC int xfsbufd(void *); 46STATIC int xfsbufd(void *);
@@ -53,34 +55,6 @@ static struct workqueue_struct *xfslogd_workqueue;
53struct workqueue_struct *xfsdatad_workqueue; 55struct workqueue_struct *xfsdatad_workqueue;
54struct workqueue_struct *xfsconvertd_workqueue; 56struct workqueue_struct *xfsconvertd_workqueue;
55 57
56#ifdef XFS_BUF_TRACE
57void
58xfs_buf_trace(
59 xfs_buf_t *bp,
60 char *id,
61 void *data,
62 void *ra)
63{
64 ktrace_enter(xfs_buf_trace_buf,
65 bp, id,
66 (void *)(unsigned long)bp->b_flags,
67 (void *)(unsigned long)bp->b_hold.counter,
68 (void *)(unsigned long)bp->b_sema.count,
69 (void *)current,
70 data, ra,
71 (void *)(unsigned long)((bp->b_file_offset>>32) & 0xffffffff),
72 (void *)(unsigned long)(bp->b_file_offset & 0xffffffff),
73 (void *)(unsigned long)bp->b_buffer_length,
74 NULL, NULL, NULL, NULL, NULL);
75}
76ktrace_t *xfs_buf_trace_buf;
77#define XFS_BUF_TRACE_SIZE 4096
78#define XB_TRACE(bp, id, data) \
79 xfs_buf_trace(bp, id, (void *)data, (void *)__builtin_return_address(0))
80#else
81#define XB_TRACE(bp, id, data) do { } while (0)
82#endif
83
84#ifdef XFS_BUF_LOCK_TRACKING 58#ifdef XFS_BUF_LOCK_TRACKING
85# define XB_SET_OWNER(bp) ((bp)->b_last_holder = current->pid) 59# define XB_SET_OWNER(bp) ((bp)->b_last_holder = current->pid)
86# define XB_CLEAR_OWNER(bp) ((bp)->b_last_holder = -1) 60# define XB_CLEAR_OWNER(bp) ((bp)->b_last_holder = -1)
@@ -103,6 +77,27 @@ ktrace_t *xfs_buf_trace_buf;
103#define xfs_buf_deallocate(bp) \ 77#define xfs_buf_deallocate(bp) \
104 kmem_zone_free(xfs_buf_zone, (bp)); 78 kmem_zone_free(xfs_buf_zone, (bp));
105 79
80static inline int
81xfs_buf_is_vmapped(
82 struct xfs_buf *bp)
83{
84 /*
85 * Return true if the buffer is vmapped.
86 *
87 * The XBF_MAPPED flag is set if the buffer should be mapped, but the
88 * code is clever enough to know it doesn't have to map a single page,
89 * so the check has to be both for XBF_MAPPED and bp->b_page_count > 1.
90 */
91 return (bp->b_flags & XBF_MAPPED) && bp->b_page_count > 1;
92}
93
94static inline int
95xfs_buf_vmap_len(
96 struct xfs_buf *bp)
97{
98 return (bp->b_page_count * PAGE_SIZE) - bp->b_offset;
99}
100
106/* 101/*
107 * Page Region interfaces. 102 * Page Region interfaces.
108 * 103 *
@@ -149,7 +144,7 @@ page_region_mask(
149 return mask; 144 return mask;
150} 145}
151 146
152STATIC_INLINE void 147STATIC void
153set_page_region( 148set_page_region(
154 struct page *page, 149 struct page *page,
155 size_t offset, 150 size_t offset,
@@ -161,7 +156,7 @@ set_page_region(
161 SetPageUptodate(page); 156 SetPageUptodate(page);
162} 157}
163 158
164STATIC_INLINE int 159STATIC int
165test_page_region( 160test_page_region(
166 struct page *page, 161 struct page *page,
167 size_t offset, 162 size_t offset,
@@ -173,75 +168,6 @@ test_page_region(
173} 168}
174 169
175/* 170/*
176 * Mapping of multi-page buffers into contiguous virtual space
177 */
178
179typedef struct a_list {
180 void *vm_addr;
181 struct a_list *next;
182} a_list_t;
183
184static a_list_t *as_free_head;
185static int as_list_len;
186static DEFINE_SPINLOCK(as_lock);
187
188/*
189 * Try to batch vunmaps because they are costly.
190 */
191STATIC void
192free_address(
193 void *addr)
194{
195 a_list_t *aentry;
196
197#ifdef CONFIG_XEN
198 /*
199 * Xen needs to be able to make sure it can get an exclusive
200 * RO mapping of pages it wants to turn into a pagetable. If
201 * a newly allocated page is also still being vmap()ed by xfs,
202 * it will cause pagetable construction to fail. This is a
203 * quick workaround to always eagerly unmap pages so that Xen
204 * is happy.
205 */
206 vunmap(addr);
207 return;
208#endif
209
210 aentry = kmalloc(sizeof(a_list_t), GFP_NOWAIT);
211 if (likely(aentry)) {
212 spin_lock(&as_lock);
213 aentry->next = as_free_head;
214 aentry->vm_addr = addr;
215 as_free_head = aentry;
216 as_list_len++;
217 spin_unlock(&as_lock);
218 } else {
219 vunmap(addr);
220 }
221}
222
223STATIC void
224purge_addresses(void)
225{
226 a_list_t *aentry, *old;
227
228 if (as_free_head == NULL)
229 return;
230
231 spin_lock(&as_lock);
232 aentry = as_free_head;
233 as_free_head = NULL;
234 as_list_len = 0;
235 spin_unlock(&as_lock);
236
237 while ((old = aentry) != NULL) {
238 vunmap(aentry->vm_addr);
239 aentry = aentry->next;
240 kfree(old);
241 }
242}
243
244/*
245 * Internal xfs_buf_t object manipulation 171 * Internal xfs_buf_t object manipulation
246 */ 172 */
247 173
@@ -279,7 +205,8 @@ _xfs_buf_initialize(
279 init_waitqueue_head(&bp->b_waiters); 205 init_waitqueue_head(&bp->b_waiters);
280 206
281 XFS_STATS_INC(xb_create); 207 XFS_STATS_INC(xb_create);
282 XB_TRACE(bp, "initialize", target); 208
209 trace_xfs_buf_init(bp, _RET_IP_);
283} 210}
284 211
285/* 212/*
@@ -318,6 +245,7 @@ _xfs_buf_free_pages(
318{ 245{
319 if (bp->b_pages != bp->b_page_array) { 246 if (bp->b_pages != bp->b_page_array) {
320 kmem_free(bp->b_pages); 247 kmem_free(bp->b_pages);
248 bp->b_pages = NULL;
321 } 249 }
322} 250}
323 251
@@ -332,15 +260,16 @@ void
332xfs_buf_free( 260xfs_buf_free(
333 xfs_buf_t *bp) 261 xfs_buf_t *bp)
334{ 262{
335 XB_TRACE(bp, "free", 0); 263 trace_xfs_buf_free(bp, _RET_IP_);
336 264
337 ASSERT(list_empty(&bp->b_hash_list)); 265 ASSERT(list_empty(&bp->b_hash_list));
338 266
339 if (bp->b_flags & (_XBF_PAGE_CACHE|_XBF_PAGES)) { 267 if (bp->b_flags & (_XBF_PAGE_CACHE|_XBF_PAGES)) {
340 uint i; 268 uint i;
341 269
342 if ((bp->b_flags & XBF_MAPPED) && (bp->b_page_count > 1)) 270 if (xfs_buf_is_vmapped(bp))
343 free_address(bp->b_addr - bp->b_offset); 271 vm_unmap_ram(bp->b_addr - bp->b_offset,
272 bp->b_page_count);
344 273
345 for (i = 0; i < bp->b_page_count; i++) { 274 for (i = 0; i < bp->b_page_count; i++) {
346 struct page *page = bp->b_pages[i]; 275 struct page *page = bp->b_pages[i];
@@ -349,9 +278,8 @@ xfs_buf_free(
349 ASSERT(!PagePrivate(page)); 278 ASSERT(!PagePrivate(page));
350 page_cache_release(page); 279 page_cache_release(page);
351 } 280 }
352 _xfs_buf_free_pages(bp);
353 } 281 }
354 282 _xfs_buf_free_pages(bp);
355 xfs_buf_deallocate(bp); 283 xfs_buf_deallocate(bp);
356} 284}
357 285
@@ -445,7 +373,6 @@ _xfs_buf_lookup_pages(
445 if (page_count == bp->b_page_count) 373 if (page_count == bp->b_page_count)
446 bp->b_flags |= XBF_DONE; 374 bp->b_flags |= XBF_DONE;
447 375
448 XB_TRACE(bp, "lookup_pages", (long)page_count);
449 return error; 376 return error;
450} 377}
451 378
@@ -462,10 +389,8 @@ _xfs_buf_map_pages(
462 bp->b_addr = page_address(bp->b_pages[0]) + bp->b_offset; 389 bp->b_addr = page_address(bp->b_pages[0]) + bp->b_offset;
463 bp->b_flags |= XBF_MAPPED; 390 bp->b_flags |= XBF_MAPPED;
464 } else if (flags & XBF_MAPPED) { 391 } else if (flags & XBF_MAPPED) {
465 if (as_list_len > 64) 392 bp->b_addr = vm_map_ram(bp->b_pages, bp->b_page_count,
466 purge_addresses(); 393 -1, PAGE_KERNEL);
467 bp->b_addr = vmap(bp->b_pages, bp->b_page_count,
468 VM_MAP, PAGE_KERNEL);
469 if (unlikely(bp->b_addr == NULL)) 394 if (unlikely(bp->b_addr == NULL))
470 return -ENOMEM; 395 return -ENOMEM;
471 bp->b_addr += bp->b_offset; 396 bp->b_addr += bp->b_offset;
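
The hunk above replaces the open-coded vmap() batching (the as_free_head list removed earlier in this patch) with vm_map_ram()/vm_unmap_ram(), which batch and defer the expensive unmap and TLB-flush work internally. A minimal sketch of the pairing, assuming the four-argument vm_map_ram() of this kernel era; the map_buffer_pages()/unmap_buffer_pages() helpers and their parameters are hypothetical, not XFS code:

	#include <linux/mm.h>
	#include <linux/vmalloc.h>

	/*
	 * Map an array of pages into one contiguous kernel virtual range,
	 * and tear the mapping down again. vm_unmap_ram() must be passed
	 * the same page count that was given to vm_map_ram().
	 */
	static void *map_buffer_pages(struct page **pages, unsigned int page_count)
	{
		/* node = -1: no NUMA preference; PAGE_KERNEL: normal RW kernel mapping */
		return vm_map_ram(pages, page_count, -1, PAGE_KERNEL);
	}

	static void unmap_buffer_pages(void *addr, unsigned int page_count)
	{
		vm_unmap_ram(addr, page_count);
	}

In the patch itself the unmap is done on bp->b_addr - bp->b_offset, so the address handed back is the start of the mapping rather than the offset-adjusted data pointer, and bp->b_page_count supplies the matching page count.
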
@@ -548,7 +473,6 @@ found:
548 if (down_trylock(&bp->b_sema)) { 473 if (down_trylock(&bp->b_sema)) {
549 if (!(flags & XBF_TRYLOCK)) { 474 if (!(flags & XBF_TRYLOCK)) {
550 /* wait for buffer ownership */ 475 /* wait for buffer ownership */
551 XB_TRACE(bp, "get_lock", 0);
552 xfs_buf_lock(bp); 476 xfs_buf_lock(bp);
553 XFS_STATS_INC(xb_get_locked_waited); 477 XFS_STATS_INC(xb_get_locked_waited);
554 } else { 478 } else {
@@ -571,7 +495,8 @@ found:
571 ASSERT((bp->b_flags & _XBF_DELWRI_Q) == 0); 495 ASSERT((bp->b_flags & _XBF_DELWRI_Q) == 0);
572 bp->b_flags &= XBF_MAPPED; 496 bp->b_flags &= XBF_MAPPED;
573 } 497 }
574 XB_TRACE(bp, "got_lock", 0); 498
499 trace_xfs_buf_find(bp, flags, _RET_IP_);
575 XFS_STATS_INC(xb_get_locked); 500 XFS_STATS_INC(xb_get_locked);
576 return bp; 501 return bp;
577} 502}
@@ -582,7 +507,7 @@ found:
582 * although backing storage may not be. 507 * although backing storage may not be.
583 */ 508 */
584xfs_buf_t * 509xfs_buf_t *
585xfs_buf_get_flags( 510xfs_buf_get(
586 xfs_buftarg_t *target,/* target for buffer */ 511 xfs_buftarg_t *target,/* target for buffer */
587 xfs_off_t ioff, /* starting offset of range */ 512 xfs_off_t ioff, /* starting offset of range */
588 size_t isize, /* length of range */ 513 size_t isize, /* length of range */
@@ -627,7 +552,7 @@ xfs_buf_get_flags(
627 bp->b_bn = ioff; 552 bp->b_bn = ioff;
628 bp->b_count_desired = bp->b_buffer_length; 553 bp->b_count_desired = bp->b_buffer_length;
629 554
630 XB_TRACE(bp, "get", (unsigned long)flags); 555 trace_xfs_buf_get(bp, flags, _RET_IP_);
631 return bp; 556 return bp;
632 557
633 no_buffer: 558 no_buffer:
@@ -644,8 +569,6 @@ _xfs_buf_read(
644{ 569{
645 int status; 570 int status;
646 571
647 XB_TRACE(bp, "_xfs_buf_read", (unsigned long)flags);
648
649 ASSERT(!(flags & (XBF_DELWRI|XBF_WRITE))); 572 ASSERT(!(flags & (XBF_DELWRI|XBF_WRITE)));
650 ASSERT(bp->b_bn != XFS_BUF_DADDR_NULL); 573 ASSERT(bp->b_bn != XFS_BUF_DADDR_NULL);
651 574
@@ -661,7 +584,7 @@ _xfs_buf_read(
661} 584}
662 585
663xfs_buf_t * 586xfs_buf_t *
664xfs_buf_read_flags( 587xfs_buf_read(
665 xfs_buftarg_t *target, 588 xfs_buftarg_t *target,
666 xfs_off_t ioff, 589 xfs_off_t ioff,
667 size_t isize, 590 size_t isize,
@@ -671,21 +594,20 @@ xfs_buf_read_flags(
671 594
672 flags |= XBF_READ; 595 flags |= XBF_READ;
673 596
674 bp = xfs_buf_get_flags(target, ioff, isize, flags); 597 bp = xfs_buf_get(target, ioff, isize, flags);
675 if (bp) { 598 if (bp) {
599 trace_xfs_buf_read(bp, flags, _RET_IP_);
600
676 if (!XFS_BUF_ISDONE(bp)) { 601 if (!XFS_BUF_ISDONE(bp)) {
677 XB_TRACE(bp, "read", (unsigned long)flags);
678 XFS_STATS_INC(xb_get_read); 602 XFS_STATS_INC(xb_get_read);
679 _xfs_buf_read(bp, flags); 603 _xfs_buf_read(bp, flags);
680 } else if (flags & XBF_ASYNC) { 604 } else if (flags & XBF_ASYNC) {
681 XB_TRACE(bp, "read_async", (unsigned long)flags);
682 /* 605 /*
683 * Read ahead call which is already satisfied, 606 * Read ahead call which is already satisfied,
684 * drop the buffer 607 * drop the buffer
685 */ 608 */
686 goto no_buffer; 609 goto no_buffer;
687 } else { 610 } else {
688 XB_TRACE(bp, "read_done", (unsigned long)flags);
689 /* We do not want read in the flags */ 611 /* We do not want read in the flags */
690 bp->b_flags &= ~XBF_READ; 612 bp->b_flags &= ~XBF_READ;
691 } 613 }
@@ -718,7 +640,7 @@ xfs_buf_readahead(
718 return; 640 return;
719 641
720 flags |= (XBF_TRYLOCK|XBF_ASYNC|XBF_READ_AHEAD); 642 flags |= (XBF_TRYLOCK|XBF_ASYNC|XBF_READ_AHEAD);
721 xfs_buf_read_flags(target, ioff, isize, flags); 643 xfs_buf_read(target, ioff, isize, flags);
722} 644}
723 645
724xfs_buf_t * 646xfs_buf_t *
@@ -823,7 +745,7 @@ xfs_buf_get_noaddr(
823 745
824 xfs_buf_unlock(bp); 746 xfs_buf_unlock(bp);
825 747
826 XB_TRACE(bp, "no_daddr", len); 748 trace_xfs_buf_get_noaddr(bp, _RET_IP_);
827 return bp; 749 return bp;
828 750
829 fail_free_mem: 751 fail_free_mem:
@@ -845,8 +767,8 @@ void
845xfs_buf_hold( 767xfs_buf_hold(
846 xfs_buf_t *bp) 768 xfs_buf_t *bp)
847{ 769{
770 trace_xfs_buf_hold(bp, _RET_IP_);
848 atomic_inc(&bp->b_hold); 771 atomic_inc(&bp->b_hold);
849 XB_TRACE(bp, "hold", 0);
850} 772}
851 773
852/* 774/*
@@ -859,7 +781,7 @@ xfs_buf_rele(
859{ 781{
860 xfs_bufhash_t *hash = bp->b_hash; 782 xfs_bufhash_t *hash = bp->b_hash;
861 783
862 XB_TRACE(bp, "rele", bp->b_relse); 784 trace_xfs_buf_rele(bp, _RET_IP_);
863 785
864 if (unlikely(!hash)) { 786 if (unlikely(!hash)) {
865 ASSERT(!bp->b_relse); 787 ASSERT(!bp->b_relse);
@@ -909,21 +831,19 @@ xfs_buf_cond_lock(
909 int locked; 831 int locked;
910 832
911 locked = down_trylock(&bp->b_sema) == 0; 833 locked = down_trylock(&bp->b_sema) == 0;
912 if (locked) { 834 if (locked)
913 XB_SET_OWNER(bp); 835 XB_SET_OWNER(bp);
914 } 836
915 XB_TRACE(bp, "cond_lock", (long)locked); 837 trace_xfs_buf_cond_lock(bp, _RET_IP_);
916 return locked ? 0 : -EBUSY; 838 return locked ? 0 : -EBUSY;
917} 839}
918 840
919#if defined(DEBUG) || defined(XFS_BLI_TRACE)
920int 841int
921xfs_buf_lock_value( 842xfs_buf_lock_value(
922 xfs_buf_t *bp) 843 xfs_buf_t *bp)
923{ 844{
924 return bp->b_sema.count; 845 return bp->b_sema.count;
925} 846}
926#endif
927 847
928/* 848/*
929 * Locks a buffer object. 849 * Locks a buffer object.
@@ -935,12 +855,14 @@ void
935xfs_buf_lock( 855xfs_buf_lock(
936 xfs_buf_t *bp) 856 xfs_buf_t *bp)
937{ 857{
938 XB_TRACE(bp, "lock", 0); 858 trace_xfs_buf_lock(bp, _RET_IP_);
859
939 if (atomic_read(&bp->b_io_remaining)) 860 if (atomic_read(&bp->b_io_remaining))
940 blk_run_address_space(bp->b_target->bt_mapping); 861 blk_run_address_space(bp->b_target->bt_mapping);
941 down(&bp->b_sema); 862 down(&bp->b_sema);
942 XB_SET_OWNER(bp); 863 XB_SET_OWNER(bp);
943 XB_TRACE(bp, "locked", 0); 864
865 trace_xfs_buf_lock_done(bp, _RET_IP_);
944} 866}
945 867
946/* 868/*
@@ -962,7 +884,8 @@ xfs_buf_unlock(
962 884
963 XB_CLEAR_OWNER(bp); 885 XB_CLEAR_OWNER(bp);
964 up(&bp->b_sema); 886 up(&bp->b_sema);
965 XB_TRACE(bp, "unlock", 0); 887
888 trace_xfs_buf_unlock(bp, _RET_IP_);
966} 889}
967 890
968 891
@@ -974,17 +897,18 @@ void
974xfs_buf_pin( 897xfs_buf_pin(
975 xfs_buf_t *bp) 898 xfs_buf_t *bp)
976{ 899{
900 trace_xfs_buf_pin(bp, _RET_IP_);
977 atomic_inc(&bp->b_pin_count); 901 atomic_inc(&bp->b_pin_count);
978 XB_TRACE(bp, "pin", (long)bp->b_pin_count.counter);
979} 902}
980 903
981void 904void
982xfs_buf_unpin( 905xfs_buf_unpin(
983 xfs_buf_t *bp) 906 xfs_buf_t *bp)
984{ 907{
908 trace_xfs_buf_unpin(bp, _RET_IP_);
909
985 if (atomic_dec_and_test(&bp->b_pin_count)) 910 if (atomic_dec_and_test(&bp->b_pin_count))
986 wake_up_all(&bp->b_waiters); 911 wake_up_all(&bp->b_waiters);
987 XB_TRACE(bp, "unpin", (long)bp->b_pin_count.counter);
988} 912}
989 913
990int 914int
@@ -1035,7 +959,7 @@ xfs_buf_iodone_work(
1035 */ 959 */
1036 if ((bp->b_error == EOPNOTSUPP) && 960 if ((bp->b_error == EOPNOTSUPP) &&
1037 (bp->b_flags & (XBF_ORDERED|XBF_ASYNC)) == (XBF_ORDERED|XBF_ASYNC)) { 961 (bp->b_flags & (XBF_ORDERED|XBF_ASYNC)) == (XBF_ORDERED|XBF_ASYNC)) {
1038 XB_TRACE(bp, "ordered_retry", bp->b_iodone); 962 trace_xfs_buf_ordered_retry(bp, _RET_IP_);
1039 bp->b_flags &= ~XBF_ORDERED; 963 bp->b_flags &= ~XBF_ORDERED;
1040 bp->b_flags |= _XFS_BARRIER_FAILED; 964 bp->b_flags |= _XFS_BARRIER_FAILED;
1041 xfs_buf_iorequest(bp); 965 xfs_buf_iorequest(bp);
@@ -1050,12 +974,12 @@ xfs_buf_ioend(
1050 xfs_buf_t *bp, 974 xfs_buf_t *bp,
1051 int schedule) 975 int schedule)
1052{ 976{
977 trace_xfs_buf_iodone(bp, _RET_IP_);
978
1053 bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_READ_AHEAD); 979 bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_READ_AHEAD);
1054 if (bp->b_error == 0) 980 if (bp->b_error == 0)
1055 bp->b_flags |= XBF_DONE; 981 bp->b_flags |= XBF_DONE;
1056 982
1057 XB_TRACE(bp, "iodone", bp->b_iodone);
1058
1059 if ((bp->b_iodone) || (bp->b_flags & XBF_ASYNC)) { 983 if ((bp->b_iodone) || (bp->b_flags & XBF_ASYNC)) {
1060 if (schedule) { 984 if (schedule) {
1061 INIT_WORK(&bp->b_iodone_work, xfs_buf_iodone_work); 985 INIT_WORK(&bp->b_iodone_work, xfs_buf_iodone_work);
@@ -1075,26 +999,34 @@ xfs_buf_ioerror(
1075{ 999{
1076 ASSERT(error >= 0 && error <= 0xffff); 1000 ASSERT(error >= 0 && error <= 0xffff);
1077 bp->b_error = (unsigned short)error; 1001 bp->b_error = (unsigned short)error;
1078 XB_TRACE(bp, "ioerror", (unsigned long)error); 1002 trace_xfs_buf_ioerror(bp, error, _RET_IP_);
1079} 1003}
1080 1004
1081int 1005int
1082xfs_bawrite( 1006xfs_bwrite(
1083 void *mp, 1007 struct xfs_mount *mp,
1084 struct xfs_buf *bp) 1008 struct xfs_buf *bp)
1085{ 1009{
1086 XB_TRACE(bp, "bawrite", 0); 1010 int iowait = (bp->b_flags & XBF_ASYNC) == 0;
1011 int error = 0;
1087 1012
1088 ASSERT(bp->b_bn != XFS_BUF_DADDR_NULL); 1013 bp->b_strat = xfs_bdstrat_cb;
1014 bp->b_mount = mp;
1015 bp->b_flags |= XBF_WRITE;
1016 if (!iowait)
1017 bp->b_flags |= _XBF_RUN_QUEUES;
1089 1018
1090 xfs_buf_delwri_dequeue(bp); 1019 xfs_buf_delwri_dequeue(bp);
1020 xfs_buf_iostrategy(bp);
1091 1021
1092 bp->b_flags &= ~(XBF_READ | XBF_DELWRI | XBF_READ_AHEAD); 1022 if (iowait) {
1093 bp->b_flags |= (XBF_WRITE | XBF_ASYNC | _XBF_RUN_QUEUES); 1023 error = xfs_buf_iowait(bp);
1024 if (error)
1025 xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
1026 xfs_buf_relse(bp);
1027 }
1094 1028
1095 bp->b_mount = mp; 1029 return error;
1096 bp->b_strat = xfs_bdstrat_cb;
1097 return xfs_bdstrat_cb(bp);
1098} 1030}
1099 1031
1100void 1032void
@@ -1102,7 +1034,7 @@ xfs_bdwrite(
1102 void *mp, 1034 void *mp,
1103 struct xfs_buf *bp) 1035 struct xfs_buf *bp)
1104{ 1036{
1105 XB_TRACE(bp, "bdwrite", 0); 1037 trace_xfs_buf_bdwrite(bp, _RET_IP_);
1106 1038
1107 bp->b_strat = xfs_bdstrat_cb; 1039 bp->b_strat = xfs_bdstrat_cb;
1108 bp->b_mount = mp; 1040 bp->b_mount = mp;
@@ -1113,7 +1045,127 @@ xfs_bdwrite(
1113 xfs_buf_delwri_queue(bp, 1); 1045 xfs_buf_delwri_queue(bp, 1);
1114} 1046}
1115 1047
1116STATIC_INLINE void 1048/*
1049 * Called when we want to stop a buffer from getting written or read.
1050 * We attach the EIO error, muck with its flags, and call biodone
1051 * so that the proper iodone callbacks get called.
1052 */
1053STATIC int
1054xfs_bioerror(
1055 xfs_buf_t *bp)
1056{
1057#ifdef XFSERRORDEBUG
1058 ASSERT(XFS_BUF_ISREAD(bp) || bp->b_iodone);
1059#endif
1060
1061 /*
1062 * No need to wait until the buffer is unpinned, we aren't flushing it.
1063 */
1064 XFS_BUF_ERROR(bp, EIO);
1065
1066 /*
1067 * We're calling biodone, so delete XBF_DONE flag.
1068 */
1069 XFS_BUF_UNREAD(bp);
1070 XFS_BUF_UNDELAYWRITE(bp);
1071 XFS_BUF_UNDONE(bp);
1072 XFS_BUF_STALE(bp);
1073
1074 XFS_BUF_CLR_BDSTRAT_FUNC(bp);
1075 xfs_biodone(bp);
1076
1077 return EIO;
1078}
1079
1080/*
1081 * Same as xfs_bioerror, except that we are releasing the buffer
1082 * here ourselves, and avoiding the biodone call.
1083 * This is meant for userdata errors; metadata bufs come with
1084 * iodone functions attached, so that we can track down errors.
1085 */
1086STATIC int
1087xfs_bioerror_relse(
1088 struct xfs_buf *bp)
1089{
1090 int64_t fl = XFS_BUF_BFLAGS(bp);
1091 /*
1092 * No need to wait until the buffer is unpinned.
1093 * We aren't flushing it.
1094 *
1095 * chunkhold expects B_DONE to be set, whether
1096 * we actually finish the I/O or not. We don't want to
1097 * change that interface.
1098 */
1099 XFS_BUF_UNREAD(bp);
1100 XFS_BUF_UNDELAYWRITE(bp);
1101 XFS_BUF_DONE(bp);
1102 XFS_BUF_STALE(bp);
1103 XFS_BUF_CLR_IODONE_FUNC(bp);
1104 XFS_BUF_CLR_BDSTRAT_FUNC(bp);
1105 if (!(fl & XBF_ASYNC)) {
1106 /*
1107 * Mark b_error and B_ERROR _both_.
1108 * Lot's of chunkcache code assumes that.
1109 * There's no reason to mark error for
1110 * ASYNC buffers.
1111 */
1112 XFS_BUF_ERROR(bp, EIO);
1113 XFS_BUF_FINISH_IOWAIT(bp);
1114 } else {
1115 xfs_buf_relse(bp);
1116 }
1117
1118 return EIO;
1119}
1120
1121
1122/*
1123 * All xfs metadata buffers except log state machine buffers
1124 * get this attached as their b_bdstrat callback function.
1125 * This is so that we can catch a buffer
1126 * after prematurely unpinning it to forcibly shutdown the filesystem.
1127 */
1128int
1129xfs_bdstrat_cb(
1130 struct xfs_buf *bp)
1131{
1132 if (XFS_FORCED_SHUTDOWN(bp->b_mount)) {
1133 trace_xfs_bdstrat_shut(bp, _RET_IP_);
1134 /*
1135 * Metadata write that didn't get logged but
1136 * written delayed anyway. These aren't associated
1137 * with a transaction, and can be ignored.
1138 */
1139 if (!bp->b_iodone && !XFS_BUF_ISREAD(bp))
1140 return xfs_bioerror_relse(bp);
1141 else
1142 return xfs_bioerror(bp);
1143 }
1144
1145 xfs_buf_iorequest(bp);
1146 return 0;
1147}
1148
1149/*
1150 * Wrapper around bdstrat so that we can stop data from going to disk in case
1151 * we are shutting down the filesystem. Typically user data goes thru this
1152 * path; one of the exceptions is the superblock.
1153 */
1154void
1155xfsbdstrat(
1156 struct xfs_mount *mp,
1157 struct xfs_buf *bp)
1158{
1159 if (XFS_FORCED_SHUTDOWN(mp)) {
1160 trace_xfs_bdstrat_shut(bp, _RET_IP_);
1161 xfs_bioerror_relse(bp);
1162 return;
1163 }
1164
1165 xfs_buf_iorequest(bp);
1166}
1167
1168STATIC void
1117_xfs_buf_ioend( 1169_xfs_buf_ioend(
1118 xfs_buf_t *bp, 1170 xfs_buf_t *bp,
1119 int schedule) 1171 int schedule)
@@ -1135,6 +1187,9 @@ xfs_buf_bio_end_io(
1135 1187
1136 xfs_buf_ioerror(bp, -error); 1188 xfs_buf_ioerror(bp, -error);
1137 1189
1190 if (!error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ))
1191 invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp));
1192
1138 do { 1193 do {
1139 struct page *page = bvec->bv_page; 1194 struct page *page = bvec->bv_page;
1140 1195
@@ -1177,10 +1232,14 @@ _xfs_buf_ioapply(
1177 if (bp->b_flags & XBF_ORDERED) { 1232 if (bp->b_flags & XBF_ORDERED) {
1178 ASSERT(!(bp->b_flags & XBF_READ)); 1233 ASSERT(!(bp->b_flags & XBF_READ));
1179 rw = WRITE_BARRIER; 1234 rw = WRITE_BARRIER;
1180 } else if (bp->b_flags & _XBF_RUN_QUEUES) { 1235 } else if (bp->b_flags & XBF_LOG_BUFFER) {
1181 ASSERT(!(bp->b_flags & XBF_READ_AHEAD)); 1236 ASSERT(!(bp->b_flags & XBF_READ_AHEAD));
1182 bp->b_flags &= ~_XBF_RUN_QUEUES; 1237 bp->b_flags &= ~_XBF_RUN_QUEUES;
1183 rw = (bp->b_flags & XBF_WRITE) ? WRITE_SYNC : READ_SYNC; 1238 rw = (bp->b_flags & XBF_WRITE) ? WRITE_SYNC : READ_SYNC;
1239 } else if (bp->b_flags & _XBF_RUN_QUEUES) {
1240 ASSERT(!(bp->b_flags & XBF_READ_AHEAD));
1241 bp->b_flags &= ~_XBF_RUN_QUEUES;
1242 rw = (bp->b_flags & XBF_WRITE) ? WRITE_META : READ_META;
1184 } else { 1243 } else {
1185 rw = (bp->b_flags & XBF_WRITE) ? WRITE : 1244 rw = (bp->b_flags & XBF_WRITE) ? WRITE :
1186 (bp->b_flags & XBF_READ_AHEAD) ? READA : READ; 1245 (bp->b_flags & XBF_READ_AHEAD) ? READA : READ;
@@ -1240,6 +1299,10 @@ next_chunk:
1240 1299
1241submit_io: 1300submit_io:
1242 if (likely(bio->bi_size)) { 1301 if (likely(bio->bi_size)) {
1302 if (xfs_buf_is_vmapped(bp)) {
1303 flush_kernel_vmap_range(bp->b_addr,
1304 xfs_buf_vmap_len(bp));
1305 }
1243 submit_bio(rw, bio); 1306 submit_bio(rw, bio);
1244 if (size) 1307 if (size)
1245 goto next_chunk; 1308 goto next_chunk;
@@ -1253,7 +1316,7 @@ int
1253xfs_buf_iorequest( 1316xfs_buf_iorequest(
1254 xfs_buf_t *bp) 1317 xfs_buf_t *bp)
1255{ 1318{
1256 XB_TRACE(bp, "iorequest", 0); 1319 trace_xfs_buf_iorequest(bp, _RET_IP_);
1257 1320
1258 if (bp->b_flags & XBF_DELWRI) { 1321 if (bp->b_flags & XBF_DELWRI) {
1259 xfs_buf_delwri_queue(bp, 1); 1322 xfs_buf_delwri_queue(bp, 1);
@@ -1287,11 +1350,13 @@ int
1287xfs_buf_iowait( 1350xfs_buf_iowait(
1288 xfs_buf_t *bp) 1351 xfs_buf_t *bp)
1289{ 1352{
1290 XB_TRACE(bp, "iowait", 0); 1353 trace_xfs_buf_iowait(bp, _RET_IP_);
1354
1291 if (atomic_read(&bp->b_io_remaining)) 1355 if (atomic_read(&bp->b_io_remaining))
1292 blk_run_address_space(bp->b_target->bt_mapping); 1356 blk_run_address_space(bp->b_target->bt_mapping);
1293 wait_for_completion(&bp->b_iowait); 1357 wait_for_completion(&bp->b_iowait);
1294 XB_TRACE(bp, "iowaited", (long)bp->b_error); 1358
1359 trace_xfs_buf_iowait_done(bp, _RET_IP_);
1295 return bp->b_error; 1360 return bp->b_error;
1296} 1361}
1297 1362
@@ -1318,7 +1383,7 @@ xfs_buf_iomove(
1318 xfs_buf_t *bp, /* buffer to process */ 1383 xfs_buf_t *bp, /* buffer to process */
1319 size_t boff, /* starting buffer offset */ 1384 size_t boff, /* starting buffer offset */
1320 size_t bsize, /* length to copy */ 1385 size_t bsize, /* length to copy */
1321 caddr_t data, /* data address */ 1386 void *data, /* data address */
1322 xfs_buf_rw_t mode) /* read/write/zero flag */ 1387 xfs_buf_rw_t mode) /* read/write/zero flag */
1323{ 1388{
1324 size_t bend, cpoff, csize; 1389 size_t bend, cpoff, csize;
@@ -1400,8 +1465,8 @@ xfs_alloc_bufhash(
1400 1465
1401 btp->bt_hashshift = external ? 3 : 8; /* 8 or 256 buckets */ 1466 btp->bt_hashshift = external ? 3 : 8; /* 8 or 256 buckets */
1402 btp->bt_hashmask = (1 << btp->bt_hashshift) - 1; 1467 btp->bt_hashmask = (1 << btp->bt_hashshift) - 1;
1403 btp->bt_hash = kmem_zalloc((1 << btp->bt_hashshift) * 1468 btp->bt_hash = kmem_zalloc_large((1 << btp->bt_hashshift) *
1404 sizeof(xfs_bufhash_t), KM_SLEEP | KM_LARGE); 1469 sizeof(xfs_bufhash_t));
1405 for (i = 0; i < (1 << btp->bt_hashshift); i++) { 1470 for (i = 0; i < (1 << btp->bt_hashshift); i++) {
1406 spin_lock_init(&btp->bt_hash[i].bh_lock); 1471 spin_lock_init(&btp->bt_hash[i].bh_lock);
1407 INIT_LIST_HEAD(&btp->bt_hash[i].bh_list); 1472 INIT_LIST_HEAD(&btp->bt_hash[i].bh_list);
@@ -1412,7 +1477,7 @@ STATIC void
1412xfs_free_bufhash( 1477xfs_free_bufhash(
1413 xfs_buftarg_t *btp) 1478 xfs_buftarg_t *btp)
1414{ 1479{
1415 kmem_free(btp->bt_hash); 1480 kmem_free_large(btp->bt_hash);
1416 btp->bt_hash = NULL; 1481 btp->bt_hash = NULL;
1417} 1482}
1418 1483
@@ -1604,7 +1669,8 @@ xfs_buf_delwri_queue(
1604 struct list_head *dwq = &bp->b_target->bt_delwrite_queue; 1669 struct list_head *dwq = &bp->b_target->bt_delwrite_queue;
1605 spinlock_t *dwlk = &bp->b_target->bt_delwrite_lock; 1670 spinlock_t *dwlk = &bp->b_target->bt_delwrite_lock;
1606 1671
1607 XB_TRACE(bp, "delwri_q", (long)unlock); 1672 trace_xfs_buf_delwri_queue(bp, _RET_IP_);
1673
1608 ASSERT((bp->b_flags&(XBF_DELWRI|XBF_ASYNC)) == (XBF_DELWRI|XBF_ASYNC)); 1674 ASSERT((bp->b_flags&(XBF_DELWRI|XBF_ASYNC)) == (XBF_DELWRI|XBF_ASYNC));
1609 1675
1610 spin_lock(dwlk); 1676 spin_lock(dwlk);
@@ -1616,6 +1682,11 @@ xfs_buf_delwri_queue(
1616 list_del(&bp->b_list); 1682 list_del(&bp->b_list);
1617 } 1683 }
1618 1684
1685 if (list_empty(dwq)) {
1686 /* start xfsbufd as it is about to have something to do */
1687 wake_up_process(bp->b_target->bt_task);
1688 }
1689
1619 bp->b_flags |= _XBF_DELWRI_Q; 1690 bp->b_flags |= _XBF_DELWRI_Q;
1620 list_add_tail(&bp->b_list, dwq); 1691 list_add_tail(&bp->b_list, dwq);
1621 bp->b_queuetime = jiffies; 1692 bp->b_queuetime = jiffies;
@@ -1644,7 +1715,36 @@ xfs_buf_delwri_dequeue(
1644 if (dequeued) 1715 if (dequeued)
1645 xfs_buf_rele(bp); 1716 xfs_buf_rele(bp);
1646 1717
1647 XB_TRACE(bp, "delwri_dq", (long)dequeued); 1718 trace_xfs_buf_delwri_dequeue(bp, _RET_IP_);
1719}
1720
1721/*
1722 * If a delwri buffer needs to be pushed before it has aged out, then promote
1723 * it to the head of the delwri queue so that it will be flushed on the next
1724 * xfsbufd run. We do this by resetting the queuetime of the buffer to be older
1725 * than the age currently needed to flush the buffer. Hence the next time the
1726 * xfsbufd sees it is guaranteed to be considered old enough to flush.
1727 */
1728void
1729xfs_buf_delwri_promote(
1730 struct xfs_buf *bp)
1731{
1732 struct xfs_buftarg *btp = bp->b_target;
1733 long age = xfs_buf_age_centisecs * msecs_to_jiffies(10) + 1;
1734
1735 ASSERT(bp->b_flags & XBF_DELWRI);
1736 ASSERT(bp->b_flags & _XBF_DELWRI_Q);
1737
1738 /*
1739 * Check the buffer age before locking the delayed write queue as we
1740 * don't need to promote buffers that are already past the flush age.
1741 */
1742 if (bp->b_queuetime < jiffies - age)
1743 return;
1744 bp->b_queuetime = jiffies - age;
1745 spin_lock(&btp->bt_delwrite_lock);
1746 list_move(&bp->b_list, &btp->bt_delwrite_queue);
1747 spin_unlock(&btp->bt_delwrite_lock);
1648} 1748}
1649 1749
1650STATIC void 1750STATIC void
@@ -1665,6 +1765,8 @@ xfsbufd_wakeup(
1665 list_for_each_entry(btp, &xfs_buftarg_list, bt_list) { 1765 list_for_each_entry(btp, &xfs_buftarg_list, bt_list) {
1666 if (test_bit(XBT_FORCE_SLEEP, &btp->bt_flags)) 1766 if (test_bit(XBT_FORCE_SLEEP, &btp->bt_flags))
1667 continue; 1767 continue;
1768 if (list_empty(&btp->bt_delwrite_queue))
1769 continue;
1668 set_bit(XBT_FORCE_FLUSH, &btp->bt_flags); 1770 set_bit(XBT_FORCE_FLUSH, &btp->bt_flags);
1669 wake_up_process(btp->bt_task); 1771 wake_up_process(btp->bt_task);
1670 } 1772 }
@@ -1692,7 +1794,7 @@ xfs_buf_delwri_split(
1692 INIT_LIST_HEAD(list); 1794 INIT_LIST_HEAD(list);
1693 spin_lock(dwlk); 1795 spin_lock(dwlk);
1694 list_for_each_entry_safe(bp, n, dwq, b_list) { 1796 list_for_each_entry_safe(bp, n, dwq, b_list) {
1695 XB_TRACE(bp, "walkq1", (long)xfs_buf_ispin(bp)); 1797 trace_xfs_buf_delwri_split(bp, _RET_IP_);
1696 ASSERT(bp->b_flags & XBF_DELWRI); 1798 ASSERT(bp->b_flags & XBF_DELWRI);
1697 1799
1698 if (!xfs_buf_ispin(bp) && !xfs_buf_cond_lock(bp)) { 1800 if (!xfs_buf_ispin(bp) && !xfs_buf_cond_lock(bp)) {
@@ -1715,20 +1817,53 @@ xfs_buf_delwri_split(
1715 1817
1716} 1818}
1717 1819
1820/*
1821 * Compare function is more complex than it needs to be because
1822 * the return value is only 32 bits and we are doing comparisons
1823 * on 64 bit values
1824 */
1825static int
1826xfs_buf_cmp(
1827 void *priv,
1828 struct list_head *a,
1829 struct list_head *b)
1830{
1831 struct xfs_buf *ap = container_of(a, struct xfs_buf, b_list);
1832 struct xfs_buf *bp = container_of(b, struct xfs_buf, b_list);
1833 xfs_daddr_t diff;
1834
1835 diff = ap->b_bn - bp->b_bn;
1836 if (diff < 0)
1837 return -1;
1838 if (diff > 0)
1839 return 1;
1840 return 0;
1841}
1842
1843void
1844xfs_buf_delwri_sort(
1845 xfs_buftarg_t *target,
1846 struct list_head *list)
1847{
1848 list_sort(NULL, list, xfs_buf_cmp);
1849}
1850
1718STATIC int 1851STATIC int
1719xfsbufd( 1852xfsbufd(
1720 void *data) 1853 void *data)
1721{ 1854{
1722 struct list_head tmp; 1855 xfs_buftarg_t *target = (xfs_buftarg_t *)data;
1723 xfs_buftarg_t *target = (xfs_buftarg_t *)data;
1724 int count;
1725 xfs_buf_t *bp;
1726 1856
1727 current->flags |= PF_MEMALLOC; 1857 current->flags |= PF_MEMALLOC;
1728 1858
1729 set_freezable(); 1859 set_freezable();
1730 1860
1731 do { 1861 do {
1862 long age = xfs_buf_age_centisecs * msecs_to_jiffies(10);
1863 long tout = xfs_buf_timer_centisecs * msecs_to_jiffies(10);
1864 int count = 0;
1865 struct list_head tmp;
1866
1732 if (unlikely(freezing(current))) { 1867 if (unlikely(freezing(current))) {
1733 set_bit(XBT_FORCE_SLEEP, &target->bt_flags); 1868 set_bit(XBT_FORCE_SLEEP, &target->bt_flags);
1734 refrigerator(); 1869 refrigerator();
@@ -1736,24 +1871,20 @@ xfsbufd(
1736 clear_bit(XBT_FORCE_SLEEP, &target->bt_flags); 1871 clear_bit(XBT_FORCE_SLEEP, &target->bt_flags);
1737 } 1872 }
1738 1873
1739 schedule_timeout_interruptible( 1874 /* sleep for a long time if there is nothing to do. */
1740 xfs_buf_timer_centisecs * msecs_to_jiffies(10)); 1875 if (list_empty(&target->bt_delwrite_queue))
1876 tout = MAX_SCHEDULE_TIMEOUT;
1877 schedule_timeout_interruptible(tout);
1741 1878
1742 xfs_buf_delwri_split(target, &tmp, 1879 xfs_buf_delwri_split(target, &tmp, age);
1743 xfs_buf_age_centisecs * msecs_to_jiffies(10)); 1880 list_sort(NULL, &tmp, xfs_buf_cmp);
1744
1745 count = 0;
1746 while (!list_empty(&tmp)) { 1881 while (!list_empty(&tmp)) {
1747 bp = list_entry(tmp.next, xfs_buf_t, b_list); 1882 struct xfs_buf *bp;
1748 ASSERT(target == bp->b_target); 1883 bp = list_first_entry(&tmp, struct xfs_buf, b_list);
1749
1750 list_del_init(&bp->b_list); 1884 list_del_init(&bp->b_list);
1751 xfs_buf_iostrategy(bp); 1885 xfs_buf_iostrategy(bp);
1752 count++; 1886 count++;
1753 } 1887 }
1754
1755 if (as_list_len > 0)
1756 purge_addresses();
1757 if (count) 1888 if (count)
1758 blk_run_address_space(target->bt_mapping); 1889 blk_run_address_space(target->bt_mapping);
1759 1890
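
The xfs_buf_cmp() callback added above reduces a 64-bit block-number comparison to -1/0/+1 because list_sort()'s compare function returns a 32-bit int: truncating a raw 64-bit difference can collapse it to zero or flip its sign (0x100000000 truncates to 0, 0x180000000 truncates to a negative int). A self-contained sketch of the same idea, comparing the 64-bit keys and returning only the sign, with a hypothetical demo_buf structure in place of xfs_buf and the three-argument list_sort() of this kernel era:

	#include <linux/kernel.h>
	#include <linux/list.h>
	#include <linux/list_sort.h>
	#include <linux/types.h>

	struct demo_buf {
		struct list_head list;
		u64 blockno;	/* 64-bit sort key, like bp->b_bn */
	};

	/* compare by comparing, not by subtracting, so no truncation can occur */
	static int demo_cmp(void *priv, struct list_head *a, struct list_head *b)
	{
		struct demo_buf *ap = container_of(a, struct demo_buf, list);
		struct demo_buf *bp = container_of(b, struct demo_buf, list);

		if (ap->blockno < bp->blockno)
			return -1;
		if (ap->blockno > bp->blockno)
			return 1;
		return 0;
	}

	/* sort a delwri-style list into ascending block order before issuing I/O */
	static void demo_sort(struct list_head *head)
	{
		list_sort(NULL, head, demo_cmp);
	}
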
@@ -1772,42 +1903,45 @@ xfs_flush_buftarg(
1772 xfs_buftarg_t *target, 1903 xfs_buftarg_t *target,
1773 int wait) 1904 int wait)
1774{ 1905{
1775 struct list_head tmp; 1906 xfs_buf_t *bp;
1776 xfs_buf_t *bp, *n;
1777 int pincount = 0; 1907 int pincount = 0;
1908 LIST_HEAD(tmp_list);
1909 LIST_HEAD(wait_list);
1778 1910
1779 xfs_buf_runall_queues(xfsconvertd_workqueue); 1911 xfs_buf_runall_queues(xfsconvertd_workqueue);
1780 xfs_buf_runall_queues(xfsdatad_workqueue); 1912 xfs_buf_runall_queues(xfsdatad_workqueue);
1781 xfs_buf_runall_queues(xfslogd_workqueue); 1913 xfs_buf_runall_queues(xfslogd_workqueue);
1782 1914
1783 set_bit(XBT_FORCE_FLUSH, &target->bt_flags); 1915 set_bit(XBT_FORCE_FLUSH, &target->bt_flags);
1784 pincount = xfs_buf_delwri_split(target, &tmp, 0); 1916 pincount = xfs_buf_delwri_split(target, &tmp_list, 0);
1785 1917
1786 /* 1918 /*
1787 * Dropped the delayed write list lock, now walk the temporary list 1919 * Dropped the delayed write list lock, now walk the temporary list.
1920 * All I/O is issued async and then if we need to wait for completion
1921 * we do that after issuing all the IO.
1788 */ 1922 */
1789 list_for_each_entry_safe(bp, n, &tmp, b_list) { 1923 list_sort(NULL, &tmp_list, xfs_buf_cmp);
1924 while (!list_empty(&tmp_list)) {
1925 bp = list_first_entry(&tmp_list, struct xfs_buf, b_list);
1790 ASSERT(target == bp->b_target); 1926 ASSERT(target == bp->b_target);
1791 if (wait) 1927 list_del_init(&bp->b_list);
1928 if (wait) {
1792 bp->b_flags &= ~XBF_ASYNC; 1929 bp->b_flags &= ~XBF_ASYNC;
1793 else 1930 list_add(&bp->b_list, &wait_list);
1794 list_del_init(&bp->b_list); 1931 }
1795
1796 xfs_buf_iostrategy(bp); 1932 xfs_buf_iostrategy(bp);
1797 } 1933 }
1798 1934
1799 if (wait) 1935 if (wait) {
1936 /* Expedite and wait for IO to complete. */
1800 blk_run_address_space(target->bt_mapping); 1937 blk_run_address_space(target->bt_mapping);
1938 while (!list_empty(&wait_list)) {
1939 bp = list_first_entry(&wait_list, struct xfs_buf, b_list);
1801 1940
1802 /* 1941 list_del_init(&bp->b_list);
1803 * Remaining list items must be flushed before returning 1942 xfs_iowait(bp);
1804 */ 1943 xfs_buf_relse(bp);
1805 while (!list_empty(&tmp)) { 1944 }
1806 bp = list_entry(tmp.next, xfs_buf_t, b_list);
1807
1808 list_del_init(&bp->b_list);
1809 xfs_iowait(bp);
1810 xfs_buf_relse(bp);
1811 } 1945 }
1812 1946
1813 return pincount; 1947 return pincount;
@@ -1816,14 +1950,10 @@ xfs_flush_buftarg(
1816int __init 1950int __init
1817xfs_buf_init(void) 1951xfs_buf_init(void)
1818{ 1952{
1819#ifdef XFS_BUF_TRACE
1820 xfs_buf_trace_buf = ktrace_alloc(XFS_BUF_TRACE_SIZE, KM_NOFS);
1821#endif
1822
1823 xfs_buf_zone = kmem_zone_init_flags(sizeof(xfs_buf_t), "xfs_buf", 1953 xfs_buf_zone = kmem_zone_init_flags(sizeof(xfs_buf_t), "xfs_buf",
1824 KM_ZONE_HWALIGN, NULL); 1954 KM_ZONE_HWALIGN, NULL);
1825 if (!xfs_buf_zone) 1955 if (!xfs_buf_zone)
1826 goto out_free_trace_buf; 1956 goto out;
1827 1957
1828 xfslogd_workqueue = create_workqueue("xfslogd"); 1958 xfslogd_workqueue = create_workqueue("xfslogd");
1829 if (!xfslogd_workqueue) 1959 if (!xfslogd_workqueue)
@@ -1846,10 +1976,7 @@ xfs_buf_init(void)
1846 destroy_workqueue(xfslogd_workqueue); 1976 destroy_workqueue(xfslogd_workqueue);
1847 out_free_buf_zone: 1977 out_free_buf_zone:
1848 kmem_zone_destroy(xfs_buf_zone); 1978 kmem_zone_destroy(xfs_buf_zone);
1849 out_free_trace_buf: 1979 out:
1850#ifdef XFS_BUF_TRACE
1851 ktrace_free(xfs_buf_trace_buf);
1852#endif
1853 return -ENOMEM; 1980 return -ENOMEM;
1854} 1981}
1855 1982
@@ -1861,9 +1988,6 @@ xfs_buf_terminate(void)
1861 destroy_workqueue(xfsdatad_workqueue); 1988 destroy_workqueue(xfsdatad_workqueue);
1862 destroy_workqueue(xfslogd_workqueue); 1989 destroy_workqueue(xfslogd_workqueue);
1863 kmem_zone_destroy(xfs_buf_zone); 1990 kmem_zone_destroy(xfs_buf_zone);
1864#ifdef XFS_BUF_TRACE
1865 ktrace_free(xfs_buf_trace_buf);
1866#endif
1867} 1991}
1868 1992
1869#ifdef CONFIG_KDB_MODULES 1993#ifdef CONFIG_KDB_MODULES