Diffstat (limited to 'fs/xfs/linux-2.6/xfs_buf.c')
-rw-r--r--  fs/xfs/linux-2.6/xfs_buf.c  1373
1 file changed, 666 insertions(+), 707 deletions(-)
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 6fe21d2b8847..e44b7c1a3a36 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -31,76 +31,77 @@
31#include <linux/kthread.h> 31#include <linux/kthread.h>
32#include "xfs_linux.h" 32#include "xfs_linux.h"
33 33
34STATIC kmem_cache_t *pagebuf_zone; 34STATIC kmem_zone_t *xfs_buf_zone;
35STATIC kmem_shaker_t pagebuf_shake; 35STATIC kmem_shaker_t xfs_buf_shake;
36STATIC int xfsbufd(void *);
36STATIC int xfsbufd_wakeup(int, gfp_t); 37STATIC int xfsbufd_wakeup(int, gfp_t);
37STATIC void pagebuf_delwri_queue(xfs_buf_t *, int); 38STATIC void xfs_buf_delwri_queue(xfs_buf_t *, int);
38 39
39STATIC struct workqueue_struct *xfslogd_workqueue; 40STATIC struct workqueue_struct *xfslogd_workqueue;
40struct workqueue_struct *xfsdatad_workqueue; 41struct workqueue_struct *xfsdatad_workqueue;
41 42
42#ifdef PAGEBUF_TRACE 43#ifdef XFS_BUF_TRACE
43void 44void
44pagebuf_trace( 45xfs_buf_trace(
45 xfs_buf_t *pb, 46 xfs_buf_t *bp,
46 char *id, 47 char *id,
47 void *data, 48 void *data,
48 void *ra) 49 void *ra)
49{ 50{
50 ktrace_enter(pagebuf_trace_buf, 51 ktrace_enter(xfs_buf_trace_buf,
51 pb, id, 52 bp, id,
52 (void *)(unsigned long)pb->pb_flags, 53 (void *)(unsigned long)bp->b_flags,
53 (void *)(unsigned long)pb->pb_hold.counter, 54 (void *)(unsigned long)bp->b_hold.counter,
54 (void *)(unsigned long)pb->pb_sema.count.counter, 55 (void *)(unsigned long)bp->b_sema.count.counter,
55 (void *)current, 56 (void *)current,
56 data, ra, 57 data, ra,
57 (void *)(unsigned long)((pb->pb_file_offset>>32) & 0xffffffff), 58 (void *)(unsigned long)((bp->b_file_offset>>32) & 0xffffffff),
58 (void *)(unsigned long)(pb->pb_file_offset & 0xffffffff), 59 (void *)(unsigned long)(bp->b_file_offset & 0xffffffff),
59 (void *)(unsigned long)pb->pb_buffer_length, 60 (void *)(unsigned long)bp->b_buffer_length,
60 NULL, NULL, NULL, NULL, NULL); 61 NULL, NULL, NULL, NULL, NULL);
61} 62}
62ktrace_t *pagebuf_trace_buf; 63ktrace_t *xfs_buf_trace_buf;
63#define PAGEBUF_TRACE_SIZE 4096 64#define XFS_BUF_TRACE_SIZE 4096
64#define PB_TRACE(pb, id, data) \ 65#define XB_TRACE(bp, id, data) \
65 pagebuf_trace(pb, id, (void *)data, (void *)__builtin_return_address(0)) 66 xfs_buf_trace(bp, id, (void *)data, (void *)__builtin_return_address(0))
66#else 67#else
67#define PB_TRACE(pb, id, data) do { } while (0) 68#define XB_TRACE(bp, id, data) do { } while (0)
68#endif 69#endif
69 70
70#ifdef PAGEBUF_LOCK_TRACKING 71#ifdef XFS_BUF_LOCK_TRACKING
71# define PB_SET_OWNER(pb) ((pb)->pb_last_holder = current->pid) 72# define XB_SET_OWNER(bp) ((bp)->b_last_holder = current->pid)
72# define PB_CLEAR_OWNER(pb) ((pb)->pb_last_holder = -1) 73# define XB_CLEAR_OWNER(bp) ((bp)->b_last_holder = -1)
73# define PB_GET_OWNER(pb) ((pb)->pb_last_holder) 74# define XB_GET_OWNER(bp) ((bp)->b_last_holder)
74#else 75#else
75# define PB_SET_OWNER(pb) do { } while (0) 76# define XB_SET_OWNER(bp) do { } while (0)
76# define PB_CLEAR_OWNER(pb) do { } while (0) 77# define XB_CLEAR_OWNER(bp) do { } while (0)
77# define PB_GET_OWNER(pb) do { } while (0) 78# define XB_GET_OWNER(bp) do { } while (0)
78#endif 79#endif
79 80
80#define pb_to_gfp(flags) \ 81#define xb_to_gfp(flags) \
81 ((((flags) & PBF_READ_AHEAD) ? __GFP_NORETRY : \ 82 ((((flags) & XBF_READ_AHEAD) ? __GFP_NORETRY : \
82 ((flags) & PBF_DONT_BLOCK) ? GFP_NOFS : GFP_KERNEL) | __GFP_NOWARN) 83 ((flags) & XBF_DONT_BLOCK) ? GFP_NOFS : GFP_KERNEL) | __GFP_NOWARN)
83 84
84#define pb_to_km(flags) \ 85#define xb_to_km(flags) \
85 (((flags) & PBF_DONT_BLOCK) ? KM_NOFS : KM_SLEEP) 86 (((flags) & XBF_DONT_BLOCK) ? KM_NOFS : KM_SLEEP)
86 87
87#define pagebuf_allocate(flags) \ 88#define xfs_buf_allocate(flags) \
88 kmem_zone_alloc(pagebuf_zone, pb_to_km(flags)) 89 kmem_zone_alloc(xfs_buf_zone, xb_to_km(flags))
89#define pagebuf_deallocate(pb) \ 90#define xfs_buf_deallocate(bp) \
90 kmem_zone_free(pagebuf_zone, (pb)); 91 kmem_zone_free(xfs_buf_zone, (bp));
91 92
92/* 93/*
93 * Page Region interfaces. 94 * Page Region interfaces.
94 * 95 *
95 * For pages in filesystems where the blocksize is smaller than the 96 * For pages in filesystems where the blocksize is smaller than the
96 * pagesize, we use the page->private field (long) to hold a bitmap 97 * pagesize, we use the page->private field (long) to hold a bitmap
97 * of uptodate regions within the page. 98 * of uptodate regions within the page.
98 * 99 *
99 * Each such region is "bytes per page / bits per long" bytes long. 100 * Each such region is "bytes per page / bits per long" bytes long.
100 * 101 *
101 * NBPPR == number-of-bytes-per-page-region 102 * NBPPR == number-of-bytes-per-page-region
102 * BTOPR == bytes-to-page-region (rounded up) 103 * BTOPR == bytes-to-page-region (rounded up)
103 * BTOPRT == bytes-to-page-region-truncated (rounded down) 104 * BTOPRT == bytes-to-page-region-truncated (rounded down)
104 */ 105 */
105#if (BITS_PER_LONG == 32) 106#if (BITS_PER_LONG == 32)
106#define PRSHIFT (PAGE_CACHE_SHIFT - 5) /* (32 == 1<<5) */ 107#define PRSHIFT (PAGE_CACHE_SHIFT - 5) /* (32 == 1<<5) */
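The set_page_region()/test_page_region() helpers that maintain this bitmap are untouched by the patch, so only context lines from them appear below. As a rough illustration of the arithmetic the comment describes (not part of the patch; simplified to a 4096-byte page and a 64-bit unsigned long, with invented sk_* names):

#include <assert.h>
#include <stddef.h>

#define SK_PAGE_SIZE	4096UL
#define SK_BITS		64UL			/* assumes a 64-bit unsigned long */
#define SK_NBPPR	(SK_PAGE_SIZE / SK_BITS)	/* bytes per page region */

/* Bitmask covering every region touched by [offset, offset + length). */
static unsigned long sk_region_mask(size_t offset, size_t length)
{
	size_t		first = offset / SK_NBPPR;			/* rounded down */
	size_t		final = (offset + length + SK_NBPPR - 1) / SK_NBPPR; /* rounded up */
	unsigned long	mask = ~0UL;

	assert(length && offset + length <= SK_PAGE_SIZE);
	mask <<= SK_BITS - (final - first);	/* keep (final - first) bits ... */
	mask >>= SK_BITS - final;		/* ... and slide them down to 'first' */
	return mask;
}

/* A range is up to date only if every region it touches has its bit set. */
static int sk_test_region(unsigned long bitmap, size_t offset, size_t length)
{
	unsigned long	mask = sk_region_mask(offset, length);

	return (bitmap & mask) == mask;
}

The kernel helpers keep this mask in page->private, as the comment above notes, which is why the lookup and I/O-completion hunks further down test PagePrivate() before consulting it.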
@@ -159,7 +160,7 @@ test_page_region(
159} 160}
160 161
161/* 162/*
162 * Mapping of multi-page buffers into contiguous virtual space 163 * Mapping of multi-page buffers into contiguous virtual space
163 */ 164 */
164 165
165typedef struct a_list { 166typedef struct a_list {
@@ -172,7 +173,7 @@ STATIC int as_list_len;
172STATIC DEFINE_SPINLOCK(as_lock); 173STATIC DEFINE_SPINLOCK(as_lock);
173 174
174/* 175/*
175 * Try to batch vunmaps because they are costly. 176 * Try to batch vunmaps because they are costly.
176 */ 177 */
177STATIC void 178STATIC void
178free_address( 179free_address(
@@ -215,83 +216,83 @@ purge_addresses(void)
215} 216}
216 217
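free_address() and purge_addresses() themselves are not modified by this patch, so the hunks above show only their edges. A rough userspace analogue of the batching scheme (not part of the patch; the names and the expensive_unmap() stub standing in for vunmap() are invented):

#include <pthread.h>
#include <stdlib.h>

struct deferred_addr {
	struct deferred_addr	*next;
	void			*addr;
};

static struct deferred_addr	*free_head;
static int			free_len;
static pthread_mutex_t		free_lock = PTHREAD_MUTEX_INITIALIZER;

static void expensive_unmap(void *addr) { (void)addr; /* stand-in for vunmap() */ }

static void defer_free(void *addr)
{
	struct deferred_addr	*e = malloc(sizeof(*e));

	if (!e) {				/* cannot defer: pay the cost now */
		expensive_unmap(addr);
		return;
	}
	e->addr = addr;
	pthread_mutex_lock(&free_lock);
	e->next = free_head;
	free_head = e;
	free_len++;
	pthread_mutex_unlock(&free_lock);
}

static void purge_deferred(void)
{
	struct deferred_addr	*e, *next;

	pthread_mutex_lock(&free_lock);
	e = free_head;
	free_head = NULL;
	free_len = 0;
	pthread_mutex_unlock(&free_lock);

	for (; e; e = next) {			/* one sweep pays for many frees */
		next = e->next;
		expensive_unmap(e->addr);
		free(e);
	}
}

In the buffer code below, _xfs_buf_map_pages() triggers the sweep once the deferred list grows past a small threshold (as_list_len > 64).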
217/* 218/*
218 * Internal pagebuf object manipulation 219 * Internal xfs_buf_t object manipulation
219 */ 220 */
220 221
221STATIC void 222STATIC void
222_pagebuf_initialize( 223_xfs_buf_initialize(
223 xfs_buf_t *pb, 224 xfs_buf_t *bp,
224 xfs_buftarg_t *target, 225 xfs_buftarg_t *target,
225 loff_t range_base, 226 xfs_off_t range_base,
226 size_t range_length, 227 size_t range_length,
227 page_buf_flags_t flags) 228 xfs_buf_flags_t flags)
228{ 229{
229 /* 230 /*
230 * We don't want certain flags to appear in pb->pb_flags. 231 * We don't want certain flags to appear in b_flags.
231 */ 232 */
232 flags &= ~(PBF_LOCK|PBF_MAPPED|PBF_DONT_BLOCK|PBF_READ_AHEAD); 233 flags &= ~(XBF_LOCK|XBF_MAPPED|XBF_DONT_BLOCK|XBF_READ_AHEAD);
233 234
234 memset(pb, 0, sizeof(xfs_buf_t)); 235 memset(bp, 0, sizeof(xfs_buf_t));
235 atomic_set(&pb->pb_hold, 1); 236 atomic_set(&bp->b_hold, 1);
236 init_MUTEX_LOCKED(&pb->pb_iodonesema); 237 init_MUTEX_LOCKED(&bp->b_iodonesema);
237 INIT_LIST_HEAD(&pb->pb_list); 238 INIT_LIST_HEAD(&bp->b_list);
238 INIT_LIST_HEAD(&pb->pb_hash_list); 239 INIT_LIST_HEAD(&bp->b_hash_list);
239 init_MUTEX_LOCKED(&pb->pb_sema); /* held, no waiters */ 240 init_MUTEX_LOCKED(&bp->b_sema); /* held, no waiters */
240 PB_SET_OWNER(pb); 241 XB_SET_OWNER(bp);
241 pb->pb_target = target; 242 bp->b_target = target;
242 pb->pb_file_offset = range_base; 243 bp->b_file_offset = range_base;
243 /* 244 /*
244 * Set buffer_length and count_desired to the same value initially. 245 * Set buffer_length and count_desired to the same value initially.
245 * I/O routines should use count_desired, which will be the same in 246 * I/O routines should use count_desired, which will be the same in
246 * most cases but may be reset (e.g. XFS recovery). 247 * most cases but may be reset (e.g. XFS recovery).
247 */ 248 */
248 pb->pb_buffer_length = pb->pb_count_desired = range_length; 249 bp->b_buffer_length = bp->b_count_desired = range_length;
249 pb->pb_flags = flags; 250 bp->b_flags = flags;
250 pb->pb_bn = XFS_BUF_DADDR_NULL; 251 bp->b_bn = XFS_BUF_DADDR_NULL;
251 atomic_set(&pb->pb_pin_count, 0); 252 atomic_set(&bp->b_pin_count, 0);
252 init_waitqueue_head(&pb->pb_waiters); 253 init_waitqueue_head(&bp->b_waiters);
253 254
254 XFS_STATS_INC(pb_create); 255 XFS_STATS_INC(xb_create);
255 PB_TRACE(pb, "initialize", target); 256 XB_TRACE(bp, "initialize", target);
256} 257}
257 258
258/* 259/*
259 * Allocate a page array capable of holding a specified number 260 * Allocate a page array capable of holding a specified number
260 * of pages, and point the page buf at it. 261 * of pages, and point the page buf at it.
261 */ 262 */
262STATIC int 263STATIC int
263_pagebuf_get_pages( 264_xfs_buf_get_pages(
264 xfs_buf_t *pb, 265 xfs_buf_t *bp,
265 int page_count, 266 int page_count,
266 page_buf_flags_t flags) 267 xfs_buf_flags_t flags)
267{ 268{
268 /* Make sure that we have a page list */ 269 /* Make sure that we have a page list */
269 if (pb->pb_pages == NULL) { 270 if (bp->b_pages == NULL) {
270 pb->pb_offset = page_buf_poff(pb->pb_file_offset); 271 bp->b_offset = xfs_buf_poff(bp->b_file_offset);
271 pb->pb_page_count = page_count; 272 bp->b_page_count = page_count;
272 if (page_count <= PB_PAGES) { 273 if (page_count <= XB_PAGES) {
273 pb->pb_pages = pb->pb_page_array; 274 bp->b_pages = bp->b_page_array;
274 } else { 275 } else {
275 pb->pb_pages = kmem_alloc(sizeof(struct page *) * 276 bp->b_pages = kmem_alloc(sizeof(struct page *) *
276 page_count, pb_to_km(flags)); 277 page_count, xb_to_km(flags));
277 if (pb->pb_pages == NULL) 278 if (bp->b_pages == NULL)
278 return -ENOMEM; 279 return -ENOMEM;
279 } 280 }
280 memset(pb->pb_pages, 0, sizeof(struct page *) * page_count); 281 memset(bp->b_pages, 0, sizeof(struct page *) * page_count);
281 } 282 }
282 return 0; 283 return 0;
283} 284}
284 285
285/* 286/*
286 * Frees pb_pages if it was malloced. 287 * Frees b_pages if it was allocated.
287 */ 288 */
288STATIC void 289STATIC void
289_pagebuf_free_pages( 290_xfs_buf_free_pages(
290 xfs_buf_t *bp) 291 xfs_buf_t *bp)
291{ 292{
292 if (bp->pb_pages != bp->pb_page_array) { 293 if (bp->b_pages != bp->b_page_array) {
293 kmem_free(bp->pb_pages, 294 kmem_free(bp->b_pages,
294 bp->pb_page_count * sizeof(struct page *)); 295 bp->b_page_count * sizeof(struct page *));
295 } 296 }
296} 297}
297 298
@@ -299,79 +300,79 @@ _pagebuf_free_pages(
299 * Releases the specified buffer. 300 * Releases the specified buffer.
300 * 301 *
301 * The modification state of any associated pages is left unchanged. 302 * The modification state of any associated pages is left unchanged.
302 * The buffer most not be on any hash - use pagebuf_rele instead for 303 * The buffer most not be on any hash - use xfs_buf_rele instead for
303 * hashed and refcounted buffers 304 * hashed and refcounted buffers
304 */ 305 */
305void 306void
306pagebuf_free( 307xfs_buf_free(
307 xfs_buf_t *bp) 308 xfs_buf_t *bp)
308{ 309{
309 PB_TRACE(bp, "free", 0); 310 XB_TRACE(bp, "free", 0);
310 311
311 ASSERT(list_empty(&bp->pb_hash_list)); 312 ASSERT(list_empty(&bp->b_hash_list));
312 313
313 if (bp->pb_flags & _PBF_PAGE_CACHE) { 314 if (bp->b_flags & _XBF_PAGE_CACHE) {
314 uint i; 315 uint i;
315 316
316 if ((bp->pb_flags & PBF_MAPPED) && (bp->pb_page_count > 1)) 317 if ((bp->b_flags & XBF_MAPPED) && (bp->b_page_count > 1))
317 free_address(bp->pb_addr - bp->pb_offset); 318 free_address(bp->b_addr - bp->b_offset);
318 319
319 for (i = 0; i < bp->pb_page_count; i++) 320 for (i = 0; i < bp->b_page_count; i++)
320 page_cache_release(bp->pb_pages[i]); 321 page_cache_release(bp->b_pages[i]);
321 _pagebuf_free_pages(bp); 322 _xfs_buf_free_pages(bp);
322 } else if (bp->pb_flags & _PBF_KMEM_ALLOC) { 323 } else if (bp->b_flags & _XBF_KMEM_ALLOC) {
323 /* 324 /*
324 * XXX(hch): bp->pb_count_desired might be incorrect (see 325 * XXX(hch): bp->b_count_desired might be incorrect (see
325 * pagebuf_associate_memory for details), but fortunately 326 * xfs_buf_associate_memory for details), but fortunately
326 * the Linux version of kmem_free ignores the len argument.. 327 * the Linux version of kmem_free ignores the len argument..
327 */ 328 */
328 kmem_free(bp->pb_addr, bp->pb_count_desired); 329 kmem_free(bp->b_addr, bp->b_count_desired);
329 _pagebuf_free_pages(bp); 330 _xfs_buf_free_pages(bp);
330 } 331 }
331 332
332 pagebuf_deallocate(bp); 333 xfs_buf_deallocate(bp);
333} 334}
334 335
335/* 336/*
336 * Finds all pages for buffer in question and builds it's page list. 337 * Finds all pages for buffer in question and builds it's page list.
337 */ 338 */
338STATIC int 339STATIC int
339_pagebuf_lookup_pages( 340_xfs_buf_lookup_pages(
340 xfs_buf_t *bp, 341 xfs_buf_t *bp,
341 uint flags) 342 uint flags)
342{ 343{
343 struct address_space *mapping = bp->pb_target->pbr_mapping; 344 struct address_space *mapping = bp->b_target->bt_mapping;
344 size_t blocksize = bp->pb_target->pbr_bsize; 345 size_t blocksize = bp->b_target->bt_bsize;
345 size_t size = bp->pb_count_desired; 346 size_t size = bp->b_count_desired;
346 size_t nbytes, offset; 347 size_t nbytes, offset;
347 gfp_t gfp_mask = pb_to_gfp(flags); 348 gfp_t gfp_mask = xb_to_gfp(flags);
348 unsigned short page_count, i; 349 unsigned short page_count, i;
349 pgoff_t first; 350 pgoff_t first;
350 loff_t end; 351 xfs_off_t end;
351 int error; 352 int error;
352 353
353 end = bp->pb_file_offset + bp->pb_buffer_length; 354 end = bp->b_file_offset + bp->b_buffer_length;
354 page_count = page_buf_btoc(end) - page_buf_btoct(bp->pb_file_offset); 355 page_count = xfs_buf_btoc(end) - xfs_buf_btoct(bp->b_file_offset);
355 356
356 error = _pagebuf_get_pages(bp, page_count, flags); 357 error = _xfs_buf_get_pages(bp, page_count, flags);
357 if (unlikely(error)) 358 if (unlikely(error))
358 return error; 359 return error;
359 bp->pb_flags |= _PBF_PAGE_CACHE; 360 bp->b_flags |= _XBF_PAGE_CACHE;
360 361
361 offset = bp->pb_offset; 362 offset = bp->b_offset;
362 first = bp->pb_file_offset >> PAGE_CACHE_SHIFT; 363 first = bp->b_file_offset >> PAGE_CACHE_SHIFT;
363 364
364 for (i = 0; i < bp->pb_page_count; i++) { 365 for (i = 0; i < bp->b_page_count; i++) {
365 struct page *page; 366 struct page *page;
366 uint retries = 0; 367 uint retries = 0;
367 368
368 retry: 369 retry:
369 page = find_or_create_page(mapping, first + i, gfp_mask); 370 page = find_or_create_page(mapping, first + i, gfp_mask);
370 if (unlikely(page == NULL)) { 371 if (unlikely(page == NULL)) {
371 if (flags & PBF_READ_AHEAD) { 372 if (flags & XBF_READ_AHEAD) {
372 bp->pb_page_count = i; 373 bp->b_page_count = i;
373 for (i = 0; i < bp->pb_page_count; i++) 374 for (i = 0; i < bp->b_page_count; i++)
374 unlock_page(bp->pb_pages[i]); 375 unlock_page(bp->b_pages[i]);
375 return -ENOMEM; 376 return -ENOMEM;
376 } 377 }
377 378
@@ -387,13 +388,13 @@ _pagebuf_lookup_pages(
387 "deadlock in %s (mode:0x%x)\n", 388 "deadlock in %s (mode:0x%x)\n",
388 __FUNCTION__, gfp_mask); 389 __FUNCTION__, gfp_mask);
389 390
390 XFS_STATS_INC(pb_page_retries); 391 XFS_STATS_INC(xb_page_retries);
391 xfsbufd_wakeup(0, gfp_mask); 392 xfsbufd_wakeup(0, gfp_mask);
392 blk_congestion_wait(WRITE, HZ/50); 393 blk_congestion_wait(WRITE, HZ/50);
393 goto retry; 394 goto retry;
394 } 395 }
395 396
396 XFS_STATS_INC(pb_page_found); 397 XFS_STATS_INC(xb_page_found);
397 398
398 nbytes = min_t(size_t, size, PAGE_CACHE_SIZE - offset); 399 nbytes = min_t(size_t, size, PAGE_CACHE_SIZE - offset);
399 size -= nbytes; 400 size -= nbytes;
@@ -401,27 +402,27 @@ _pagebuf_lookup_pages(
401 if (!PageUptodate(page)) { 402 if (!PageUptodate(page)) {
402 page_count--; 403 page_count--;
403 if (blocksize >= PAGE_CACHE_SIZE) { 404 if (blocksize >= PAGE_CACHE_SIZE) {
404 if (flags & PBF_READ) 405 if (flags & XBF_READ)
405 bp->pb_locked = 1; 406 bp->b_locked = 1;
406 } else if (!PagePrivate(page)) { 407 } else if (!PagePrivate(page)) {
407 if (test_page_region(page, offset, nbytes)) 408 if (test_page_region(page, offset, nbytes))
408 page_count++; 409 page_count++;
409 } 410 }
410 } 411 }
411 412
412 bp->pb_pages[i] = page; 413 bp->b_pages[i] = page;
413 offset = 0; 414 offset = 0;
414 } 415 }
415 416
416 if (!bp->pb_locked) { 417 if (!bp->b_locked) {
417 for (i = 0; i < bp->pb_page_count; i++) 418 for (i = 0; i < bp->b_page_count; i++)
418 unlock_page(bp->pb_pages[i]); 419 unlock_page(bp->b_pages[i]);
419 } 420 }
420 421
421 if (page_count == bp->pb_page_count) 422 if (page_count == bp->b_page_count)
422 bp->pb_flags |= PBF_DONE; 423 bp->b_flags |= XBF_DONE;
423 424
424 PB_TRACE(bp, "lookup_pages", (long)page_count); 425 XB_TRACE(bp, "lookup_pages", (long)page_count);
425 return error; 426 return error;
426} 427}
427 428
@@ -429,23 +430,23 @@ _pagebuf_lookup_pages(
429 * Map buffer into kernel address-space if nessecary. 430 * Map buffer into kernel address-space if nessecary.
430 */ 431 */
431STATIC int 432STATIC int
432_pagebuf_map_pages( 433_xfs_buf_map_pages(
433 xfs_buf_t *bp, 434 xfs_buf_t *bp,
434 uint flags) 435 uint flags)
435{ 436{
436 /* A single page buffer is always mappable */ 437 /* A single page buffer is always mappable */
437 if (bp->pb_page_count == 1) { 438 if (bp->b_page_count == 1) {
438 bp->pb_addr = page_address(bp->pb_pages[0]) + bp->pb_offset; 439 bp->b_addr = page_address(bp->b_pages[0]) + bp->b_offset;
439 bp->pb_flags |= PBF_MAPPED; 440 bp->b_flags |= XBF_MAPPED;
440 } else if (flags & PBF_MAPPED) { 441 } else if (flags & XBF_MAPPED) {
441 if (as_list_len > 64) 442 if (as_list_len > 64)
442 purge_addresses(); 443 purge_addresses();
443 bp->pb_addr = vmap(bp->pb_pages, bp->pb_page_count, 444 bp->b_addr = vmap(bp->b_pages, bp->b_page_count,
444 VM_MAP, PAGE_KERNEL); 445 VM_MAP, PAGE_KERNEL);
445 if (unlikely(bp->pb_addr == NULL)) 446 if (unlikely(bp->b_addr == NULL))
446 return -ENOMEM; 447 return -ENOMEM;
447 bp->pb_addr += bp->pb_offset; 448 bp->b_addr += bp->b_offset;
448 bp->pb_flags |= PBF_MAPPED; 449 bp->b_flags |= XBF_MAPPED;
449 } 450 }
450 451
451 return 0; 452 return 0;
@@ -456,9 +457,7 @@ _pagebuf_map_pages(
456 */ 457 */
457 458
458/* 459/*
459 * _pagebuf_find 460 * Look up, and creates if absent, a lockable buffer for
460 *
461 * Looks up, and creates if absent, a lockable buffer for
462 * a given range of an inode. The buffer is returned 461 * a given range of an inode. The buffer is returned
463 * locked. If other overlapping buffers exist, they are 462 * locked. If other overlapping buffers exist, they are
464 * released before the new buffer is created and locked, 463 * released before the new buffer is created and locked,
@@ -466,55 +465,55 @@ _pagebuf_map_pages(
466 * are unlocked. No I/O is implied by this call. 465 * are unlocked. No I/O is implied by this call.
467 */ 466 */
468xfs_buf_t * 467xfs_buf_t *
469_pagebuf_find( 468_xfs_buf_find(
470 xfs_buftarg_t *btp, /* block device target */ 469 xfs_buftarg_t *btp, /* block device target */
471 loff_t ioff, /* starting offset of range */ 470 xfs_off_t ioff, /* starting offset of range */
472 size_t isize, /* length of range */ 471 size_t isize, /* length of range */
473 page_buf_flags_t flags, /* PBF_TRYLOCK */ 472 xfs_buf_flags_t flags,
474 xfs_buf_t *new_pb)/* newly allocated buffer */ 473 xfs_buf_t *new_bp)
475{ 474{
476 loff_t range_base; 475 xfs_off_t range_base;
477 size_t range_length; 476 size_t range_length;
478 xfs_bufhash_t *hash; 477 xfs_bufhash_t *hash;
479 xfs_buf_t *pb, *n; 478 xfs_buf_t *bp, *n;
480 479
481 range_base = (ioff << BBSHIFT); 480 range_base = (ioff << BBSHIFT);
482 range_length = (isize << BBSHIFT); 481 range_length = (isize << BBSHIFT);
483 482
484 /* Check for IOs smaller than the sector size / not sector aligned */ 483 /* Check for IOs smaller than the sector size / not sector aligned */
485 ASSERT(!(range_length < (1 << btp->pbr_sshift))); 484 ASSERT(!(range_length < (1 << btp->bt_sshift)));
486 ASSERT(!(range_base & (loff_t)btp->pbr_smask)); 485 ASSERT(!(range_base & (xfs_off_t)btp->bt_smask));
487 486
488 hash = &btp->bt_hash[hash_long((unsigned long)ioff, btp->bt_hashshift)]; 487 hash = &btp->bt_hash[hash_long((unsigned long)ioff, btp->bt_hashshift)];
489 488
490 spin_lock(&hash->bh_lock); 489 spin_lock(&hash->bh_lock);
491 490
492 list_for_each_entry_safe(pb, n, &hash->bh_list, pb_hash_list) { 491 list_for_each_entry_safe(bp, n, &hash->bh_list, b_hash_list) {
493 ASSERT(btp == pb->pb_target); 492 ASSERT(btp == bp->b_target);
494 if (pb->pb_file_offset == range_base && 493 if (bp->b_file_offset == range_base &&
495 pb->pb_buffer_length == range_length) { 494 bp->b_buffer_length == range_length) {
496 /* 495 /*
497 * If we look at something bring it to the 496 * If we look at something, bring it to the
498 * front of the list for next time. 497 * front of the list for next time.
499 */ 498 */
500 atomic_inc(&pb->pb_hold); 499 atomic_inc(&bp->b_hold);
501 list_move(&pb->pb_hash_list, &hash->bh_list); 500 list_move(&bp->b_hash_list, &hash->bh_list);
502 goto found; 501 goto found;
503 } 502 }
504 } 503 }
505 504
506 /* No match found */ 505 /* No match found */
507 if (new_pb) { 506 if (new_bp) {
508 _pagebuf_initialize(new_pb, btp, range_base, 507 _xfs_buf_initialize(new_bp, btp, range_base,
509 range_length, flags); 508 range_length, flags);
510 new_pb->pb_hash = hash; 509 new_bp->b_hash = hash;
511 list_add(&new_pb->pb_hash_list, &hash->bh_list); 510 list_add(&new_bp->b_hash_list, &hash->bh_list);
512 } else { 511 } else {
513 XFS_STATS_INC(pb_miss_locked); 512 XFS_STATS_INC(xb_miss_locked);
514 } 513 }
515 514
516 spin_unlock(&hash->bh_lock); 515 spin_unlock(&hash->bh_lock);
517 return new_pb; 516 return new_bp;
518 517
519found: 518found:
520 spin_unlock(&hash->bh_lock); 519 spin_unlock(&hash->bh_lock);
@@ -523,74 +522,72 @@ found:
523 * if this does not work then we need to drop the 522 * if this does not work then we need to drop the
524 * spinlock and do a hard attempt on the semaphore. 523 * spinlock and do a hard attempt on the semaphore.
525 */ 524 */
526 if (down_trylock(&pb->pb_sema)) { 525 if (down_trylock(&bp->b_sema)) {
527 if (!(flags & PBF_TRYLOCK)) { 526 if (!(flags & XBF_TRYLOCK)) {
528 /* wait for buffer ownership */ 527 /* wait for buffer ownership */
529 PB_TRACE(pb, "get_lock", 0); 528 XB_TRACE(bp, "get_lock", 0);
530 pagebuf_lock(pb); 529 xfs_buf_lock(bp);
531 XFS_STATS_INC(pb_get_locked_waited); 530 XFS_STATS_INC(xb_get_locked_waited);
532 } else { 531 } else {
533 /* We asked for a trylock and failed, no need 532 /* We asked for a trylock and failed, no need
534 * to look at file offset and length here, we 533 * to look at file offset and length here, we
535 * know that this pagebuf at least overlaps our 534 * know that this buffer at least overlaps our
536 * pagebuf and is locked, therefore our buffer 535 * buffer and is locked, therefore our buffer
537 * either does not exist, or is this buffer 536 * either does not exist, or is this buffer.
538 */ 537 */
539 538 xfs_buf_rele(bp);
540 pagebuf_rele(pb); 539 XFS_STATS_INC(xb_busy_locked);
541 XFS_STATS_INC(pb_busy_locked); 540 return NULL;
542 return (NULL);
543 } 541 }
544 } else { 542 } else {
545 /* trylock worked */ 543 /* trylock worked */
546 PB_SET_OWNER(pb); 544 XB_SET_OWNER(bp);
547 } 545 }
548 546
549 if (pb->pb_flags & PBF_STALE) { 547 if (bp->b_flags & XBF_STALE) {
550 ASSERT((pb->pb_flags & _PBF_DELWRI_Q) == 0); 548 ASSERT((bp->b_flags & _XBF_DELWRI_Q) == 0);
551 pb->pb_flags &= PBF_MAPPED; 549 bp->b_flags &= XBF_MAPPED;
552 } 550 }
553 PB_TRACE(pb, "got_lock", 0); 551 XB_TRACE(bp, "got_lock", 0);
554 XFS_STATS_INC(pb_get_locked); 552 XFS_STATS_INC(xb_get_locked);
555 return (pb); 553 return bp;
556} 554}
557 555
558/* 556/*
559 * xfs_buf_get_flags assembles a buffer covering the specified range. 557 * Assembles a buffer covering the specified range.
560 *
561 * Storage in memory for all portions of the buffer will be allocated, 558 * Storage in memory for all portions of the buffer will be allocated,
562 * although backing storage may not be. 559 * although backing storage may not be.
563 */ 560 */
564xfs_buf_t * 561xfs_buf_t *
565xfs_buf_get_flags( /* allocate a buffer */ 562xfs_buf_get_flags(
566 xfs_buftarg_t *target,/* target for buffer */ 563 xfs_buftarg_t *target,/* target for buffer */
567 loff_t ioff, /* starting offset of range */ 564 xfs_off_t ioff, /* starting offset of range */
568 size_t isize, /* length of range */ 565 size_t isize, /* length of range */
569 page_buf_flags_t flags) /* PBF_TRYLOCK */ 566 xfs_buf_flags_t flags)
570{ 567{
571 xfs_buf_t *pb, *new_pb; 568 xfs_buf_t *bp, *new_bp;
572 int error = 0, i; 569 int error = 0, i;
573 570
574 new_pb = pagebuf_allocate(flags); 571 new_bp = xfs_buf_allocate(flags);
575 if (unlikely(!new_pb)) 572 if (unlikely(!new_bp))
576 return NULL; 573 return NULL;
577 574
578 pb = _pagebuf_find(target, ioff, isize, flags, new_pb); 575 bp = _xfs_buf_find(target, ioff, isize, flags, new_bp);
579 if (pb == new_pb) { 576 if (bp == new_bp) {
580 error = _pagebuf_lookup_pages(pb, flags); 577 error = _xfs_buf_lookup_pages(bp, flags);
581 if (error) 578 if (error)
582 goto no_buffer; 579 goto no_buffer;
583 } else { 580 } else {
584 pagebuf_deallocate(new_pb); 581 xfs_buf_deallocate(new_bp);
585 if (unlikely(pb == NULL)) 582 if (unlikely(bp == NULL))
586 return NULL; 583 return NULL;
587 } 584 }
588 585
589 for (i = 0; i < pb->pb_page_count; i++) 586 for (i = 0; i < bp->b_page_count; i++)
590 mark_page_accessed(pb->pb_pages[i]); 587 mark_page_accessed(bp->b_pages[i]);
591 588
592 if (!(pb->pb_flags & PBF_MAPPED)) { 589 if (!(bp->b_flags & XBF_MAPPED)) {
593 error = _pagebuf_map_pages(pb, flags); 590 error = _xfs_buf_map_pages(bp, flags);
594 if (unlikely(error)) { 591 if (unlikely(error)) {
595 printk(KERN_WARNING "%s: failed to map pages\n", 592 printk(KERN_WARNING "%s: failed to map pages\n",
596 __FUNCTION__); 593 __FUNCTION__);
@@ -598,97 +595,97 @@ xfs_buf_get_flags( /* allocate a buffer */
598 } 595 }
599 } 596 }
600 597
601 XFS_STATS_INC(pb_get); 598 XFS_STATS_INC(xb_get);
602 599
603 /* 600 /*
604 * Always fill in the block number now, the mapped cases can do 601 * Always fill in the block number now, the mapped cases can do
605 * their own overlay of this later. 602 * their own overlay of this later.
606 */ 603 */
607 pb->pb_bn = ioff; 604 bp->b_bn = ioff;
608 pb->pb_count_desired = pb->pb_buffer_length; 605 bp->b_count_desired = bp->b_buffer_length;
609 606
610 PB_TRACE(pb, "get", (unsigned long)flags); 607 XB_TRACE(bp, "get", (unsigned long)flags);
611 return pb; 608 return bp;
612 609
613 no_buffer: 610 no_buffer:
614 if (flags & (PBF_LOCK | PBF_TRYLOCK)) 611 if (flags & (XBF_LOCK | XBF_TRYLOCK))
615 pagebuf_unlock(pb); 612 xfs_buf_unlock(bp);
616 pagebuf_rele(pb); 613 xfs_buf_rele(bp);
617 return NULL; 614 return NULL;
618} 615}
619 616
620xfs_buf_t * 617xfs_buf_t *
621xfs_buf_read_flags( 618xfs_buf_read_flags(
622 xfs_buftarg_t *target, 619 xfs_buftarg_t *target,
623 loff_t ioff, 620 xfs_off_t ioff,
624 size_t isize, 621 size_t isize,
625 page_buf_flags_t flags) 622 xfs_buf_flags_t flags)
626{ 623{
627 xfs_buf_t *pb; 624 xfs_buf_t *bp;
628 625
629 flags |= PBF_READ; 626 flags |= XBF_READ;
630 627
631 pb = xfs_buf_get_flags(target, ioff, isize, flags); 628 bp = xfs_buf_get_flags(target, ioff, isize, flags);
632 if (pb) { 629 if (bp) {
633 if (!XFS_BUF_ISDONE(pb)) { 630 if (!XFS_BUF_ISDONE(bp)) {
634 PB_TRACE(pb, "read", (unsigned long)flags); 631 XB_TRACE(bp, "read", (unsigned long)flags);
635 XFS_STATS_INC(pb_get_read); 632 XFS_STATS_INC(xb_get_read);
636 pagebuf_iostart(pb, flags); 633 xfs_buf_iostart(bp, flags);
637 } else if (flags & PBF_ASYNC) { 634 } else if (flags & XBF_ASYNC) {
638 PB_TRACE(pb, "read_async", (unsigned long)flags); 635 XB_TRACE(bp, "read_async", (unsigned long)flags);
639 /* 636 /*
640 * Read ahead call which is already satisfied, 637 * Read ahead call which is already satisfied,
641 * drop the buffer 638 * drop the buffer
642 */ 639 */
643 goto no_buffer; 640 goto no_buffer;
644 } else { 641 } else {
645 PB_TRACE(pb, "read_done", (unsigned long)flags); 642 XB_TRACE(bp, "read_done", (unsigned long)flags);
646 /* We do not want read in the flags */ 643 /* We do not want read in the flags */
647 pb->pb_flags &= ~PBF_READ; 644 bp->b_flags &= ~XBF_READ;
648 } 645 }
649 } 646 }
650 647
651 return pb; 648 return bp;
652 649
653 no_buffer: 650 no_buffer:
654 if (flags & (PBF_LOCK | PBF_TRYLOCK)) 651 if (flags & (XBF_LOCK | XBF_TRYLOCK))
655 pagebuf_unlock(pb); 652 xfs_buf_unlock(bp);
656 pagebuf_rele(pb); 653 xfs_buf_rele(bp);
657 return NULL; 654 return NULL;
658} 655}
659 656
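For context, the renamed lookup and read entry points above are used in a get, use, release pattern. A hedged sketch, not part of the patch: the wrapper name is invented, the offset and length are in 512-byte basic blocks (as _xfs_buf_find() shifts them by BBSHIFT), and error handling is trimmed:

static void
example_read_and_release(
	xfs_buftarg_t	*target,
	xfs_off_t	ioff,		/* offset in 512-byte basic blocks */
	size_t		isize)		/* length in basic blocks */
{
	xfs_buf_t	*bp;

	bp = xfs_buf_read_flags(target, ioff, isize, XBF_LOCK | XBF_MAPPED);
	if (!bp)
		return;

	if (bp->b_error) {
		/* handle the error recorded via xfs_buf_ioerror() */
	} else {
		/* bp->b_addr is mapped and valid for bp->b_count_desired bytes */
	}
	xfs_buf_relse(bp);	/* unlock and drop the hold taken by the lookup */
}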
660/* 657/*
661 * If we are not low on memory then do the readahead in a deadlock 658 * If we are not low on memory then do the readahead in a deadlock
662 * safe manner. 659 * safe manner.
663 */ 660 */
664void 661void
665pagebuf_readahead( 662xfs_buf_readahead(
666 xfs_buftarg_t *target, 663 xfs_buftarg_t *target,
667 loff_t ioff, 664 xfs_off_t ioff,
668 size_t isize, 665 size_t isize,
669 page_buf_flags_t flags) 666 xfs_buf_flags_t flags)
670{ 667{
671 struct backing_dev_info *bdi; 668 struct backing_dev_info *bdi;
672 669
673 bdi = target->pbr_mapping->backing_dev_info; 670 bdi = target->bt_mapping->backing_dev_info;
674 if (bdi_read_congested(bdi)) 671 if (bdi_read_congested(bdi))
675 return; 672 return;
676 673
677 flags |= (PBF_TRYLOCK|PBF_ASYNC|PBF_READ_AHEAD); 674 flags |= (XBF_TRYLOCK|XBF_ASYNC|XBF_READ_AHEAD);
678 xfs_buf_read_flags(target, ioff, isize, flags); 675 xfs_buf_read_flags(target, ioff, isize, flags);
679} 676}
680 677
681xfs_buf_t * 678xfs_buf_t *
682pagebuf_get_empty( 679xfs_buf_get_empty(
683 size_t len, 680 size_t len,
684 xfs_buftarg_t *target) 681 xfs_buftarg_t *target)
685{ 682{
686 xfs_buf_t *pb; 683 xfs_buf_t *bp;
687 684
688 pb = pagebuf_allocate(0); 685 bp = xfs_buf_allocate(0);
689 if (pb) 686 if (bp)
690 _pagebuf_initialize(pb, target, 0, len, 0); 687 _xfs_buf_initialize(bp, target, 0, len, 0);
691 return pb; 688 return bp;
692} 689}
693 690
694static inline struct page * 691static inline struct page *
@@ -704,8 +701,8 @@ mem_to_page(
704} 701}
705 702
706int 703int
707pagebuf_associate_memory( 704xfs_buf_associate_memory(
708 xfs_buf_t *pb, 705 xfs_buf_t *bp,
709 void *mem, 706 void *mem,
710 size_t len) 707 size_t len)
711{ 708{
@@ -722,40 +719,40 @@ pagebuf_associate_memory(
722 page_count++; 719 page_count++;
723 720
724 /* Free any previous set of page pointers */ 721 /* Free any previous set of page pointers */
725 if (pb->pb_pages) 722 if (bp->b_pages)
726 _pagebuf_free_pages(pb); 723 _xfs_buf_free_pages(bp);
727 724
728 pb->pb_pages = NULL; 725 bp->b_pages = NULL;
729 pb->pb_addr = mem; 726 bp->b_addr = mem;
730 727
731 rval = _pagebuf_get_pages(pb, page_count, 0); 728 rval = _xfs_buf_get_pages(bp, page_count, 0);
732 if (rval) 729 if (rval)
733 return rval; 730 return rval;
734 731
735 pb->pb_offset = offset; 732 bp->b_offset = offset;
736 ptr = (size_t) mem & PAGE_CACHE_MASK; 733 ptr = (size_t) mem & PAGE_CACHE_MASK;
737 end = PAGE_CACHE_ALIGN((size_t) mem + len); 734 end = PAGE_CACHE_ALIGN((size_t) mem + len);
738 end_cur = end; 735 end_cur = end;
739 /* set up first page */ 736 /* set up first page */
740 pb->pb_pages[0] = mem_to_page(mem); 737 bp->b_pages[0] = mem_to_page(mem);
741 738
742 ptr += PAGE_CACHE_SIZE; 739 ptr += PAGE_CACHE_SIZE;
743 pb->pb_page_count = ++i; 740 bp->b_page_count = ++i;
744 while (ptr < end) { 741 while (ptr < end) {
745 pb->pb_pages[i] = mem_to_page((void *)ptr); 742 bp->b_pages[i] = mem_to_page((void *)ptr);
746 pb->pb_page_count = ++i; 743 bp->b_page_count = ++i;
747 ptr += PAGE_CACHE_SIZE; 744 ptr += PAGE_CACHE_SIZE;
748 } 745 }
749 pb->pb_locked = 0; 746 bp->b_locked = 0;
750 747
751 pb->pb_count_desired = pb->pb_buffer_length = len; 748 bp->b_count_desired = bp->b_buffer_length = len;
752 pb->pb_flags |= PBF_MAPPED; 749 bp->b_flags |= XBF_MAPPED;
753 750
754 return 0; 751 return 0;
755} 752}
756 753
757xfs_buf_t * 754xfs_buf_t *
758pagebuf_get_no_daddr( 755xfs_buf_get_noaddr(
759 size_t len, 756 size_t len,
760 xfs_buftarg_t *target) 757 xfs_buftarg_t *target)
761{ 758{
@@ -764,10 +761,10 @@ pagebuf_get_no_daddr(
764 void *data; 761 void *data;
765 int error; 762 int error;
766 763
767 bp = pagebuf_allocate(0); 764 bp = xfs_buf_allocate(0);
768 if (unlikely(bp == NULL)) 765 if (unlikely(bp == NULL))
769 goto fail; 766 goto fail;
770 _pagebuf_initialize(bp, target, 0, len, 0); 767 _xfs_buf_initialize(bp, target, 0, len, 0);
771 768
772 try_again: 769 try_again:
773 data = kmem_alloc(malloc_len, KM_SLEEP | KM_MAYFAIL); 770 data = kmem_alloc(malloc_len, KM_SLEEP | KM_MAYFAIL);
@@ -776,78 +773,73 @@ pagebuf_get_no_daddr(
776 773
777 /* check whether alignment matches.. */ 774 /* check whether alignment matches.. */
778 if ((__psunsigned_t)data != 775 if ((__psunsigned_t)data !=
779 ((__psunsigned_t)data & ~target->pbr_smask)) { 776 ((__psunsigned_t)data & ~target->bt_smask)) {
780 /* .. else double the size and try again */ 777 /* .. else double the size and try again */
781 kmem_free(data, malloc_len); 778 kmem_free(data, malloc_len);
782 malloc_len <<= 1; 779 malloc_len <<= 1;
783 goto try_again; 780 goto try_again;
784 } 781 }
785 782
786 error = pagebuf_associate_memory(bp, data, len); 783 error = xfs_buf_associate_memory(bp, data, len);
787 if (error) 784 if (error)
788 goto fail_free_mem; 785 goto fail_free_mem;
789 bp->pb_flags |= _PBF_KMEM_ALLOC; 786 bp->b_flags |= _XBF_KMEM_ALLOC;
790 787
791 pagebuf_unlock(bp); 788 xfs_buf_unlock(bp);
792 789
793 PB_TRACE(bp, "no_daddr", data); 790 XB_TRACE(bp, "no_daddr", data);
794 return bp; 791 return bp;
795 fail_free_mem: 792 fail_free_mem:
796 kmem_free(data, malloc_len); 793 kmem_free(data, malloc_len);
797 fail_free_buf: 794 fail_free_buf:
798 pagebuf_free(bp); 795 xfs_buf_free(bp);
799 fail: 796 fail:
800 return NULL; 797 return NULL;
801} 798}
802 799
803/* 800/*
804 * pagebuf_hold
805 *
806 * Increment reference count on buffer, to hold the buffer concurrently 801 * Increment reference count on buffer, to hold the buffer concurrently
807 * with another thread which may release (free) the buffer asynchronously. 802 * with another thread which may release (free) the buffer asynchronously.
808 *
809 * Must hold the buffer already to call this function. 803 * Must hold the buffer already to call this function.
810 */ 804 */
811void 805void
812pagebuf_hold( 806xfs_buf_hold(
813 xfs_buf_t *pb) 807 xfs_buf_t *bp)
814{ 808{
815 atomic_inc(&pb->pb_hold); 809 atomic_inc(&bp->b_hold);
816 PB_TRACE(pb, "hold", 0); 810 XB_TRACE(bp, "hold", 0);
817} 811}
818 812
819/* 813/*
820 * pagebuf_rele 814 * Releases a hold on the specified buffer. If the
821 * 815 * the hold count is 1, calls xfs_buf_free.
822 * pagebuf_rele releases a hold on the specified buffer. If the
823 * the hold count is 1, pagebuf_rele calls pagebuf_free.
824 */ 816 */
825void 817void
826pagebuf_rele( 818xfs_buf_rele(
827 xfs_buf_t *pb) 819 xfs_buf_t *bp)
828{ 820{
829 xfs_bufhash_t *hash = pb->pb_hash; 821 xfs_bufhash_t *hash = bp->b_hash;
830 822
831 PB_TRACE(pb, "rele", pb->pb_relse); 823 XB_TRACE(bp, "rele", bp->b_relse);
832 824
833 if (atomic_dec_and_lock(&pb->pb_hold, &hash->bh_lock)) { 825 if (atomic_dec_and_lock(&bp->b_hold, &hash->bh_lock)) {
834 if (pb->pb_relse) { 826 if (bp->b_relse) {
835 atomic_inc(&pb->pb_hold); 827 atomic_inc(&bp->b_hold);
836 spin_unlock(&hash->bh_lock); 828 spin_unlock(&hash->bh_lock);
837 (*(pb->pb_relse)) (pb); 829 (*(bp->b_relse)) (bp);
838 } else if (pb->pb_flags & PBF_FS_MANAGED) { 830 } else if (bp->b_flags & XBF_FS_MANAGED) {
839 spin_unlock(&hash->bh_lock); 831 spin_unlock(&hash->bh_lock);
840 } else { 832 } else {
841 ASSERT(!(pb->pb_flags & (PBF_DELWRI|_PBF_DELWRI_Q))); 833 ASSERT(!(bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q)));
842 list_del_init(&pb->pb_hash_list); 834 list_del_init(&bp->b_hash_list);
843 spin_unlock(&hash->bh_lock); 835 spin_unlock(&hash->bh_lock);
844 pagebuf_free(pb); 836 xfs_buf_free(bp);
845 } 837 }
846 } else { 838 } else {
847 /* 839 /*
848 * Catch reference count leaks 840 * Catch reference count leaks
849 */ 841 */
850 ASSERT(atomic_read(&pb->pb_hold) >= 0); 842 ASSERT(atomic_read(&bp->b_hold) >= 0);
851 } 843 }
852} 844}
853 845
@@ -863,168 +855,122 @@ pagebuf_rele(
863 */ 855 */
864 856
865/* 857/*
866 * pagebuf_cond_lock 858 * Locks a buffer object, if it is not already locked.
867 * 859 * Note that this in no way locks the underlying pages, so it is only
868 * pagebuf_cond_lock locks a buffer object, if it is not already locked. 860 * useful for synchronizing concurrent use of buffer objects, not for
869 * Note that this in no way 861 * synchronizing independent access to the underlying pages.
870 * locks the underlying pages, so it is only useful for synchronizing
871 * concurrent use of page buffer objects, not for synchronizing independent
872 * access to the underlying pages.
873 */ 862 */
874int 863int
875pagebuf_cond_lock( /* lock buffer, if not locked */ 864xfs_buf_cond_lock(
876 /* returns -EBUSY if locked) */ 865 xfs_buf_t *bp)
877 xfs_buf_t *pb)
878{ 866{
879 int locked; 867 int locked;
880 868
881 locked = down_trylock(&pb->pb_sema) == 0; 869 locked = down_trylock(&bp->b_sema) == 0;
882 if (locked) { 870 if (locked) {
883 PB_SET_OWNER(pb); 871 XB_SET_OWNER(bp);
884 } 872 }
885 PB_TRACE(pb, "cond_lock", (long)locked); 873 XB_TRACE(bp, "cond_lock", (long)locked);
886 return(locked ? 0 : -EBUSY); 874 return locked ? 0 : -EBUSY;
887} 875}
888 876
889#if defined(DEBUG) || defined(XFS_BLI_TRACE) 877#if defined(DEBUG) || defined(XFS_BLI_TRACE)
890/*
891 * pagebuf_lock_value
892 *
893 * Return lock value for a pagebuf
894 */
895int 878int
896pagebuf_lock_value( 879xfs_buf_lock_value(
897 xfs_buf_t *pb) 880 xfs_buf_t *bp)
898{ 881{
899 return(atomic_read(&pb->pb_sema.count)); 882 return atomic_read(&bp->b_sema.count);
900} 883}
901#endif 884#endif
902 885
903/* 886/*
904 * pagebuf_lock 887 * Locks a buffer object.
905 * 888 * Note that this in no way locks the underlying pages, so it is only
906 * pagebuf_lock locks a buffer object. Note that this in no way 889 * useful for synchronizing concurrent use of buffer objects, not for
907 * locks the underlying pages, so it is only useful for synchronizing 890 * synchronizing independent access to the underlying pages.
908 * concurrent use of page buffer objects, not for synchronizing independent
909 * access to the underlying pages.
910 */ 891 */
911int 892void
912pagebuf_lock( 893xfs_buf_lock(
913 xfs_buf_t *pb) 894 xfs_buf_t *bp)
914{ 895{
915 PB_TRACE(pb, "lock", 0); 896 XB_TRACE(bp, "lock", 0);
916 if (atomic_read(&pb->pb_io_remaining)) 897 if (atomic_read(&bp->b_io_remaining))
917 blk_run_address_space(pb->pb_target->pbr_mapping); 898 blk_run_address_space(bp->b_target->bt_mapping);
918 down(&pb->pb_sema); 899 down(&bp->b_sema);
919 PB_SET_OWNER(pb); 900 XB_SET_OWNER(bp);
920 PB_TRACE(pb, "locked", 0); 901 XB_TRACE(bp, "locked", 0);
921 return 0;
922} 902}
923 903
924/* 904/*
925 * pagebuf_unlock 905 * Releases the lock on the buffer object.
926 *
927 * pagebuf_unlock releases the lock on the buffer object created by
928 * pagebuf_lock or pagebuf_cond_lock (not any pinning of underlying pages
929 * created by pagebuf_pin).
930 *
931 * If the buffer is marked delwri but is not queued, do so before we 906 * If the buffer is marked delwri but is not queued, do so before we
932 * unlock the buffer as we need to set flags correctly. We also need to 907 * unlock the buffer as we need to set flags correctly. We also need to
933 * take a reference for the delwri queue because the unlocker is going to 908 * take a reference for the delwri queue because the unlocker is going to
934 * drop their's and they don't know we just queued it. 909 * drop their's and they don't know we just queued it.
935 */ 910 */
936void 911void
937pagebuf_unlock( /* unlock buffer */ 912xfs_buf_unlock(
938 xfs_buf_t *pb) /* buffer to unlock */ 913 xfs_buf_t *bp)
939{ 914{
940 if ((pb->pb_flags & (PBF_DELWRI|_PBF_DELWRI_Q)) == PBF_DELWRI) { 915 if ((bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q)) == XBF_DELWRI) {
941 atomic_inc(&pb->pb_hold); 916 atomic_inc(&bp->b_hold);
942 pb->pb_flags |= PBF_ASYNC; 917 bp->b_flags |= XBF_ASYNC;
943 pagebuf_delwri_queue(pb, 0); 918 xfs_buf_delwri_queue(bp, 0);
944 } 919 }
945 920
946 PB_CLEAR_OWNER(pb); 921 XB_CLEAR_OWNER(bp);
947 up(&pb->pb_sema); 922 up(&bp->b_sema);
948 PB_TRACE(pb, "unlock", 0); 923 XB_TRACE(bp, "unlock", 0);
949} 924}
950 925
951 926
952/* 927/*
953 * Pinning Buffer Storage in Memory 928 * Pinning Buffer Storage in Memory
954 */ 929 * Ensure that no attempt to force a buffer to disk will succeed.
955
956/*
957 * pagebuf_pin
958 *
959 * pagebuf_pin locks all of the memory represented by a buffer in
960 * memory. Multiple calls to pagebuf_pin and pagebuf_unpin, for
961 * the same or different buffers affecting a given page, will
962 * properly count the number of outstanding "pin" requests. The
963 * buffer may be released after the pagebuf_pin and a different
964 * buffer used when calling pagebuf_unpin, if desired.
965 * pagebuf_pin should be used by the file system when it wants be
966 * assured that no attempt will be made to force the affected
967 * memory to disk. It does not assure that a given logical page
968 * will not be moved to a different physical page.
969 */ 930 */
970void 931void
971pagebuf_pin( 932xfs_buf_pin(
972 xfs_buf_t *pb) 933 xfs_buf_t *bp)
973{ 934{
974 atomic_inc(&pb->pb_pin_count); 935 atomic_inc(&bp->b_pin_count);
975 PB_TRACE(pb, "pin", (long)pb->pb_pin_count.counter); 936 XB_TRACE(bp, "pin", (long)bp->b_pin_count.counter);
976} 937}
977 938
978/*
979 * pagebuf_unpin
980 *
981 * pagebuf_unpin reverses the locking of memory performed by
982 * pagebuf_pin. Note that both functions affected the logical
983 * pages associated with the buffer, not the buffer itself.
984 */
985void 939void
986pagebuf_unpin( 940xfs_buf_unpin(
987 xfs_buf_t *pb) 941 xfs_buf_t *bp)
988{ 942{
989 if (atomic_dec_and_test(&pb->pb_pin_count)) { 943 if (atomic_dec_and_test(&bp->b_pin_count))
990 wake_up_all(&pb->pb_waiters); 944 wake_up_all(&bp->b_waiters);
991 } 945 XB_TRACE(bp, "unpin", (long)bp->b_pin_count.counter);
992 PB_TRACE(pb, "unpin", (long)pb->pb_pin_count.counter);
993} 946}
994 947
995int 948int
996pagebuf_ispin( 949xfs_buf_ispin(
997 xfs_buf_t *pb) 950 xfs_buf_t *bp)
998{ 951{
999 return atomic_read(&pb->pb_pin_count); 952 return atomic_read(&bp->b_pin_count);
1000} 953}
1001 954
1002/* 955STATIC void
1003 * pagebuf_wait_unpin 956xfs_buf_wait_unpin(
1004 * 957 xfs_buf_t *bp)
1005 * pagebuf_wait_unpin waits until all of the memory associated
1006 * with the buffer is not longer locked in memory. It returns
1007 * immediately if none of the affected pages are locked.
1008 */
1009static inline void
1010_pagebuf_wait_unpin(
1011 xfs_buf_t *pb)
1012{ 958{
1013 DECLARE_WAITQUEUE (wait, current); 959 DECLARE_WAITQUEUE (wait, current);
1014 960
1015 if (atomic_read(&pb->pb_pin_count) == 0) 961 if (atomic_read(&bp->b_pin_count) == 0)
1016 return; 962 return;
1017 963
1018 add_wait_queue(&pb->pb_waiters, &wait); 964 add_wait_queue(&bp->b_waiters, &wait);
1019 for (;;) { 965 for (;;) {
1020 set_current_state(TASK_UNINTERRUPTIBLE); 966 set_current_state(TASK_UNINTERRUPTIBLE);
1021 if (atomic_read(&pb->pb_pin_count) == 0) 967 if (atomic_read(&bp->b_pin_count) == 0)
1022 break; 968 break;
1023 if (atomic_read(&pb->pb_io_remaining)) 969 if (atomic_read(&bp->b_io_remaining))
1024 blk_run_address_space(pb->pb_target->pbr_mapping); 970 blk_run_address_space(bp->b_target->bt_mapping);
1025 schedule(); 971 schedule();
1026 } 972 }
1027 remove_wait_queue(&pb->pb_waiters, &wait); 973 remove_wait_queue(&bp->b_waiters, &wait);
1028 set_current_state(TASK_RUNNING); 974 set_current_state(TASK_RUNNING);
1029} 975}
1030 976
@@ -1032,241 +978,216 @@ _pagebuf_wait_unpin(
1032 * Buffer Utility Routines 978 * Buffer Utility Routines
1033 */ 979 */
1034 980
1035/*
1036 * pagebuf_iodone
1037 *
1038 * pagebuf_iodone marks a buffer for which I/O is in progress
1039 * done with respect to that I/O. The pb_iodone routine, if
1040 * present, will be called as a side-effect.
1041 */
1042STATIC void 981STATIC void
1043pagebuf_iodone_work( 982xfs_buf_iodone_work(
1044 void *v) 983 void *v)
1045{ 984{
1046 xfs_buf_t *bp = (xfs_buf_t *)v; 985 xfs_buf_t *bp = (xfs_buf_t *)v;
1047 986
1048 if (bp->pb_iodone) 987 if (bp->b_iodone)
1049 (*(bp->pb_iodone))(bp); 988 (*(bp->b_iodone))(bp);
1050 else if (bp->pb_flags & PBF_ASYNC) 989 else if (bp->b_flags & XBF_ASYNC)
1051 xfs_buf_relse(bp); 990 xfs_buf_relse(bp);
1052} 991}
1053 992
1054void 993void
1055pagebuf_iodone( 994xfs_buf_ioend(
1056 xfs_buf_t *pb, 995 xfs_buf_t *bp,
1057 int schedule) 996 int schedule)
1058{ 997{
1059 pb->pb_flags &= ~(PBF_READ | PBF_WRITE); 998 bp->b_flags &= ~(XBF_READ | XBF_WRITE);
1060 if (pb->pb_error == 0) 999 if (bp->b_error == 0)
1061 pb->pb_flags |= PBF_DONE; 1000 bp->b_flags |= XBF_DONE;
1062 1001
1063 PB_TRACE(pb, "iodone", pb->pb_iodone); 1002 XB_TRACE(bp, "iodone", bp->b_iodone);
1064 1003
1065 if ((pb->pb_iodone) || (pb->pb_flags & PBF_ASYNC)) { 1004 if ((bp->b_iodone) || (bp->b_flags & XBF_ASYNC)) {
1066 if (schedule) { 1005 if (schedule) {
1067 INIT_WORK(&pb->pb_iodone_work, pagebuf_iodone_work, pb); 1006 INIT_WORK(&bp->b_iodone_work, xfs_buf_iodone_work, bp);
1068 queue_work(xfslogd_workqueue, &pb->pb_iodone_work); 1007 queue_work(xfslogd_workqueue, &bp->b_iodone_work);
1069 } else { 1008 } else {
1070 pagebuf_iodone_work(pb); 1009 xfs_buf_iodone_work(bp);
1071 } 1010 }
1072 } else { 1011 } else {
1073 up(&pb->pb_iodonesema); 1012 up(&bp->b_iodonesema);
1074 } 1013 }
1075} 1014}
1076 1015
1077/*
1078 * pagebuf_ioerror
1079 *
1080 * pagebuf_ioerror sets the error code for a buffer.
1081 */
1082void 1016void
1083pagebuf_ioerror( /* mark/clear buffer error flag */ 1017xfs_buf_ioerror(
1084 xfs_buf_t *pb, /* buffer to mark */ 1018 xfs_buf_t *bp,
1085 int error) /* error to store (0 if none) */ 1019 int error)
1086{ 1020{
1087 ASSERT(error >= 0 && error <= 0xffff); 1021 ASSERT(error >= 0 && error <= 0xffff);
1088 pb->pb_error = (unsigned short)error; 1022 bp->b_error = (unsigned short)error;
1089 PB_TRACE(pb, "ioerror", (unsigned long)error); 1023 XB_TRACE(bp, "ioerror", (unsigned long)error);
1090} 1024}
1091 1025
1092/* 1026/*
1093 * pagebuf_iostart 1027 * Initiate I/O on a buffer, based on the flags supplied.
1094 * 1028 * The b_iodone routine in the buffer supplied will only be called
1095 * pagebuf_iostart initiates I/O on a buffer, based on the flags supplied.
1096 * If necessary, it will arrange for any disk space allocation required,
1097 * and it will break up the request if the block mappings require it.
1098 * The pb_iodone routine in the buffer supplied will only be called
1099 * when all of the subsidiary I/O requests, if any, have been completed. 1029 * when all of the subsidiary I/O requests, if any, have been completed.
1100 * pagebuf_iostart calls the pagebuf_ioinitiate routine or
1101 * pagebuf_iorequest, if the former routine is not defined, to start
1102 * the I/O on a given low-level request.
1103 */ 1030 */
1104int 1031int
1105pagebuf_iostart( /* start I/O on a buffer */ 1032xfs_buf_iostart(
1106 xfs_buf_t *pb, /* buffer to start */ 1033 xfs_buf_t *bp,
1107 page_buf_flags_t flags) /* PBF_LOCK, PBF_ASYNC, PBF_READ, */ 1034 xfs_buf_flags_t flags)
1108 /* PBF_WRITE, PBF_DELWRI, */
1109 /* PBF_DONT_BLOCK */
1110{ 1035{
1111 int status = 0; 1036 int status = 0;
1112 1037
1113 PB_TRACE(pb, "iostart", (unsigned long)flags); 1038 XB_TRACE(bp, "iostart", (unsigned long)flags);
1114 1039
1115 if (flags & PBF_DELWRI) { 1040 if (flags & XBF_DELWRI) {
1116 pb->pb_flags &= ~(PBF_READ | PBF_WRITE | PBF_ASYNC); 1041 bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_ASYNC);
1117 pb->pb_flags |= flags & (PBF_DELWRI | PBF_ASYNC); 1042 bp->b_flags |= flags & (XBF_DELWRI | XBF_ASYNC);
1118 pagebuf_delwri_queue(pb, 1); 1043 xfs_buf_delwri_queue(bp, 1);
1119 return status; 1044 return status;
1120 } 1045 }
1121 1046
1122 pb->pb_flags &= ~(PBF_READ | PBF_WRITE | PBF_ASYNC | PBF_DELWRI | \ 1047 bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_ASYNC | XBF_DELWRI | \
1123 PBF_READ_AHEAD | _PBF_RUN_QUEUES); 1048 XBF_READ_AHEAD | _XBF_RUN_QUEUES);
1124 pb->pb_flags |= flags & (PBF_READ | PBF_WRITE | PBF_ASYNC | \ 1049 bp->b_flags |= flags & (XBF_READ | XBF_WRITE | XBF_ASYNC | \
1125 PBF_READ_AHEAD | _PBF_RUN_QUEUES); 1050 XBF_READ_AHEAD | _XBF_RUN_QUEUES);
1126 1051
1127 BUG_ON(pb->pb_bn == XFS_BUF_DADDR_NULL); 1052 BUG_ON(bp->b_bn == XFS_BUF_DADDR_NULL);
1128 1053
1129 /* For writes allow an alternate strategy routine to precede 1054 /* For writes allow an alternate strategy routine to precede
1130 * the actual I/O request (which may not be issued at all in 1055 * the actual I/O request (which may not be issued at all in
1131 * a shutdown situation, for example). 1056 * a shutdown situation, for example).
1132 */ 1057 */
1133 status = (flags & PBF_WRITE) ? 1058 status = (flags & XBF_WRITE) ?
1134 pagebuf_iostrategy(pb) : pagebuf_iorequest(pb); 1059 xfs_buf_iostrategy(bp) : xfs_buf_iorequest(bp);
1135 1060
1136 /* Wait for I/O if we are not an async request. 1061 /* Wait for I/O if we are not an async request.
1137 * Note: async I/O request completion will release the buffer, 1062 * Note: async I/O request completion will release the buffer,
1138 * and that can already be done by this point. So using the 1063 * and that can already be done by this point. So using the
1139 * buffer pointer from here on, after async I/O, is invalid. 1064 * buffer pointer from here on, after async I/O, is invalid.
1140 */ 1065 */
1141 if (!status && !(flags & PBF_ASYNC)) 1066 if (!status && !(flags & XBF_ASYNC))
1142 status = pagebuf_iowait(pb); 1067 status = xfs_buf_iowait(bp);
1143 1068
1144 return status; 1069 return status;
1145} 1070}
1146 1071
1147/*
1148 * Helper routine for pagebuf_iorequest
1149 */
1150
1151STATIC __inline__ int 1072STATIC __inline__ int
1152_pagebuf_iolocked( 1073_xfs_buf_iolocked(
1153 xfs_buf_t *pb) 1074 xfs_buf_t *bp)
1154{ 1075{
1155 ASSERT(pb->pb_flags & (PBF_READ|PBF_WRITE)); 1076 ASSERT(bp->b_flags & (XBF_READ | XBF_WRITE));
1156 if (pb->pb_flags & PBF_READ) 1077 if (bp->b_flags & XBF_READ)
1157 return pb->pb_locked; 1078 return bp->b_locked;
1158 return 0; 1079 return 0;
1159} 1080}
1160 1081
1161STATIC __inline__ void 1082STATIC __inline__ void
1162_pagebuf_iodone( 1083_xfs_buf_ioend(
1163 xfs_buf_t *pb, 1084 xfs_buf_t *bp,
1164 int schedule) 1085 int schedule)
1165{ 1086{
1166 if (atomic_dec_and_test(&pb->pb_io_remaining) == 1) { 1087 if (atomic_dec_and_test(&bp->b_io_remaining) == 1) {
1167 pb->pb_locked = 0; 1088 bp->b_locked = 0;
1168 pagebuf_iodone(pb, schedule); 1089 xfs_buf_ioend(bp, schedule);
1169 } 1090 }
1170} 1091}
1171 1092
1172STATIC int 1093STATIC int
1173bio_end_io_pagebuf( 1094xfs_buf_bio_end_io(
1174 struct bio *bio, 1095 struct bio *bio,
1175 unsigned int bytes_done, 1096 unsigned int bytes_done,
1176 int error) 1097 int error)
1177{ 1098{
1178 xfs_buf_t *pb = (xfs_buf_t *)bio->bi_private; 1099 xfs_buf_t *bp = (xfs_buf_t *)bio->bi_private;
1179 unsigned int blocksize = pb->pb_target->pbr_bsize; 1100 unsigned int blocksize = bp->b_target->bt_bsize;
1180 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; 1101 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
1181 1102
1182 if (bio->bi_size) 1103 if (bio->bi_size)
1183 return 1; 1104 return 1;
1184 1105
1185 if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) 1106 if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
1186 pb->pb_error = EIO; 1107 bp->b_error = EIO;
1187 1108
1188 do { 1109 do {
1189 struct page *page = bvec->bv_page; 1110 struct page *page = bvec->bv_page;
1190 1111
1191 if (unlikely(pb->pb_error)) { 1112 if (unlikely(bp->b_error)) {
1192 if (pb->pb_flags & PBF_READ) 1113 if (bp->b_flags & XBF_READ)
1193 ClearPageUptodate(page); 1114 ClearPageUptodate(page);
1194 SetPageError(page); 1115 SetPageError(page);
1195 } else if (blocksize == PAGE_CACHE_SIZE) { 1116 } else if (blocksize >= PAGE_CACHE_SIZE) {
1196 SetPageUptodate(page); 1117 SetPageUptodate(page);
1197 } else if (!PagePrivate(page) && 1118 } else if (!PagePrivate(page) &&
1198 (pb->pb_flags & _PBF_PAGE_CACHE)) { 1119 (bp->b_flags & _XBF_PAGE_CACHE)) {
1199 set_page_region(page, bvec->bv_offset, bvec->bv_len); 1120 set_page_region(page, bvec->bv_offset, bvec->bv_len);
1200 } 1121 }
1201 1122
1202 if (--bvec >= bio->bi_io_vec) 1123 if (--bvec >= bio->bi_io_vec)
1203 prefetchw(&bvec->bv_page->flags); 1124 prefetchw(&bvec->bv_page->flags);
1204 1125
1205 if (_pagebuf_iolocked(pb)) { 1126 if (_xfs_buf_iolocked(bp)) {
1206 unlock_page(page); 1127 unlock_page(page);
1207 } 1128 }
1208 } while (bvec >= bio->bi_io_vec); 1129 } while (bvec >= bio->bi_io_vec);
1209 1130
1210 _pagebuf_iodone(pb, 1); 1131 _xfs_buf_ioend(bp, 1);
1211 bio_put(bio); 1132 bio_put(bio);
1212 return 0; 1133 return 0;
1213} 1134}
1214 1135
1215STATIC void 1136STATIC void
1216_pagebuf_ioapply( 1137_xfs_buf_ioapply(
1217 xfs_buf_t *pb) 1138 xfs_buf_t *bp)
1218{ 1139{
1219 int i, rw, map_i, total_nr_pages, nr_pages; 1140 int i, rw, map_i, total_nr_pages, nr_pages;
1220 struct bio *bio; 1141 struct bio *bio;
1221 int offset = pb->pb_offset; 1142 int offset = bp->b_offset;
1222 int size = pb->pb_count_desired; 1143 int size = bp->b_count_desired;
1223 sector_t sector = pb->pb_bn; 1144 sector_t sector = bp->b_bn;
1224 unsigned int blocksize = pb->pb_target->pbr_bsize; 1145 unsigned int blocksize = bp->b_target->bt_bsize;
1225 int locking = _pagebuf_iolocked(pb); 1146 int locking = _xfs_buf_iolocked(bp);
1226 1147
1227 total_nr_pages = pb->pb_page_count; 1148 total_nr_pages = bp->b_page_count;
1228 map_i = 0; 1149 map_i = 0;
1229 1150
1230 if (pb->pb_flags & _PBF_RUN_QUEUES) { 1151 if (bp->b_flags & _XBF_RUN_QUEUES) {
1231 pb->pb_flags &= ~_PBF_RUN_QUEUES; 1152 bp->b_flags &= ~_XBF_RUN_QUEUES;
1232 rw = (pb->pb_flags & PBF_READ) ? READ_SYNC : WRITE_SYNC; 1153 rw = (bp->b_flags & XBF_READ) ? READ_SYNC : WRITE_SYNC;
1233 } else { 1154 } else {
1234 rw = (pb->pb_flags & PBF_READ) ? READ : WRITE; 1155 rw = (bp->b_flags & XBF_READ) ? READ : WRITE;
1235 } 1156 }
1236 1157
1237 if (pb->pb_flags & PBF_ORDERED) { 1158 if (bp->b_flags & XBF_ORDERED) {
1238 ASSERT(!(pb->pb_flags & PBF_READ)); 1159 ASSERT(!(bp->b_flags & XBF_READ));
1239 rw = WRITE_BARRIER; 1160 rw = WRITE_BARRIER;
1240 } 1161 }
1241 1162
1242 /* Special code path for reading a sub page size pagebuf in -- 1163 /* Special code path for reading a sub page size buffer in --
1243 * we populate up the whole page, and hence the other metadata 1164 * we populate up the whole page, and hence the other metadata
1244 * in the same page. This optimization is only valid when the 1165 * in the same page. This optimization is only valid when the
1245 * filesystem block size and the page size are equal. 1166 * filesystem block size is not smaller than the page size.
1246 */ 1167 */
1247 if ((pb->pb_buffer_length < PAGE_CACHE_SIZE) && 1168 if ((bp->b_buffer_length < PAGE_CACHE_SIZE) &&
1248 (pb->pb_flags & PBF_READ) && locking && 1169 (bp->b_flags & XBF_READ) && locking &&
1249 (blocksize == PAGE_CACHE_SIZE)) { 1170 (blocksize >= PAGE_CACHE_SIZE)) {
1250 bio = bio_alloc(GFP_NOIO, 1); 1171 bio = bio_alloc(GFP_NOIO, 1);
1251 1172
1252 bio->bi_bdev = pb->pb_target->pbr_bdev; 1173 bio->bi_bdev = bp->b_target->bt_bdev;
1253 bio->bi_sector = sector - (offset >> BBSHIFT); 1174 bio->bi_sector = sector - (offset >> BBSHIFT);
1254 bio->bi_end_io = bio_end_io_pagebuf; 1175 bio->bi_end_io = xfs_buf_bio_end_io;
1255 bio->bi_private = pb; 1176 bio->bi_private = bp;
1256 1177
1257 bio_add_page(bio, pb->pb_pages[0], PAGE_CACHE_SIZE, 0); 1178 bio_add_page(bio, bp->b_pages[0], PAGE_CACHE_SIZE, 0);
1258 size = 0; 1179 size = 0;
1259 1180
1260 atomic_inc(&pb->pb_io_remaining); 1181 atomic_inc(&bp->b_io_remaining);
1261 1182
1262 goto submit_io; 1183 goto submit_io;
1263 } 1184 }
1264 1185
1265 /* Lock down the pages which we need to for the request */ 1186 /* Lock down the pages which we need to for the request */
1266 if (locking && (pb->pb_flags & PBF_WRITE) && (pb->pb_locked == 0)) { 1187 if (locking && (bp->b_flags & XBF_WRITE) && (bp->b_locked == 0)) {
1267 for (i = 0; size; i++) { 1188 for (i = 0; size; i++) {
1268 int nbytes = PAGE_CACHE_SIZE - offset; 1189 int nbytes = PAGE_CACHE_SIZE - offset;
1269 struct page *page = pb->pb_pages[i]; 1190 struct page *page = bp->b_pages[i];
1270 1191
1271 if (nbytes > size) 1192 if (nbytes > size)
1272 nbytes = size; 1193 nbytes = size;
@@ -1276,30 +1197,30 @@ _pagebuf_ioapply(
1276 size -= nbytes; 1197 size -= nbytes;
1277 offset = 0; 1198 offset = 0;
1278 } 1199 }
1279 offset = pb->pb_offset; 1200 offset = bp->b_offset;
1280 size = pb->pb_count_desired; 1201 size = bp->b_count_desired;
1281 } 1202 }
1282 1203
1283next_chunk: 1204next_chunk:
1284 atomic_inc(&pb->pb_io_remaining); 1205 atomic_inc(&bp->b_io_remaining);
1285 nr_pages = BIO_MAX_SECTORS >> (PAGE_SHIFT - BBSHIFT); 1206 nr_pages = BIO_MAX_SECTORS >> (PAGE_SHIFT - BBSHIFT);
1286 if (nr_pages > total_nr_pages) 1207 if (nr_pages > total_nr_pages)
1287 nr_pages = total_nr_pages; 1208 nr_pages = total_nr_pages;
1288 1209
1289 bio = bio_alloc(GFP_NOIO, nr_pages); 1210 bio = bio_alloc(GFP_NOIO, nr_pages);
1290 bio->bi_bdev = pb->pb_target->pbr_bdev; 1211 bio->bi_bdev = bp->b_target->bt_bdev;
1291 bio->bi_sector = sector; 1212 bio->bi_sector = sector;
1292 bio->bi_end_io = bio_end_io_pagebuf; 1213 bio->bi_end_io = xfs_buf_bio_end_io;
1293 bio->bi_private = pb; 1214 bio->bi_private = bp;
1294 1215
1295 for (; size && nr_pages; nr_pages--, map_i++) { 1216 for (; size && nr_pages; nr_pages--, map_i++) {
1296 int nbytes = PAGE_CACHE_SIZE - offset; 1217 int rbytes, nbytes = PAGE_CACHE_SIZE - offset;
1297 1218
1298 if (nbytes > size) 1219 if (nbytes > size)
1299 nbytes = size; 1220 nbytes = size;
1300 1221
1301 if (bio_add_page(bio, pb->pb_pages[map_i], 1222 rbytes = bio_add_page(bio, bp->b_pages[map_i], nbytes, offset);
1302 nbytes, offset) < nbytes) 1223 if (rbytes < nbytes)
1303 break; 1224 break;
1304 1225
1305 offset = 0; 1226 offset = 0;
@@ -1315,107 +1236,102 @@ submit_io:
1315 goto next_chunk; 1236 goto next_chunk;
1316 } else { 1237 } else {
1317 bio_put(bio); 1238 bio_put(bio);
1318 pagebuf_ioerror(pb, EIO); 1239 xfs_buf_ioerror(bp, EIO);
1319 } 1240 }
1320} 1241}
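
The sub-page read path above rounds the I/O start back so that the bio covers the whole page containing the buffer. A minimal standalone sketch of that sector arithmetic, assuming BBSHIFT is 9 (XFS basic blocks are 512 bytes) and a 4 KiB page; the numbers are made up purely for illustration:

/*
 * Sketch of the sector arithmetic in the sub-page read path: to read the
 * whole page that holds a small buffer, the bio starts (offset >> BBSHIFT)
 * basic blocks before the buffer's own sector.
 */
#include <stdio.h>

#define BBSHIFT	9	/* 512-byte basic blocks, as in XFS */

int main(void)
{
	unsigned long long sector = 123460;	/* buffer's own start, in 512-byte units */
	unsigned int offset = 2048;		/* buffer data starts 2 KiB into its page */

	unsigned long long bio_sector = sector - (offset >> BBSHIFT);

	/* 123460 - 4 = 123456: the first sector of the page holding the buffer */
	printf("bio starts at sector %llu\n", bio_sector);
	return 0;
}
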
1321 1242
1322/*
1323 * pagebuf_iorequest -- the core I/O request routine.
1324 */
1325int 1243int
1326pagebuf_iorequest( /* start real I/O */ 1244xfs_buf_iorequest(
1327 xfs_buf_t *pb) /* buffer to convey to device */ 1245 xfs_buf_t *bp)
1328{ 1246{
1329 PB_TRACE(pb, "iorequest", 0); 1247 XB_TRACE(bp, "iorequest", 0);
1330 1248
1331 if (pb->pb_flags & PBF_DELWRI) { 1249 if (bp->b_flags & XBF_DELWRI) {
1332 pagebuf_delwri_queue(pb, 1); 1250 xfs_buf_delwri_queue(bp, 1);
1333 return 0; 1251 return 0;
1334 } 1252 }
1335 1253
1336 if (pb->pb_flags & PBF_WRITE) { 1254 if (bp->b_flags & XBF_WRITE) {
1337 _pagebuf_wait_unpin(pb); 1255 xfs_buf_wait_unpin(bp);
1338 } 1256 }
1339 1257
1340 pagebuf_hold(pb); 1258 xfs_buf_hold(bp);
1341 1259
1342 /* Set the count to 1 initially, this will stop an I/O 1260 /* Set the count to 1 initially, this will stop an I/O
1343 * completion callout which happens before we have started 1261 * completion callout which happens before we have started
1344 * all the I/O from calling pagebuf_iodone too early. 1262 * all the I/O from calling xfs_buf_ioend too early.
1345 */ 1263 */
1346 atomic_set(&pb->pb_io_remaining, 1); 1264 atomic_set(&bp->b_io_remaining, 1);
1347 _pagebuf_ioapply(pb); 1265 _xfs_buf_ioapply(bp);
1348 _pagebuf_iodone(pb, 0); 1266 _xfs_buf_ioend(bp, 0);
1349 1267
1350 pagebuf_rele(pb); 1268 xfs_buf_rele(bp);
1351 return 0; 1269 return 0;
1352} 1270}
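
The comment in xfs_buf_iorequest() describes a common completion-counting idiom: b_io_remaining starts at 1 so that chunks completing while later chunks are still being issued cannot drop the count to zero and run the completion early, and the submitter drops its own reference last. A minimal userspace sketch of that idiom, with illustrative names rather than kernel APIs (the "completions" here run synchronously just to show the counting):

/*
 * The count starts at 1 (the submitter's reference); each chunk adds one
 * and its completion subtracts one.  Whoever drops the count to zero,
 * whether the last chunk completion or the submitter, runs buf_ioend().
 */
#include <stdatomic.h>
#include <stdio.h>

struct buf {
	atomic_int	io_remaining;	/* outstanding chunks + submitter ref */
	int		error;
};

static void buf_ioend(struct buf *bp)
{
	printf("all I/O done, error=%d\n", bp->error);
}

static void buf_chunk_done(struct buf *bp)	/* per-chunk completion */
{
	if (atomic_fetch_sub(&bp->io_remaining, 1) == 1)
		buf_ioend(bp);
}

static void buf_submit(struct buf *bp, int nchunks)
{
	atomic_init(&bp->io_remaining, 1);	/* submitter's reference */
	for (int i = 0; i < nchunks; i++) {
		atomic_fetch_add(&bp->io_remaining, 1);
		buf_chunk_done(bp);		/* pretend the chunk completed */
	}
	buf_chunk_done(bp);			/* drop the submitter's reference */
}

int main(void)
{
	struct buf b = { .error = 0 };
	buf_submit(&b, 3);
	return 0;
}

Without the initial count of 1, a fast completion arriving before the last chunk was issued could see the count hit zero and signal completion while I/O was still being started.
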
1353 1271
1354/* 1272/*
1355 * pagebuf_iowait 1273 * Waits for I/O to complete on the buffer supplied.
1356 * 1274 * It returns immediately if no I/O is pending.
1357 * pagebuf_iowait waits for I/O to complete on the buffer supplied. 1275 * It returns the I/O error code, if any, or 0 if there was no error.
1358 * It returns immediately if no I/O is pending. In any case, it returns
1359 * the error code, if any, or 0 if there is no error.
1360 */ 1276 */
1361int 1277int
1362pagebuf_iowait( 1278xfs_buf_iowait(
1363 xfs_buf_t *pb) 1279 xfs_buf_t *bp)
1364{ 1280{
1365 PB_TRACE(pb, "iowait", 0); 1281 XB_TRACE(bp, "iowait", 0);
1366 if (atomic_read(&pb->pb_io_remaining)) 1282 if (atomic_read(&bp->b_io_remaining))
1367 blk_run_address_space(pb->pb_target->pbr_mapping); 1283 blk_run_address_space(bp->b_target->bt_mapping);
1368 down(&pb->pb_iodonesema); 1284 down(&bp->b_iodonesema);
1369 PB_TRACE(pb, "iowaited", (long)pb->pb_error); 1285 XB_TRACE(bp, "iowaited", (long)bp->b_error);
1370 return pb->pb_error; 1286 return bp->b_error;
1371} 1287}
1372 1288
1373caddr_t 1289xfs_caddr_t
1374pagebuf_offset( 1290xfs_buf_offset(
1375 xfs_buf_t *pb, 1291 xfs_buf_t *bp,
1376 size_t offset) 1292 size_t offset)
1377{ 1293{
1378 struct page *page; 1294 struct page *page;
1379 1295
1380 offset += pb->pb_offset; 1296 if (bp->b_flags & XBF_MAPPED)
1297 return XFS_BUF_PTR(bp) + offset;
1381 1298
1382 page = pb->pb_pages[offset >> PAGE_CACHE_SHIFT]; 1299 offset += bp->b_offset;
1383 return (caddr_t) page_address(page) + (offset & (PAGE_CACHE_SIZE - 1)); 1300 page = bp->b_pages[offset >> PAGE_CACHE_SHIFT];
1301 return (xfs_caddr_t)page_address(page) + (offset & (PAGE_CACHE_SIZE-1));
1384} 1302}
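
For an unmapped buffer, xfs_buf_offset() translates a caller-relative byte offset into a page index and an offset within that page. A small standalone illustration of the same arithmetic, assuming 4 KiB pages; PG_SIZE and PG_SHIFT stand in for PAGE_CACHE_SIZE and PAGE_CACHE_SHIFT:

/* Split a buffer-relative offset into (page index, offset within page). */
#include <stdio.h>

#define PG_SIZE		4096u
#define PG_SHIFT	12

int main(void)
{
	unsigned int b_offset = 512;	/* where the data starts in the first page */
	unsigned int offset = 9000;	/* caller-relative byte offset */

	offset += b_offset;		/* make it relative to the page array */
	unsigned int page_index = offset >> PG_SHIFT;
	unsigned int page_off = offset & (PG_SIZE - 1);

	/* 9512 bytes in: page 2, byte 1320 within that page */
	printf("page %u, offset %u\n", page_index, page_off);
	return 0;
}
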
1385 1303
1386/* 1304/*
1387 * pagebuf_iomove
1388 *
1389 * Move data into or out of a buffer. 1305 * Move data into or out of a buffer.
1390 */ 1306 */
1391void 1307void
1392pagebuf_iomove( 1308xfs_buf_iomove(
1393 xfs_buf_t *pb, /* buffer to process */ 1309 xfs_buf_t *bp, /* buffer to process */
1394 size_t boff, /* starting buffer offset */ 1310 size_t boff, /* starting buffer offset */
1395 size_t bsize, /* length to copy */ 1311 size_t bsize, /* length to copy */
1396 caddr_t data, /* data address */ 1312 caddr_t data, /* data address */
1397 page_buf_rw_t mode) /* read/write flag */ 1313 xfs_buf_rw_t mode) /* read/write/zero flag */
1398{ 1314{
1399 size_t bend, cpoff, csize; 1315 size_t bend, cpoff, csize;
1400 struct page *page; 1316 struct page *page;
1401 1317
1402 bend = boff + bsize; 1318 bend = boff + bsize;
1403 while (boff < bend) { 1319 while (boff < bend) {
1404 page = pb->pb_pages[page_buf_btoct(boff + pb->pb_offset)]; 1320 page = bp->b_pages[xfs_buf_btoct(boff + bp->b_offset)];
1405 cpoff = page_buf_poff(boff + pb->pb_offset); 1321 cpoff = xfs_buf_poff(boff + bp->b_offset);
1406 csize = min_t(size_t, 1322 csize = min_t(size_t,
1407 PAGE_CACHE_SIZE-cpoff, pb->pb_count_desired-boff); 1323 PAGE_CACHE_SIZE-cpoff, bp->b_count_desired-boff);
1408 1324
1409 ASSERT(((csize + cpoff) <= PAGE_CACHE_SIZE)); 1325 ASSERT(((csize + cpoff) <= PAGE_CACHE_SIZE));
1410 1326
1411 switch (mode) { 1327 switch (mode) {
1412 case PBRW_ZERO: 1328 case XBRW_ZERO:
1413 memset(page_address(page) + cpoff, 0, csize); 1329 memset(page_address(page) + cpoff, 0, csize);
1414 break; 1330 break;
1415 case PBRW_READ: 1331 case XBRW_READ:
1416 memcpy(data, page_address(page) + cpoff, csize); 1332 memcpy(data, page_address(page) + cpoff, csize);
1417 break; 1333 break;
1418 case PBRW_WRITE: 1334 case XBRW_WRITE:
1419 memcpy(page_address(page) + cpoff, data, csize); 1335 memcpy(page_address(page) + cpoff, data, csize);
1420 } 1336 }
1421 1337
@@ -1425,12 +1341,12 @@ pagebuf_iomove(
1425} 1341}
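
xfs_buf_iomove() copies in per-page chunks: each iteration is limited both by the space left in the current page and by the bytes remaining in the request. A simplified standalone sketch of that loop, using a flat array in place of the buffer's page array and ignoring b_offset for brevity:

/*
 * Each pass copies at most to the end of the current "page", so a request
 * that straddles page boundaries becomes several memcpy() calls.
 */
#include <string.h>
#include <stdio.h>

#define PG_SIZE 4096u

static char backing[4 * PG_SIZE];	/* pretend 4-page buffer */

static void iomove_write(size_t boff, size_t bsize, const char *data)
{
	size_t bend = boff + bsize;

	while (boff < bend) {
		size_t cpoff = boff & (PG_SIZE - 1);	/* offset within page */
		size_t csize = PG_SIZE - cpoff;		/* room left in page */
		if (csize > bend - boff)
			csize = bend - boff;		/* bytes left in request */

		memcpy(backing + boff, data, csize);	/* one page at most */
		data += csize;
		boff += csize;
	}
}

int main(void)
{
	char payload[6000];
	memset(payload, 'x', sizeof(payload));
	iomove_write(3000, sizeof(payload), payload);	/* straddles two page boundaries */
	printf("%c %c\n", backing[3000], backing[8999]);
	return 0;
}
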
1426 1342
1427/* 1343/*
1428 * Handling of buftargs. 1344 * Handling of buffer targets (buftargs).
1429 */ 1345 */
1430 1346
1431/* 1347/*
1432 * Wait for any bufs with callbacks that have been submitted but 1348 * Wait for any bufs with callbacks that have been submitted but
1433 * have not yet returned... walk the hash list for the target. 1349 * have not yet returned... walk the hash list for the target.
1434 */ 1350 */
1435void 1351void
1436xfs_wait_buftarg( 1352xfs_wait_buftarg(
@@ -1444,15 +1360,15 @@ xfs_wait_buftarg(
1444 hash = &btp->bt_hash[i]; 1360 hash = &btp->bt_hash[i];
1445again: 1361again:
1446 spin_lock(&hash->bh_lock); 1362 spin_lock(&hash->bh_lock);
1447 list_for_each_entry_safe(bp, n, &hash->bh_list, pb_hash_list) { 1363 list_for_each_entry_safe(bp, n, &hash->bh_list, b_hash_list) {
1448 ASSERT(btp == bp->pb_target); 1364 ASSERT(btp == bp->b_target);
1449 if (!(bp->pb_flags & PBF_FS_MANAGED)) { 1365 if (!(bp->b_flags & XBF_FS_MANAGED)) {
1450 spin_unlock(&hash->bh_lock); 1366 spin_unlock(&hash->bh_lock);
1451 /* 1367 /*
1452 * Catch superblock reference count leaks 1368 * Catch superblock reference count leaks
1453 * immediately 1369 * immediately
1454 */ 1370 */
1455 BUG_ON(bp->pb_bn == 0); 1371 BUG_ON(bp->b_bn == 0);
1456 delay(100); 1372 delay(100);
1457 goto again; 1373 goto again;
1458 } 1374 }
@@ -1462,9 +1378,9 @@ again:
1462} 1378}
1463 1379
1464/* 1380/*
1465 * Allocate buffer hash table for a given target. 1381 * Allocate buffer hash table for a given target.
1466 * For devices containing metadata (i.e. not the log/realtime devices) 1382 * For devices containing metadata (i.e. not the log/realtime devices)
1467 * we need to allocate a much larger hash table. 1383 * we need to allocate a much larger hash table.
1468 */ 1384 */
1469STATIC void 1385STATIC void
1470xfs_alloc_bufhash( 1386xfs_alloc_bufhash(
@@ -1487,11 +1403,34 @@ STATIC void
1487xfs_free_bufhash( 1403xfs_free_bufhash(
1488 xfs_buftarg_t *btp) 1404 xfs_buftarg_t *btp)
1489{ 1405{
1490 kmem_free(btp->bt_hash, 1406 kmem_free(btp->bt_hash, (1<<btp->bt_hashshift) * sizeof(xfs_bufhash_t));
1491 (1 << btp->bt_hashshift) * sizeof(xfs_bufhash_t));
1492 btp->bt_hash = NULL; 1407 btp->bt_hash = NULL;
1493} 1408}
1494 1409
1410/*
1411 * buftarg list for delwrite queue processing
1412 */
1413STATIC LIST_HEAD(xfs_buftarg_list);
1414STATIC DEFINE_SPINLOCK(xfs_buftarg_lock);
1415
1416STATIC void
1417xfs_register_buftarg(
1418 xfs_buftarg_t *btp)
1419{
1420 spin_lock(&xfs_buftarg_lock);
1421 list_add(&btp->bt_list, &xfs_buftarg_list);
1422 spin_unlock(&xfs_buftarg_lock);
1423}
1424
1425STATIC void
1426xfs_unregister_buftarg(
1427 xfs_buftarg_t *btp)
1428{
1429 spin_lock(&xfs_buftarg_lock);
1430 list_del(&btp->bt_list);
1431 spin_unlock(&xfs_buftarg_lock);
1432}
1433
1495void 1434void
1496xfs_free_buftarg( 1435xfs_free_buftarg(
1497 xfs_buftarg_t *btp, 1436 xfs_buftarg_t *btp,
@@ -1499,9 +1438,16 @@ xfs_free_buftarg(
1499{ 1438{
1500 xfs_flush_buftarg(btp, 1); 1439 xfs_flush_buftarg(btp, 1);
1501 if (external) 1440 if (external)
1502 xfs_blkdev_put(btp->pbr_bdev); 1441 xfs_blkdev_put(btp->bt_bdev);
1503 xfs_free_bufhash(btp); 1442 xfs_free_bufhash(btp);
1504 iput(btp->pbr_mapping->host); 1443 iput(btp->bt_mapping->host);
1444
1445 /* Unregister the buftarg first so that we don't get a
1446 * wakeup finding a non-existent task
1447 */
1448 xfs_unregister_buftarg(btp);
1449 kthread_stop(btp->bt_task);
1450
1505 kmem_free(btp, sizeof(*btp)); 1451 kmem_free(btp, sizeof(*btp));
1506} 1452}
1507 1453
@@ -1512,11 +1458,11 @@ xfs_setsize_buftarg_flags(
1512 unsigned int sectorsize, 1458 unsigned int sectorsize,
1513 int verbose) 1459 int verbose)
1514{ 1460{
1515 btp->pbr_bsize = blocksize; 1461 btp->bt_bsize = blocksize;
1516 btp->pbr_sshift = ffs(sectorsize) - 1; 1462 btp->bt_sshift = ffs(sectorsize) - 1;
1517 btp->pbr_smask = sectorsize - 1; 1463 btp->bt_smask = sectorsize - 1;
1518 1464
1519 if (set_blocksize(btp->pbr_bdev, sectorsize)) { 1465 if (set_blocksize(btp->bt_bdev, sectorsize)) {
1520 printk(KERN_WARNING 1466 printk(KERN_WARNING
1521 "XFS: Cannot set_blocksize to %u on device %s\n", 1467 "XFS: Cannot set_blocksize to %u on device %s\n",
1522 sectorsize, XFS_BUFTARG_NAME(btp)); 1468 sectorsize, XFS_BUFTARG_NAME(btp));
@@ -1536,10 +1482,10 @@ xfs_setsize_buftarg_flags(
1536} 1482}
1537 1483
1538/* 1484/*
1539* When allocating the initial buffer target we have not yet 1485 * When allocating the initial buffer target we have not yet
1540* read in the superblock, so don't know what sized sectors 1486 * read in the superblock, so don't know what sized sectors
1541* are being used at this early stage. Play safe. 1487 * are being used at this early stage. Play safe.
1542*/ 1488 */
1543STATIC int 1489STATIC int
1544xfs_setsize_buftarg_early( 1490xfs_setsize_buftarg_early(
1545 xfs_buftarg_t *btp, 1491 xfs_buftarg_t *btp,
@@ -1587,10 +1533,30 @@ xfs_mapping_buftarg(
1587 mapping->a_ops = &mapping_aops; 1533 mapping->a_ops = &mapping_aops;
1588 mapping->backing_dev_info = bdi; 1534 mapping->backing_dev_info = bdi;
1589 mapping_set_gfp_mask(mapping, GFP_NOFS); 1535 mapping_set_gfp_mask(mapping, GFP_NOFS);
1590 btp->pbr_mapping = mapping; 1536 btp->bt_mapping = mapping;
1591 return 0; 1537 return 0;
1592} 1538}
1593 1539
1540STATIC int
1541xfs_alloc_delwrite_queue(
1542 xfs_buftarg_t *btp)
1543{
1544 int error = 0;
1545
1546 INIT_LIST_HEAD(&btp->bt_list);
1547 INIT_LIST_HEAD(&btp->bt_delwrite_queue);
1548 spinlock_init(&btp->bt_delwrite_lock, "delwri_lock");
1549 btp->bt_flags = 0;
1550 btp->bt_task = kthread_run(xfsbufd, btp, "xfsbufd");
1551 if (IS_ERR(btp->bt_task)) {
1552 error = PTR_ERR(btp->bt_task);
1553 goto out_error;
1554 }
1555 xfs_register_buftarg(btp);
1556out_error:
1557 return error;
1558}
1559
1594xfs_buftarg_t * 1560xfs_buftarg_t *
1595xfs_alloc_buftarg( 1561xfs_alloc_buftarg(
1596 struct block_device *bdev, 1562 struct block_device *bdev,
@@ -1600,12 +1566,14 @@ xfs_alloc_buftarg(
1600 1566
1601 btp = kmem_zalloc(sizeof(*btp), KM_SLEEP); 1567 btp = kmem_zalloc(sizeof(*btp), KM_SLEEP);
1602 1568
1603 btp->pbr_dev = bdev->bd_dev; 1569 btp->bt_dev = bdev->bd_dev;
1604 btp->pbr_bdev = bdev; 1570 btp->bt_bdev = bdev;
1605 if (xfs_setsize_buftarg_early(btp, bdev)) 1571 if (xfs_setsize_buftarg_early(btp, bdev))
1606 goto error; 1572 goto error;
1607 if (xfs_mapping_buftarg(btp, bdev)) 1573 if (xfs_mapping_buftarg(btp, bdev))
1608 goto error; 1574 goto error;
1575 if (xfs_alloc_delwrite_queue(btp))
1576 goto error;
1609 xfs_alloc_bufhash(btp, external); 1577 xfs_alloc_bufhash(btp, external);
1610 return btp; 1578 return btp;
1611 1579
@@ -1616,83 +1584,81 @@ error:
1616 1584
1617 1585
1618/* 1586/*
1619 * Pagebuf delayed write buffer handling 1587 * Delayed write buffer handling
1620 */ 1588 */
1621
1622STATIC LIST_HEAD(pbd_delwrite_queue);
1623STATIC DEFINE_SPINLOCK(pbd_delwrite_lock);
1624
1625STATIC void 1589STATIC void
1626pagebuf_delwri_queue( 1590xfs_buf_delwri_queue(
1627 xfs_buf_t *pb, 1591 xfs_buf_t *bp,
1628 int unlock) 1592 int unlock)
1629{ 1593{
1630 PB_TRACE(pb, "delwri_q", (long)unlock); 1594 struct list_head *dwq = &bp->b_target->bt_delwrite_queue;
1631 ASSERT((pb->pb_flags & (PBF_DELWRI|PBF_ASYNC)) == 1595 spinlock_t *dwlk = &bp->b_target->bt_delwrite_lock;
1632 (PBF_DELWRI|PBF_ASYNC)); 1596
1597 XB_TRACE(bp, "delwri_q", (long)unlock);
1598 ASSERT((bp->b_flags&(XBF_DELWRI|XBF_ASYNC)) == (XBF_DELWRI|XBF_ASYNC));
1633 1599
1634 spin_lock(&pbd_delwrite_lock); 1600 spin_lock(dwlk);
1635 /* If already in the queue, dequeue and place at tail */ 1601 /* If already in the queue, dequeue and place at tail */
1636 if (!list_empty(&pb->pb_list)) { 1602 if (!list_empty(&bp->b_list)) {
1637 ASSERT(pb->pb_flags & _PBF_DELWRI_Q); 1603 ASSERT(bp->b_flags & _XBF_DELWRI_Q);
1638 if (unlock) { 1604 if (unlock)
1639 atomic_dec(&pb->pb_hold); 1605 atomic_dec(&bp->b_hold);
1640 } 1606 list_del(&bp->b_list);
1641 list_del(&pb->pb_list);
1642 } 1607 }
1643 1608
1644 pb->pb_flags |= _PBF_DELWRI_Q; 1609 bp->b_flags |= _XBF_DELWRI_Q;
1645 list_add_tail(&pb->pb_list, &pbd_delwrite_queue); 1610 list_add_tail(&bp->b_list, dwq);
1646 pb->pb_queuetime = jiffies; 1611 bp->b_queuetime = jiffies;
1647 spin_unlock(&pbd_delwrite_lock); 1612 spin_unlock(dwlk);
1648 1613
1649 if (unlock) 1614 if (unlock)
1650 pagebuf_unlock(pb); 1615 xfs_buf_unlock(bp);
1651} 1616}
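
xfs_buf_delwri_queue() keeps the per-target delayed-write list ordered oldest-first: a buffer that is queued again is first removed and then re-added at the tail with a fresh queue time, which is what lets the aging walk in xfsbufd() stop at the first young buffer. A toy array-backed sketch of that re-queue behaviour (not the kernel list_head code):

/* Re-queueing an already-queued buffer moves it to the tail. */
#include <stdio.h>

#define QMAX 8

static int queue[QMAX];		/* buffer ids, oldest first */
static unsigned long qtime[QMAX];
static int qlen;

static void delwri_queue(int id, unsigned long now)
{
	/* If already queued, drop the old entry first. */
	for (int i = 0; i < qlen; i++) {
		if (queue[i] == id) {
			for (int j = i; j < qlen - 1; j++) {
				queue[j] = queue[j + 1];
				qtime[j] = qtime[j + 1];
			}
			qlen--;
			break;
		}
	}
	queue[qlen] = id;		/* always re-insert at the tail */
	qtime[qlen] = now;
	qlen++;
}

int main(void)
{
	delwri_queue(1, 10);
	delwri_queue(2, 20);
	delwri_queue(1, 30);		/* buffer 1 now sits behind buffer 2 */
	for (int i = 0; i < qlen; i++)
		printf("buf %d queued at %lu\n", queue[i], qtime[i]);
	return 0;
}
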
1652 1617
1653void 1618void
1654pagebuf_delwri_dequeue( 1619xfs_buf_delwri_dequeue(
1655 xfs_buf_t *pb) 1620 xfs_buf_t *bp)
1656{ 1621{
1622 spinlock_t *dwlk = &bp->b_target->bt_delwrite_lock;
1657 int dequeued = 0; 1623 int dequeued = 0;
1658 1624
1659 spin_lock(&pbd_delwrite_lock); 1625 spin_lock(dwlk);
1660 if ((pb->pb_flags & PBF_DELWRI) && !list_empty(&pb->pb_list)) { 1626 if ((bp->b_flags & XBF_DELWRI) && !list_empty(&bp->b_list)) {
1661 ASSERT(pb->pb_flags & _PBF_DELWRI_Q); 1627 ASSERT(bp->b_flags & _XBF_DELWRI_Q);
1662 list_del_init(&pb->pb_list); 1628 list_del_init(&bp->b_list);
1663 dequeued = 1; 1629 dequeued = 1;
1664 } 1630 }
1665 pb->pb_flags &= ~(PBF_DELWRI|_PBF_DELWRI_Q); 1631 bp->b_flags &= ~(XBF_DELWRI|_XBF_DELWRI_Q);
1666 spin_unlock(&pbd_delwrite_lock); 1632 spin_unlock(dwlk);
1667 1633
1668 if (dequeued) 1634 if (dequeued)
1669 pagebuf_rele(pb); 1635 xfs_buf_rele(bp);
1670 1636
1671 PB_TRACE(pb, "delwri_dq", (long)dequeued); 1637 XB_TRACE(bp, "delwri_dq", (long)dequeued);
1672} 1638}
1673 1639
1674STATIC void 1640STATIC void
1675pagebuf_runall_queues( 1641xfs_buf_runall_queues(
1676 struct workqueue_struct *queue) 1642 struct workqueue_struct *queue)
1677{ 1643{
1678 flush_workqueue(queue); 1644 flush_workqueue(queue);
1679} 1645}
1680 1646
1681/* Defines for pagebuf daemon */
1682STATIC struct task_struct *xfsbufd_task;
1683STATIC int xfsbufd_force_flush;
1684STATIC int xfsbufd_force_sleep;
1685
1686STATIC int 1647STATIC int
1687xfsbufd_wakeup( 1648xfsbufd_wakeup(
1688 int priority, 1649 int priority,
1689 gfp_t mask) 1650 gfp_t mask)
1690{ 1651{
1691 if (xfsbufd_force_sleep) 1652 xfs_buftarg_t *btp;
1692 return 0; 1653
1693 xfsbufd_force_flush = 1; 1654 spin_lock(&xfs_buftarg_lock);
1694 barrier(); 1655 list_for_each_entry(btp, &xfs_buftarg_list, bt_list) {
1695 wake_up_process(xfsbufd_task); 1656 if (test_bit(XBT_FORCE_SLEEP, &btp->bt_flags))
1657 continue;
1658 set_bit(XBT_FORCE_FLUSH, &btp->bt_flags);
1659 wake_up_process(btp->bt_task);
1660 }
1661 spin_unlock(&xfs_buftarg_lock);
1696 return 0; 1662 return 0;
1697} 1663}
1698 1664
@@ -1702,67 +1668,70 @@ xfsbufd(
1702{ 1668{
1703 struct list_head tmp; 1669 struct list_head tmp;
1704 unsigned long age; 1670 unsigned long age;
1705 xfs_buftarg_t *target; 1671 xfs_buftarg_t *target = (xfs_buftarg_t *)data;
1706 xfs_buf_t *pb, *n; 1672 xfs_buf_t *bp, *n;
1673 struct list_head *dwq = &target->bt_delwrite_queue;
1674 spinlock_t *dwlk = &target->bt_delwrite_lock;
1707 1675
1708 current->flags |= PF_MEMALLOC; 1676 current->flags |= PF_MEMALLOC;
1709 1677
1710 INIT_LIST_HEAD(&tmp); 1678 INIT_LIST_HEAD(&tmp);
1711 do { 1679 do {
1712 if (unlikely(freezing(current))) { 1680 if (unlikely(freezing(current))) {
1713 xfsbufd_force_sleep = 1; 1681 set_bit(XBT_FORCE_SLEEP, &target->bt_flags);
1714 refrigerator(); 1682 refrigerator();
1715 } else { 1683 } else {
1716 xfsbufd_force_sleep = 0; 1684 clear_bit(XBT_FORCE_SLEEP, &target->bt_flags);
1717 } 1685 }
1718 1686
1719 schedule_timeout_interruptible( 1687 schedule_timeout_interruptible(
1720 xfs_buf_timer_centisecs * msecs_to_jiffies(10)); 1688 xfs_buf_timer_centisecs * msecs_to_jiffies(10));
1721 1689
1722 age = xfs_buf_age_centisecs * msecs_to_jiffies(10); 1690 age = xfs_buf_age_centisecs * msecs_to_jiffies(10);
1723 spin_lock(&pbd_delwrite_lock); 1691 spin_lock(dwlk);
1724 list_for_each_entry_safe(pb, n, &pbd_delwrite_queue, pb_list) { 1692 list_for_each_entry_safe(bp, n, dwq, b_list) {
1725 PB_TRACE(pb, "walkq1", (long)pagebuf_ispin(pb)); 1693 XB_TRACE(bp, "walkq1", (long)xfs_buf_ispin(bp));
1726 ASSERT(pb->pb_flags & PBF_DELWRI); 1694 ASSERT(bp->b_flags & XBF_DELWRI);
1727 1695
1728 if (!pagebuf_ispin(pb) && !pagebuf_cond_lock(pb)) { 1696 if (!xfs_buf_ispin(bp) && !xfs_buf_cond_lock(bp)) {
1729 if (!xfsbufd_force_flush && 1697 if (!test_bit(XBT_FORCE_FLUSH,
1698 &target->bt_flags) &&
1730 time_before(jiffies, 1699 time_before(jiffies,
1731 pb->pb_queuetime + age)) { 1700 bp->b_queuetime + age)) {
1732 pagebuf_unlock(pb); 1701 xfs_buf_unlock(bp);
1733 break; 1702 break;
1734 } 1703 }
1735 1704
1736 pb->pb_flags &= ~(PBF_DELWRI|_PBF_DELWRI_Q); 1705 bp->b_flags &= ~(XBF_DELWRI|_XBF_DELWRI_Q);
1737 pb->pb_flags |= PBF_WRITE; 1706 bp->b_flags |= XBF_WRITE;
1738 list_move(&pb->pb_list, &tmp); 1707 list_move(&bp->b_list, &tmp);
1739 } 1708 }
1740 } 1709 }
1741 spin_unlock(&pbd_delwrite_lock); 1710 spin_unlock(dwlk);
1742 1711
1743 while (!list_empty(&tmp)) { 1712 while (!list_empty(&tmp)) {
1744 pb = list_entry(tmp.next, xfs_buf_t, pb_list); 1713 bp = list_entry(tmp.next, xfs_buf_t, b_list);
1745 target = pb->pb_target; 1714 ASSERT(target == bp->b_target);
1746 1715
1747 list_del_init(&pb->pb_list); 1716 list_del_init(&bp->b_list);
1748 pagebuf_iostrategy(pb); 1717 xfs_buf_iostrategy(bp);
1749 1718
1750 blk_run_address_space(target->pbr_mapping); 1719 blk_run_address_space(target->bt_mapping);
1751 } 1720 }
1752 1721
1753 if (as_list_len > 0) 1722 if (as_list_len > 0)
1754 purge_addresses(); 1723 purge_addresses();
1755 1724
1756 xfsbufd_force_flush = 0; 1725 clear_bit(XBT_FORCE_FLUSH, &target->bt_flags);
1757 } while (!kthread_should_stop()); 1726 } while (!kthread_should_stop());
1758 1727
1759 return 0; 1728 return 0;
1760} 1729}
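
Because the delayed-write list is ordered by queue time, the xfsbufd() walk can stop at the first buffer that has not yet aged past xfs_buf_age_centisecs, unless XBT_FORCE_FLUSH is set. A condensed sketch of that policy, with plain integers standing in for jiffies and the kernel list:

/* Walk oldest-first and stop at the first buffer still younger than `age`. */
#include <stdbool.h>
#include <stdio.h>

#define NBUFS	5

int main(void)
{
	unsigned long now = 1000, age = 300;
	unsigned long queuetime[NBUFS] = { 500, 600, 650, 800, 950 };	/* oldest first */
	bool force_flush = false;

	for (int i = 0; i < NBUFS; i++) {
		if (!force_flush && now < queuetime[i] + age)
			break;				/* everything after this is younger */
		printf("flush buffer %d (queued at %lu)\n", i, queuetime[i]);
	}
	return 0;
}

With force_flush set (the XBT_FORCE_FLUSH case), the early break is skipped and every queued buffer is pushed out regardless of age.
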
1761 1730
1762/* 1731/*
1763 * Go through all incore buffers, and release buffers if they belong to 1732 * Go through all incore buffers, and release buffers if they belong to
1764 * the given device. This is used in filesystem error handling to 1733 * the given device. This is used in filesystem error handling to
1765 * preserve the consistency of its metadata. 1734 * preserve the consistency of its metadata.
1766 */ 1735 */
1767int 1736int
1768xfs_flush_buftarg( 1737xfs_flush_buftarg(
@@ -1770,73 +1739,72 @@ xfs_flush_buftarg(
1770 int wait) 1739 int wait)
1771{ 1740{
1772 struct list_head tmp; 1741 struct list_head tmp;
1773 xfs_buf_t *pb, *n; 1742 xfs_buf_t *bp, *n;
1774 int pincount = 0; 1743 int pincount = 0;
1744 struct list_head *dwq = &target->bt_delwrite_queue;
1745 spinlock_t *dwlk = &target->bt_delwrite_lock;
1775 1746
1776 pagebuf_runall_queues(xfsdatad_workqueue); 1747 xfs_buf_runall_queues(xfsdatad_workqueue);
1777 pagebuf_runall_queues(xfslogd_workqueue); 1748 xfs_buf_runall_queues(xfslogd_workqueue);
1778 1749
1779 INIT_LIST_HEAD(&tmp); 1750 INIT_LIST_HEAD(&tmp);
1780 spin_lock(&pbd_delwrite_lock); 1751 spin_lock(dwlk);
1781 list_for_each_entry_safe(pb, n, &pbd_delwrite_queue, pb_list) { 1752 list_for_each_entry_safe(bp, n, dwq, b_list) {
1782 1753 ASSERT(bp->b_target == target);
1783 if (pb->pb_target != target) 1754 ASSERT(bp->b_flags & (XBF_DELWRI | _XBF_DELWRI_Q));
1784 continue; 1755 XB_TRACE(bp, "walkq2", (long)xfs_buf_ispin(bp));
1785 1756 if (xfs_buf_ispin(bp)) {
1786 ASSERT(pb->pb_flags & (PBF_DELWRI|_PBF_DELWRI_Q));
1787 PB_TRACE(pb, "walkq2", (long)pagebuf_ispin(pb));
1788 if (pagebuf_ispin(pb)) {
1789 pincount++; 1757 pincount++;
1790 continue; 1758 continue;
1791 } 1759 }
1792 1760
1793 list_move(&pb->pb_list, &tmp); 1761 list_move(&bp->b_list, &tmp);
1794 } 1762 }
1795 spin_unlock(&pbd_delwrite_lock); 1763 spin_unlock(dwlk);
1796 1764
1797 /* 1765 /*
1798 * Dropped the delayed write list lock, now walk the temporary list 1766 * Dropped the delayed write list lock, now walk the temporary list
1799 */ 1767 */
1800 list_for_each_entry_safe(pb, n, &tmp, pb_list) { 1768 list_for_each_entry_safe(bp, n, &tmp, b_list) {
1801 pagebuf_lock(pb); 1769 xfs_buf_lock(bp);
1802 pb->pb_flags &= ~(PBF_DELWRI|_PBF_DELWRI_Q); 1770 bp->b_flags &= ~(XBF_DELWRI|_XBF_DELWRI_Q);
1803 pb->pb_flags |= PBF_WRITE; 1771 bp->b_flags |= XBF_WRITE;
1804 if (wait) 1772 if (wait)
1805 pb->pb_flags &= ~PBF_ASYNC; 1773 bp->b_flags &= ~XBF_ASYNC;
1806 else 1774 else
1807 list_del_init(&pb->pb_list); 1775 list_del_init(&bp->b_list);
1808 1776
1809 pagebuf_iostrategy(pb); 1777 xfs_buf_iostrategy(bp);
1810 } 1778 }
1811 1779
1812 /* 1780 /*
1813 * Remaining list items must be flushed before returning 1781 * Remaining list items must be flushed before returning
1814 */ 1782 */
1815 while (!list_empty(&tmp)) { 1783 while (!list_empty(&tmp)) {
1816 pb = list_entry(tmp.next, xfs_buf_t, pb_list); 1784 bp = list_entry(tmp.next, xfs_buf_t, b_list);
1817 1785
1818 list_del_init(&pb->pb_list); 1786 list_del_init(&bp->b_list);
1819 xfs_iowait(pb); 1787 xfs_iowait(bp);
1820 xfs_buf_relse(pb); 1788 xfs_buf_relse(bp);
1821 } 1789 }
1822 1790
1823 if (wait) 1791 if (wait)
1824 blk_run_address_space(target->pbr_mapping); 1792 blk_run_address_space(target->bt_mapping);
1825 1793
1826 return pincount; 1794 return pincount;
1827} 1795}
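
xfs_flush_buftarg() follows a two-phase pattern: entries are moved off the shared delayed-write list onto a private list while the lock is held, and the slow I/O submission then happens with the lock dropped. A minimal userspace sketch of that structure, using a pthread mutex where the kernel uses a spinlock:

/* Phase 1: splice entries off the shared list under the lock.
 * Phase 2: do the slow work on the private list with the lock released. */
#include <pthread.h>
#include <stdio.h>

#define N 4

static pthread_mutex_t dwlk = PTHREAD_MUTEX_INITIALIZER;
static int shared_list[N] = { 1, 2, 3, 4 };	/* queued buffer ids */
static int shared_len = N;

static void flush_all(void)
{
	int tmp[N], tmp_len = 0;

	pthread_mutex_lock(&dwlk);		/* phase 1: grab entries quickly */
	while (shared_len > 0)
		tmp[tmp_len++] = shared_list[--shared_len];
	pthread_mutex_unlock(&dwlk);

	for (int i = 0; i < tmp_len; i++)	/* phase 2: slow work, no lock held */
		printf("submitting I/O for buffer %d\n", tmp[i]);
}

int main(void)
{
	flush_all();
	return 0;
}
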
1828 1796
1829int __init 1797int __init
1830pagebuf_init(void) 1798xfs_buf_init(void)
1831{ 1799{
1832 int error = -ENOMEM; 1800 int error = -ENOMEM;
1833 1801
1834#ifdef PAGEBUF_TRACE 1802#ifdef XFS_BUF_TRACE
1835 pagebuf_trace_buf = ktrace_alloc(PAGEBUF_TRACE_SIZE, KM_SLEEP); 1803 xfs_buf_trace_buf = ktrace_alloc(XFS_BUF_TRACE_SIZE, KM_SLEEP);
1836#endif 1804#endif
1837 1805
1838 pagebuf_zone = kmem_zone_init(sizeof(xfs_buf_t), "xfs_buf"); 1806 xfs_buf_zone = kmem_zone_init(sizeof(xfs_buf_t), "xfs_buf");
1839 if (!pagebuf_zone) 1807 if (!xfs_buf_zone)
1840 goto out_free_trace_buf; 1808 goto out_free_trace_buf;
1841 1809
1842 xfslogd_workqueue = create_workqueue("xfslogd"); 1810 xfslogd_workqueue = create_workqueue("xfslogd");
@@ -1847,42 +1815,33 @@ pagebuf_init(void)
1847 if (!xfsdatad_workqueue) 1815 if (!xfsdatad_workqueue)
1848 goto out_destroy_xfslogd_workqueue; 1816 goto out_destroy_xfslogd_workqueue;
1849 1817
1850 xfsbufd_task = kthread_run(xfsbufd, NULL, "xfsbufd"); 1818 xfs_buf_shake = kmem_shake_register(xfsbufd_wakeup);
1851 if (IS_ERR(xfsbufd_task)) { 1819 if (!xfs_buf_shake)
1852 error = PTR_ERR(xfsbufd_task);
1853 goto out_destroy_xfsdatad_workqueue; 1820 goto out_destroy_xfsdatad_workqueue;
1854 }
1855
1856 pagebuf_shake = kmem_shake_register(xfsbufd_wakeup);
1857 if (!pagebuf_shake)
1858 goto out_stop_xfsbufd;
1859 1821
1860 return 0; 1822 return 0;
1861 1823
1862 out_stop_xfsbufd:
1863 kthread_stop(xfsbufd_task);
1864 out_destroy_xfsdatad_workqueue: 1824 out_destroy_xfsdatad_workqueue:
1865 destroy_workqueue(xfsdatad_workqueue); 1825 destroy_workqueue(xfsdatad_workqueue);
1866 out_destroy_xfslogd_workqueue: 1826 out_destroy_xfslogd_workqueue:
1867 destroy_workqueue(xfslogd_workqueue); 1827 destroy_workqueue(xfslogd_workqueue);
1868 out_free_buf_zone: 1828 out_free_buf_zone:
1869 kmem_zone_destroy(pagebuf_zone); 1829 kmem_zone_destroy(xfs_buf_zone);
1870 out_free_trace_buf: 1830 out_free_trace_buf:
1871#ifdef PAGEBUF_TRACE 1831#ifdef XFS_BUF_TRACE
1872 ktrace_free(pagebuf_trace_buf); 1832 ktrace_free(xfs_buf_trace_buf);
1873#endif 1833#endif
1874 return error; 1834 return error;
1875} 1835}
1876 1836
1877void 1837void
1878pagebuf_terminate(void) 1838xfs_buf_terminate(void)
1879{ 1839{
1880 kmem_shake_deregister(pagebuf_shake); 1840 kmem_shake_deregister(xfs_buf_shake);
1881 kthread_stop(xfsbufd_task);
1882 destroy_workqueue(xfsdatad_workqueue); 1841 destroy_workqueue(xfsdatad_workqueue);
1883 destroy_workqueue(xfslogd_workqueue); 1842 destroy_workqueue(xfslogd_workqueue);
1884 kmem_zone_destroy(pagebuf_zone); 1843 kmem_zone_destroy(xfs_buf_zone);
1885#ifdef PAGEBUF_TRACE 1844#ifdef XFS_BUF_TRACE
1886 ktrace_free(pagebuf_trace_buf); 1845 ktrace_free(xfs_buf_trace_buf);
1887#endif 1846#endif
1888} 1847}