Diffstat (limited to 'fs/xfs/linux-2.6/xfs_buf.c')
 fs/xfs/linux-2.6/xfs_buf.c | 392
 1 file changed, 105 insertions(+), 287 deletions(-)
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index f83a4c830a65..9ef9ed2cfe2e 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -94,75 +94,6 @@ xfs_buf_vmap_len(
94} 94}
95 95
96/* 96/*
97 * Page Region interfaces.
98 *
99 * For pages in filesystems where the blocksize is smaller than the
100 * pagesize, we use the page->private field (long) to hold a bitmap
101 * of uptodate regions within the page.
102 *
103 * Each such region is "bytes per page / bits per long" bytes long.
104 *
105 * NBPPR == number-of-bytes-per-page-region
106 * BTOPR == bytes-to-page-region (rounded up)
107 * BTOPRT == bytes-to-page-region-truncated (rounded down)
108 */
109#if (BITS_PER_LONG == 32)
110#define PRSHIFT (PAGE_CACHE_SHIFT - 5) /* (32 == 1<<5) */
111#elif (BITS_PER_LONG == 64)
112#define PRSHIFT (PAGE_CACHE_SHIFT - 6) /* (64 == 1<<6) */
113#else
114#error BITS_PER_LONG must be 32 or 64
115#endif
116#define NBPPR (PAGE_CACHE_SIZE/BITS_PER_LONG)
117#define BTOPR(b) (((unsigned int)(b) + (NBPPR - 1)) >> PRSHIFT)
118#define BTOPRT(b) (((unsigned int)(b) >> PRSHIFT))
119
120STATIC unsigned long
121page_region_mask(
122 size_t offset,
123 size_t length)
124{
125 unsigned long mask;
126 int first, final;
127
128 first = BTOPR(offset);
129 final = BTOPRT(offset + length - 1);
130 first = min(first, final);
131
132 mask = ~0UL;
133 mask <<= BITS_PER_LONG - (final - first);
134 mask >>= BITS_PER_LONG - (final);
135
136 ASSERT(offset + length <= PAGE_CACHE_SIZE);
137 ASSERT((final - first) < BITS_PER_LONG && (final - first) >= 0);
138
139 return mask;
140}
141
142STATIC void
143set_page_region(
144 struct page *page,
145 size_t offset,
146 size_t length)
147{
148 set_page_private(page,
149 page_private(page) | page_region_mask(offset, length));
150 if (page_private(page) == ~0UL)
151 SetPageUptodate(page);
152}
153
154STATIC int
155test_page_region(
156 struct page *page,
157 size_t offset,
158 size_t length)
159{
160 unsigned long mask = page_region_mask(offset, length);
161
162 return (mask && (page_private(page) & mask) == mask);
163}
164
165/*
166 * xfs_buf_lru_add - add a buffer to the LRU. 97 * xfs_buf_lru_add - add a buffer to the LRU.
167 * 98 *
168 * The LRU takes a new reference to the buffer so that it will only be freed 99 * The LRU takes a new reference to the buffer so that it will only be freed
@@ -189,7 +120,7 @@ xfs_buf_lru_add(
189 * The unlocked check is safe here because it only occurs when there are not 120 * The unlocked check is safe here because it only occurs when there are not
190 * b_lru_ref counts left on the inode under the pag->pag_buf_lock. it is there 121 * b_lru_ref counts left on the inode under the pag->pag_buf_lock. it is there
191 * to optimise the shrinker removing the buffer from the LRU and calling 122 * to optimise the shrinker removing the buffer from the LRU and calling
192 * xfs_buf_free(). i.e. it removes an unneccessary round trip on the 123 * xfs_buf_free(). i.e. it removes an unnecessary round trip on the
193 * bt_lru_lock. 124 * bt_lru_lock.
194 */ 125 */
195STATIC void 126STATIC void
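For context: the bitmap scheme removed above tracks sub-page uptodate state at a granularity of PAGE_CACHE_SIZE / BITS_PER_LONG bytes per bit of page->private. A small worked example, assuming 4 KiB pages and not taken from this patch:

        /*
         * Illustration only: on a 64-bit kernel each bit covers
         * 4096 / 64 = 64 bytes (128 bytes with 32-bit longs).  A 512-byte
         * block at page offset 1024 covers the eight 64-byte regions
         * 1024/64 = 16 through (1024 + 512 - 1)/64 = 23.
         */

Since the rest of this patch takes buffer pages out of the page cache entirely, no other user can see a partially-uptodate buffer page and the bitmap has no remaining caller.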
@@ -332,7 +263,7 @@ xfs_buf_free(
332 263
333 ASSERT(list_empty(&bp->b_lru)); 264 ASSERT(list_empty(&bp->b_lru));
334 265
335 if (bp->b_flags & (_XBF_PAGE_CACHE|_XBF_PAGES)) { 266 if (bp->b_flags & _XBF_PAGES) {
336 uint i; 267 uint i;
337 268
338 if (xfs_buf_is_vmapped(bp)) 269 if (xfs_buf_is_vmapped(bp))
@@ -342,56 +273,77 @@ xfs_buf_free(
342 for (i = 0; i < bp->b_page_count; i++) { 273 for (i = 0; i < bp->b_page_count; i++) {
343 struct page *page = bp->b_pages[i]; 274 struct page *page = bp->b_pages[i];
344 275
345 if (bp->b_flags & _XBF_PAGE_CACHE) 276 __free_page(page);
346 ASSERT(!PagePrivate(page));
347 page_cache_release(page);
348 } 277 }
349 } 278 } else if (bp->b_flags & _XBF_KMEM)
279 kmem_free(bp->b_addr);
350 _xfs_buf_free_pages(bp); 280 _xfs_buf_free_pages(bp);
351 xfs_buf_deallocate(bp); 281 xfs_buf_deallocate(bp);
352} 282}
353 283
354/* 284/*
355 * Finds all pages for buffer in question and builds it's page list. 285 * Allocates all the pages for buffer in question and builds it's page list.
356 */ 286 */
357STATIC int 287STATIC int
358_xfs_buf_lookup_pages( 288xfs_buf_allocate_memory(
359 xfs_buf_t *bp, 289 xfs_buf_t *bp,
360 uint flags) 290 uint flags)
361{ 291{
362 struct address_space *mapping = bp->b_target->bt_mapping;
363 size_t blocksize = bp->b_target->bt_bsize;
364 size_t size = bp->b_count_desired; 292 size_t size = bp->b_count_desired;
365 size_t nbytes, offset; 293 size_t nbytes, offset;
366 gfp_t gfp_mask = xb_to_gfp(flags); 294 gfp_t gfp_mask = xb_to_gfp(flags);
367 unsigned short page_count, i; 295 unsigned short page_count, i;
368 pgoff_t first;
369 xfs_off_t end; 296 xfs_off_t end;
370 int error; 297 int error;
371 298
299 /*
300 * for buffers that are contained within a single page, just allocate
301 * the memory from the heap - there's no need for the complexity of
302 * page arrays to keep allocation down to order 0.
303 */
304 if (bp->b_buffer_length < PAGE_SIZE) {
305 bp->b_addr = kmem_alloc(bp->b_buffer_length, xb_to_km(flags));
306 if (!bp->b_addr) {
307 /* low memory - use alloc_page loop instead */
308 goto use_alloc_page;
309 }
310
311 if (((unsigned long)(bp->b_addr + bp->b_buffer_length - 1) &
312 PAGE_MASK) !=
313 ((unsigned long)bp->b_addr & PAGE_MASK)) {
314 /* b_addr spans two pages - use alloc_page instead */
315 kmem_free(bp->b_addr);
316 bp->b_addr = NULL;
317 goto use_alloc_page;
318 }
319 bp->b_offset = offset_in_page(bp->b_addr);
320 bp->b_pages = bp->b_page_array;
321 bp->b_pages[0] = virt_to_page(bp->b_addr);
322 bp->b_page_count = 1;
323 bp->b_flags |= XBF_MAPPED | _XBF_KMEM;
324 return 0;
325 }
326
327use_alloc_page:
372 end = bp->b_file_offset + bp->b_buffer_length; 328 end = bp->b_file_offset + bp->b_buffer_length;
373 page_count = xfs_buf_btoc(end) - xfs_buf_btoct(bp->b_file_offset); 329 page_count = xfs_buf_btoc(end) - xfs_buf_btoct(bp->b_file_offset);
374
375 error = _xfs_buf_get_pages(bp, page_count, flags); 330 error = _xfs_buf_get_pages(bp, page_count, flags);
376 if (unlikely(error)) 331 if (unlikely(error))
377 return error; 332 return error;
378 bp->b_flags |= _XBF_PAGE_CACHE;
379 333
380 offset = bp->b_offset; 334 offset = bp->b_offset;
381 first = bp->b_file_offset >> PAGE_CACHE_SHIFT; 335 bp->b_flags |= _XBF_PAGES;
382 336
383 for (i = 0; i < bp->b_page_count; i++) { 337 for (i = 0; i < bp->b_page_count; i++) {
384 struct page *page; 338 struct page *page;
385 uint retries = 0; 339 uint retries = 0;
386 340retry:
387 retry: 341 page = alloc_page(gfp_mask);
388 page = find_or_create_page(mapping, first + i, gfp_mask);
389 if (unlikely(page == NULL)) { 342 if (unlikely(page == NULL)) {
390 if (flags & XBF_READ_AHEAD) { 343 if (flags & XBF_READ_AHEAD) {
391 bp->b_page_count = i; 344 bp->b_page_count = i;
392 for (i = 0; i < bp->b_page_count; i++) 345 error = ENOMEM;
393 unlock_page(bp->b_pages[i]); 346 goto out_free_pages;
394 return -ENOMEM;
395 } 347 }
396 348
397 /* 349 /*
@@ -401,9 +353,8 @@ _xfs_buf_lookup_pages(
401 * handle buffer allocation failures we can't do much. 353 * handle buffer allocation failures we can't do much.
402 */ 354 */
403 if (!(++retries % 100)) 355 if (!(++retries % 100))
404 printk(KERN_ERR 356 xfs_err(NULL,
405 "XFS: possible memory allocation " 357 "possible memory allocation deadlock in %s (mode:0x%x)",
406 "deadlock in %s (mode:0x%x)\n",
407 __func__, gfp_mask); 358 __func__, gfp_mask);
408 359
409 XFS_STATS_INC(xb_page_retries); 360 XFS_STATS_INC(xb_page_retries);
@@ -413,52 +364,44 @@ _xfs_buf_lookup_pages(
413 364
414 XFS_STATS_INC(xb_page_found); 365 XFS_STATS_INC(xb_page_found);
415 366
416 nbytes = min_t(size_t, size, PAGE_CACHE_SIZE - offset); 367 nbytes = min_t(size_t, size, PAGE_SIZE - offset);
417 size -= nbytes; 368 size -= nbytes;
418
419 ASSERT(!PagePrivate(page));
420 if (!PageUptodate(page)) {
421 page_count--;
422 if (blocksize >= PAGE_CACHE_SIZE) {
423 if (flags & XBF_READ)
424 bp->b_flags |= _XBF_PAGE_LOCKED;
425 } else if (!PagePrivate(page)) {
426 if (test_page_region(page, offset, nbytes))
427 page_count++;
428 }
429 }
430
431 bp->b_pages[i] = page; 369 bp->b_pages[i] = page;
432 offset = 0; 370 offset = 0;
433 } 371 }
372 return 0;
434 373
435 if (!(bp->b_flags & _XBF_PAGE_LOCKED)) { 374out_free_pages:
436 for (i = 0; i < bp->b_page_count; i++) 375 for (i = 0; i < bp->b_page_count; i++)
437 unlock_page(bp->b_pages[i]); 376 __free_page(bp->b_pages[i]);
438 }
439
440 if (page_count == bp->b_page_count)
441 bp->b_flags |= XBF_DONE;
442
443 return error; 377 return error;
444} 378}
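The heap path above only keeps a kmem_alloc() buffer if it fits inside a single page. A minimal sketch of that test, for illustration only (the helper name xb_spans_two_pages is invented here, not part of the patch):

        /*
         * A heap allocation is usable as a single-page buffer only if its
         * first and last bytes share the same page-aligned base address.
         */
        static inline int xb_spans_two_pages(void *addr, size_t len)
        {
                unsigned long first = (unsigned long)addr & PAGE_MASK;
                unsigned long last  = ((unsigned long)addr + len - 1) & PAGE_MASK;

                return first != last;   /* true: fall back to alloc_page() */
        }

When the test fails, or kmem_alloc() itself fails, the buffer falls through to the order-0 alloc_page() loop at use_alloc_page.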
445 379
446/* 380/*
447 * Map buffer into kernel address-space if nessecary. 381 * Map buffer into kernel address-space if necessary.
448 */ 382 */
449STATIC int 383STATIC int
450_xfs_buf_map_pages( 384_xfs_buf_map_pages(
451 xfs_buf_t *bp, 385 xfs_buf_t *bp,
452 uint flags) 386 uint flags)
453{ 387{
454 /* A single page buffer is always mappable */ 388 ASSERT(bp->b_flags & _XBF_PAGES);
455 if (bp->b_page_count == 1) { 389 if (bp->b_page_count == 1) {
390 /* A single page buffer is always mappable */
456 bp->b_addr = page_address(bp->b_pages[0]) + bp->b_offset; 391 bp->b_addr = page_address(bp->b_pages[0]) + bp->b_offset;
457 bp->b_flags |= XBF_MAPPED; 392 bp->b_flags |= XBF_MAPPED;
458 } else if (flags & XBF_MAPPED) { 393 } else if (flags & XBF_MAPPED) {
459 bp->b_addr = vm_map_ram(bp->b_pages, bp->b_page_count, 394 int retried = 0;
460 -1, PAGE_KERNEL); 395
461 if (unlikely(bp->b_addr == NULL)) 396 do {
397 bp->b_addr = vm_map_ram(bp->b_pages, bp->b_page_count,
398 -1, PAGE_KERNEL);
399 if (bp->b_addr)
400 break;
401 vm_unmap_aliases();
402 } while (retried++ <= 1);
403
404 if (!bp->b_addr)
462 return -ENOMEM; 405 return -ENOMEM;
463 bp->b_addr += bp->b_offset; 406 bp->b_addr += bp->b_offset;
464 bp->b_flags |= XBF_MAPPED; 407 bp->b_flags |= XBF_MAPPED;
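A short note on the retry loop added to _xfs_buf_map_pages(); the rationale below is a paraphrase of the apparent intent, not text from the patch:

        /*
         * Sketch of the retry rationale: vm_map_ram() allocates from a
         * limited virtual address pool that is purged lazily, so a failure
         * may only mean stale, lazily-freed areas are still holding address
         * space.  vm_unmap_aliases() forces that purge, after which a
         * second attempt often succeeds; a persistent failure is treated
         * as a real ENOMEM.
         */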
@@ -569,9 +512,14 @@ found:
569 } 512 }
570 } 513 }
571 514
515 /*
516 * if the buffer is stale, clear all the external state associated with
517 * it. We need to keep flags such as how we allocated the buffer memory
518 * intact here.
519 */
572 if (bp->b_flags & XBF_STALE) { 520 if (bp->b_flags & XBF_STALE) {
573 ASSERT((bp->b_flags & _XBF_DELWRI_Q) == 0); 521 ASSERT((bp->b_flags & _XBF_DELWRI_Q) == 0);
574 bp->b_flags &= XBF_MAPPED; 522 bp->b_flags &= XBF_MAPPED | _XBF_KMEM | _XBF_PAGES;
575 } 523 }
576 524
577 trace_xfs_buf_find(bp, flags, _RET_IP_); 525 trace_xfs_buf_find(bp, flags, _RET_IP_);
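For illustration, the flag mask in the stale-buffer branch above can be read as follows (comment is mine, not from the patch):

        /*
         * After reinitialising a stale buffer, only the "how is the memory
         * held" bits survive:
         *   XBF_MAPPED  - b_addr is a valid mapping of the pages
         *   _XBF_PAGES  - backed by alloc_page() pages xfs_buf_free() must free
         *   _XBF_KMEM   - backed by kmem_alloc() memory xfs_buf_free() must free
         * All other state (delwri, done, read/write, ...) is cleared.
         */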
@@ -592,7 +540,7 @@ xfs_buf_get(
592 xfs_buf_flags_t flags) 540 xfs_buf_flags_t flags)
593{ 541{
594 xfs_buf_t *bp, *new_bp; 542 xfs_buf_t *bp, *new_bp;
595 int error = 0, i; 543 int error = 0;
596 544
597 new_bp = xfs_buf_allocate(flags); 545 new_bp = xfs_buf_allocate(flags);
598 if (unlikely(!new_bp)) 546 if (unlikely(!new_bp))
@@ -600,7 +548,7 @@ xfs_buf_get(
600 548
601 bp = _xfs_buf_find(target, ioff, isize, flags, new_bp); 549 bp = _xfs_buf_find(target, ioff, isize, flags, new_bp);
602 if (bp == new_bp) { 550 if (bp == new_bp) {
603 error = _xfs_buf_lookup_pages(bp, flags); 551 error = xfs_buf_allocate_memory(bp, flags);
604 if (error) 552 if (error)
605 goto no_buffer; 553 goto no_buffer;
606 } else { 554 } else {
@@ -609,14 +557,11 @@ xfs_buf_get(
609 return NULL; 557 return NULL;
610 } 558 }
611 559
612 for (i = 0; i < bp->b_page_count; i++)
613 mark_page_accessed(bp->b_pages[i]);
614
615 if (!(bp->b_flags & XBF_MAPPED)) { 560 if (!(bp->b_flags & XBF_MAPPED)) {
616 error = _xfs_buf_map_pages(bp, flags); 561 error = _xfs_buf_map_pages(bp, flags);
617 if (unlikely(error)) { 562 if (unlikely(error)) {
618 printk(KERN_WARNING "%s: failed to map pages\n", 563 xfs_warn(target->bt_mount,
619 __func__); 564 "%s: failed to map pages\n", __func__);
620 goto no_buffer; 565 goto no_buffer;
621 } 566 }
622 } 567 }
@@ -710,10 +655,7 @@ xfs_buf_readahead(
710 xfs_off_t ioff, 655 xfs_off_t ioff,
711 size_t isize) 656 size_t isize)
712{ 657{
713 struct backing_dev_info *bdi; 658 if (bdi_read_congested(target->bt_bdi))
714
715 bdi = target->bt_mapping->backing_dev_info;
716 if (bdi_read_congested(bdi))
717 return; 659 return;
718 660
719 xfs_buf_read(target, ioff, isize, 661 xfs_buf_read(target, ioff, isize,
@@ -791,10 +733,10 @@ xfs_buf_associate_memory(
791 size_t buflen; 733 size_t buflen;
792 int page_count; 734 int page_count;
793 735
794 pageaddr = (unsigned long)mem & PAGE_CACHE_MASK; 736 pageaddr = (unsigned long)mem & PAGE_MASK;
795 offset = (unsigned long)mem - pageaddr; 737 offset = (unsigned long)mem - pageaddr;
796 buflen = PAGE_CACHE_ALIGN(len + offset); 738 buflen = PAGE_ALIGN(len + offset);
797 page_count = buflen >> PAGE_CACHE_SHIFT; 739 page_count = buflen >> PAGE_SHIFT;
798 740
799 /* Free any previous set of page pointers */ 741 /* Free any previous set of page pointers */
800 if (bp->b_pages) 742 if (bp->b_pages)
@@ -811,13 +753,12 @@ xfs_buf_associate_memory(
811 753
812 for (i = 0; i < bp->b_page_count; i++) { 754 for (i = 0; i < bp->b_page_count; i++) {
813 bp->b_pages[i] = mem_to_page((void *)pageaddr); 755 bp->b_pages[i] = mem_to_page((void *)pageaddr);
814 pageaddr += PAGE_CACHE_SIZE; 756 pageaddr += PAGE_SIZE;
815 } 757 }
816 758
817 bp->b_count_desired = len; 759 bp->b_count_desired = len;
818 bp->b_buffer_length = buflen; 760 bp->b_buffer_length = buflen;
819 bp->b_flags |= XBF_MAPPED; 761 bp->b_flags |= XBF_MAPPED;
820 bp->b_flags &= ~_XBF_PAGE_LOCKED;
821 762
822 return 0; 763 return 0;
823} 764}
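The page arithmetic in xfs_buf_associate_memory() can be checked with a small worked example (illustration only, assuming 4 KiB pages):

        /*
         * mem at page offset 2048, len = 5000:
         *   pageaddr   = mem & PAGE_MASK           start of the first page
         *   offset     = 2048
         *   buflen     = PAGE_ALIGN(5000 + 2048)   = 8192
         *   page_count = 8192 >> PAGE_SHIFT        = 2
         * so the caller-supplied region is described by two page pointers.
         */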
@@ -850,8 +791,8 @@ xfs_buf_get_uncached(
850 791
851 error = _xfs_buf_map_pages(bp, XBF_MAPPED); 792 error = _xfs_buf_map_pages(bp, XBF_MAPPED);
852 if (unlikely(error)) { 793 if (unlikely(error)) {
853 printk(KERN_WARNING "%s: failed to map pages\n", 794 xfs_warn(target->bt_mount,
854 __func__); 795 "%s: failed to map pages\n", __func__);
855 goto fail_free_mem; 796 goto fail_free_mem;
856 } 797 }
857 798
@@ -924,20 +865,7 @@ xfs_buf_rele(
924 865
925 866
926/* 867/*
927 * Mutual exclusion on buffers. Locking model: 868 * Lock a buffer object, if it is not already locked.
928 *
929 * Buffers associated with inodes for which buffer locking
930 * is not enabled are not protected by semaphores, and are
931 * assumed to be exclusively owned by the caller. There is a
932 * spinlock in the buffer, used by the caller when concurrent
933 * access is possible.
934 */
935
936/*
937 * Locks a buffer object, if it is not already locked. Note that this in
938 * no way locks the underlying pages, so it is only useful for
939 * synchronizing concurrent use of buffer objects, not for synchronizing
940 * independent access to the underlying pages.
941 * 869 *
942 * If we come across a stale, pinned, locked buffer, we know that we are 870 * If we come across a stale, pinned, locked buffer, we know that we are
943 * being asked to lock a buffer that has been reallocated. Because it is 871 * being asked to lock a buffer that has been reallocated. Because it is
@@ -971,10 +899,7 @@ xfs_buf_lock_value(
971} 899}
972 900
973/* 901/*
974 * Locks a buffer object. 902 * Lock a buffer object.
975 * Note that this in no way locks the underlying pages, so it is only
976 * useful for synchronizing concurrent use of buffer objects, not for
977 * synchronizing independent access to the underlying pages.
978 * 903 *
979 * If we come across a stale, pinned, locked buffer, we know that we 904 * If we come across a stale, pinned, locked buffer, we know that we
980 * are being asked to lock a buffer that has been reallocated. Because 905 * are being asked to lock a buffer that has been reallocated. Because
@@ -990,8 +915,6 @@ xfs_buf_lock(
990 915
991 if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE)) 916 if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE))
992 xfs_log_force(bp->b_target->bt_mount, 0); 917 xfs_log_force(bp->b_target->bt_mount, 0);
993 if (atomic_read(&bp->b_io_remaining))
994 blk_run_address_space(bp->b_target->bt_mapping);
995 down(&bp->b_sema); 918 down(&bp->b_sema);
996 XB_SET_OWNER(bp); 919 XB_SET_OWNER(bp);
997 920
@@ -1035,9 +958,7 @@ xfs_buf_wait_unpin(
1035 set_current_state(TASK_UNINTERRUPTIBLE); 958 set_current_state(TASK_UNINTERRUPTIBLE);
1036 if (atomic_read(&bp->b_pin_count) == 0) 959 if (atomic_read(&bp->b_pin_count) == 0)
1037 break; 960 break;
1038 if (atomic_read(&bp->b_io_remaining)) 961 io_schedule();
1039 blk_run_address_space(bp->b_target->bt_mapping);
1040 schedule();
1041 } 962 }
1042 remove_wait_queue(&bp->b_waiters, &wait); 963 remove_wait_queue(&bp->b_waiters, &wait);
1043 set_current_state(TASK_RUNNING); 964 set_current_state(TASK_RUNNING);
@@ -1249,10 +1170,8 @@ _xfs_buf_ioend(
1249 xfs_buf_t *bp, 1170 xfs_buf_t *bp,
1250 int schedule) 1171 int schedule)
1251{ 1172{
1252 if (atomic_dec_and_test(&bp->b_io_remaining) == 1) { 1173 if (atomic_dec_and_test(&bp->b_io_remaining) == 1)
1253 bp->b_flags &= ~_XBF_PAGE_LOCKED;
1254 xfs_buf_ioend(bp, schedule); 1174 xfs_buf_ioend(bp, schedule);
1255 }
1256} 1175}
1257 1176
1258STATIC void 1177STATIC void
@@ -1261,35 +1180,12 @@ xfs_buf_bio_end_io(
1261 int error) 1180 int error)
1262{ 1181{
1263 xfs_buf_t *bp = (xfs_buf_t *)bio->bi_private; 1182 xfs_buf_t *bp = (xfs_buf_t *)bio->bi_private;
1264 unsigned int blocksize = bp->b_target->bt_bsize;
1265 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
1266 1183
1267 xfs_buf_ioerror(bp, -error); 1184 xfs_buf_ioerror(bp, -error);
1268 1185
1269 if (!error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ)) 1186 if (!error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ))
1270 invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp)); 1187 invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp));
1271 1188
1272 do {
1273 struct page *page = bvec->bv_page;
1274
1275 ASSERT(!PagePrivate(page));
1276 if (unlikely(bp->b_error)) {
1277 if (bp->b_flags & XBF_READ)
1278 ClearPageUptodate(page);
1279 } else if (blocksize >= PAGE_CACHE_SIZE) {
1280 SetPageUptodate(page);
1281 } else if (!PagePrivate(page) &&
1282 (bp->b_flags & _XBF_PAGE_CACHE)) {
1283 set_page_region(page, bvec->bv_offset, bvec->bv_len);
1284 }
1285
1286 if (--bvec >= bio->bi_io_vec)
1287 prefetchw(&bvec->bv_page->flags);
1288
1289 if (bp->b_flags & _XBF_PAGE_LOCKED)
1290 unlock_page(page);
1291 } while (bvec >= bio->bi_io_vec);
1292
1293 _xfs_buf_ioend(bp, 1); 1189 _xfs_buf_ioend(bp, 1);
1294 bio_put(bio); 1190 bio_put(bio);
1295} 1191}
@@ -1303,7 +1199,6 @@ _xfs_buf_ioapply(
1303 int offset = bp->b_offset; 1199 int offset = bp->b_offset;
1304 int size = bp->b_count_desired; 1200 int size = bp->b_count_desired;
1305 sector_t sector = bp->b_bn; 1201 sector_t sector = bp->b_bn;
1306 unsigned int blocksize = bp->b_target->bt_bsize;
1307 1202
1308 total_nr_pages = bp->b_page_count; 1203 total_nr_pages = bp->b_page_count;
1309 map_i = 0; 1204 map_i = 0;
@@ -1324,29 +1219,6 @@ _xfs_buf_ioapply(
1324 (bp->b_flags & XBF_READ_AHEAD) ? READA : READ; 1219 (bp->b_flags & XBF_READ_AHEAD) ? READA : READ;
1325 } 1220 }
1326 1221
1327 /* Special code path for reading a sub page size buffer in --
1328 * we populate up the whole page, and hence the other metadata
1329 * in the same page. This optimization is only valid when the
1330 * filesystem block size is not smaller than the page size.
1331 */
1332 if ((bp->b_buffer_length < PAGE_CACHE_SIZE) &&
1333 ((bp->b_flags & (XBF_READ|_XBF_PAGE_LOCKED)) ==
1334 (XBF_READ|_XBF_PAGE_LOCKED)) &&
1335 (blocksize >= PAGE_CACHE_SIZE)) {
1336 bio = bio_alloc(GFP_NOIO, 1);
1337
1338 bio->bi_bdev = bp->b_target->bt_bdev;
1339 bio->bi_sector = sector - (offset >> BBSHIFT);
1340 bio->bi_end_io = xfs_buf_bio_end_io;
1341 bio->bi_private = bp;
1342
1343 bio_add_page(bio, bp->b_pages[0], PAGE_CACHE_SIZE, 0);
1344 size = 0;
1345
1346 atomic_inc(&bp->b_io_remaining);
1347
1348 goto submit_io;
1349 }
1350 1222
1351next_chunk: 1223next_chunk:
1352 atomic_inc(&bp->b_io_remaining); 1224 atomic_inc(&bp->b_io_remaining);
@@ -1360,8 +1232,9 @@ next_chunk:
1360 bio->bi_end_io = xfs_buf_bio_end_io; 1232 bio->bi_end_io = xfs_buf_bio_end_io;
1361 bio->bi_private = bp; 1233 bio->bi_private = bp;
1362 1234
1235
1363 for (; size && nr_pages; nr_pages--, map_i++) { 1236 for (; size && nr_pages; nr_pages--, map_i++) {
1364 int rbytes, nbytes = PAGE_CACHE_SIZE - offset; 1237 int rbytes, nbytes = PAGE_SIZE - offset;
1365 1238
1366 if (nbytes > size) 1239 if (nbytes > size)
1367 nbytes = size; 1240 nbytes = size;
@@ -1376,7 +1249,6 @@ next_chunk:
1376 total_nr_pages--; 1249 total_nr_pages--;
1377 } 1250 }
1378 1251
1379submit_io:
1380 if (likely(bio->bi_size)) { 1252 if (likely(bio->bi_size)) {
1381 if (xfs_buf_is_vmapped(bp)) { 1253 if (xfs_buf_is_vmapped(bp)) {
1382 flush_kernel_vmap_range(bp->b_addr, 1254 flush_kernel_vmap_range(bp->b_addr,
@@ -1386,18 +1258,7 @@ submit_io:
1386 if (size) 1258 if (size)
1387 goto next_chunk; 1259 goto next_chunk;
1388 } else { 1260 } else {
1389 /*
1390 * if we get here, no pages were added to the bio. However,
1391 * we can't just error out here - if the pages are locked then
1392 * we have to unlock them otherwise we can hang on a later
1393 * access to the page.
1394 */
1395 xfs_buf_ioerror(bp, EIO); 1261 xfs_buf_ioerror(bp, EIO);
1396 if (bp->b_flags & _XBF_PAGE_LOCKED) {
1397 int i;
1398 for (i = 0; i < bp->b_page_count; i++)
1399 unlock_page(bp->b_pages[i]);
1400 }
1401 bio_put(bio); 1262 bio_put(bio);
1402 } 1263 }
1403} 1264}
@@ -1442,8 +1303,6 @@ xfs_buf_iowait(
1442{ 1303{
1443 trace_xfs_buf_iowait(bp, _RET_IP_); 1304 trace_xfs_buf_iowait(bp, _RET_IP_);
1444 1305
1445 if (atomic_read(&bp->b_io_remaining))
1446 blk_run_address_space(bp->b_target->bt_mapping);
1447 wait_for_completion(&bp->b_iowait); 1306 wait_for_completion(&bp->b_iowait);
1448 1307
1449 trace_xfs_buf_iowait_done(bp, _RET_IP_); 1308 trace_xfs_buf_iowait_done(bp, _RET_IP_);
@@ -1461,8 +1320,8 @@ xfs_buf_offset(
1461 return XFS_BUF_PTR(bp) + offset; 1320 return XFS_BUF_PTR(bp) + offset;
1462 1321
1463 offset += bp->b_offset; 1322 offset += bp->b_offset;
1464 page = bp->b_pages[offset >> PAGE_CACHE_SHIFT]; 1323 page = bp->b_pages[offset >> PAGE_SHIFT];
1465 return (xfs_caddr_t)page_address(page) + (offset & (PAGE_CACHE_SIZE-1)); 1324 return (xfs_caddr_t)page_address(page) + (offset & (PAGE_SIZE-1));
1466} 1325}
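A worked example for xfs_buf_offset() on an unmapped, page-backed buffer (illustration only, 4 KiB pages, b_offset == 0):

        /*
         * Request for byte 5000 of the buffer:
         *   page index     = 5000 >> PAGE_SHIFT      = 1
         *   in-page offset = 5000 & (PAGE_SIZE - 1)  = 904
         * so the returned pointer is page_address(b_pages[1]) + 904.
         */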
1467 1326
1468/* 1327/*
@@ -1484,9 +1343,9 @@ xfs_buf_iomove(
1484 page = bp->b_pages[xfs_buf_btoct(boff + bp->b_offset)]; 1343 page = bp->b_pages[xfs_buf_btoct(boff + bp->b_offset)];
1485 cpoff = xfs_buf_poff(boff + bp->b_offset); 1344 cpoff = xfs_buf_poff(boff + bp->b_offset);
1486 csize = min_t(size_t, 1345 csize = min_t(size_t,
1487 PAGE_CACHE_SIZE-cpoff, bp->b_count_desired-boff); 1346 PAGE_SIZE-cpoff, bp->b_count_desired-boff);
1488 1347
1489 ASSERT(((csize + cpoff) <= PAGE_CACHE_SIZE)); 1348 ASSERT(((csize + cpoff) <= PAGE_SIZE));
1490 1349
1491 switch (mode) { 1350 switch (mode) {
1492 case XBRW_ZERO: 1351 case XBRW_ZERO:
@@ -1599,7 +1458,6 @@ xfs_free_buftarg(
1599 xfs_flush_buftarg(btp, 1); 1458 xfs_flush_buftarg(btp, 1);
1600 if (mp->m_flags & XFS_MOUNT_BARRIER) 1459 if (mp->m_flags & XFS_MOUNT_BARRIER)
1601 xfs_blkdev_issue_flush(btp); 1460 xfs_blkdev_issue_flush(btp);
1602 iput(btp->bt_mapping->host);
1603 1461
1604 kthread_stop(btp->bt_task); 1462 kthread_stop(btp->bt_task);
1605 kmem_free(btp); 1463 kmem_free(btp);
@@ -1617,21 +1475,12 @@ xfs_setsize_buftarg_flags(
1617 btp->bt_smask = sectorsize - 1; 1475 btp->bt_smask = sectorsize - 1;
1618 1476
1619 if (set_blocksize(btp->bt_bdev, sectorsize)) { 1477 if (set_blocksize(btp->bt_bdev, sectorsize)) {
1620 printk(KERN_WARNING 1478 xfs_warn(btp->bt_mount,
1621 "XFS: Cannot set_blocksize to %u on device %s\n", 1479 "Cannot set_blocksize to %u on device %s\n",
1622 sectorsize, XFS_BUFTARG_NAME(btp)); 1480 sectorsize, XFS_BUFTARG_NAME(btp));
1623 return EINVAL; 1481 return EINVAL;
1624 } 1482 }
1625 1483
1626 if (verbose &&
1627 (PAGE_CACHE_SIZE / BITS_PER_LONG) > sectorsize) {
1628 printk(KERN_WARNING
1629 "XFS: %u byte sectors in use on device %s. "
1630 "This is suboptimal; %u or greater is ideal.\n",
1631 sectorsize, XFS_BUFTARG_NAME(btp),
1632 (unsigned int)PAGE_CACHE_SIZE / BITS_PER_LONG);
1633 }
1634
1635 return 0; 1484 return 0;
1636} 1485}
1637 1486
@@ -1646,7 +1495,7 @@ xfs_setsize_buftarg_early(
1646 struct block_device *bdev) 1495 struct block_device *bdev)
1647{ 1496{
1648 return xfs_setsize_buftarg_flags(btp, 1497 return xfs_setsize_buftarg_flags(btp,
1649 PAGE_CACHE_SIZE, bdev_logical_block_size(bdev), 0); 1498 PAGE_SIZE, bdev_logical_block_size(bdev), 0);
1650} 1499}
1651 1500
1652int 1501int
@@ -1659,41 +1508,6 @@ xfs_setsize_buftarg(
1659} 1508}
1660 1509
1661STATIC int 1510STATIC int
1662xfs_mapping_buftarg(
1663 xfs_buftarg_t *btp,
1664 struct block_device *bdev)
1665{
1666 struct backing_dev_info *bdi;
1667 struct inode *inode;
1668 struct address_space *mapping;
1669 static const struct address_space_operations mapping_aops = {
1670 .sync_page = block_sync_page,
1671 .migratepage = fail_migrate_page,
1672 };
1673
1674 inode = new_inode(bdev->bd_inode->i_sb);
1675 if (!inode) {
1676 printk(KERN_WARNING
1677 "XFS: Cannot allocate mapping inode for device %s\n",
1678 XFS_BUFTARG_NAME(btp));
1679 return ENOMEM;
1680 }
1681 inode->i_ino = get_next_ino();
1682 inode->i_mode = S_IFBLK;
1683 inode->i_bdev = bdev;
1684 inode->i_rdev = bdev->bd_dev;
1685 bdi = blk_get_backing_dev_info(bdev);
1686 if (!bdi)
1687 bdi = &default_backing_dev_info;
1688 mapping = &inode->i_data;
1689 mapping->a_ops = &mapping_aops;
1690 mapping->backing_dev_info = bdi;
1691 mapping_set_gfp_mask(mapping, GFP_NOFS);
1692 btp->bt_mapping = mapping;
1693 return 0;
1694}
1695
1696STATIC int
1697xfs_alloc_delwrite_queue( 1511xfs_alloc_delwrite_queue(
1698 xfs_buftarg_t *btp, 1512 xfs_buftarg_t *btp,
1699 const char *fsname) 1513 const char *fsname)
@@ -1721,12 +1535,14 @@ xfs_alloc_buftarg(
1721 btp->bt_mount = mp; 1535 btp->bt_mount = mp;
1722 btp->bt_dev = bdev->bd_dev; 1536 btp->bt_dev = bdev->bd_dev;
1723 btp->bt_bdev = bdev; 1537 btp->bt_bdev = bdev;
1538 btp->bt_bdi = blk_get_backing_dev_info(bdev);
1539 if (!btp->bt_bdi)
1540 goto error;
1541
1724 INIT_LIST_HEAD(&btp->bt_lru); 1542 INIT_LIST_HEAD(&btp->bt_lru);
1725 spin_lock_init(&btp->bt_lru_lock); 1543 spin_lock_init(&btp->bt_lru_lock);
1726 if (xfs_setsize_buftarg_early(btp, bdev)) 1544 if (xfs_setsize_buftarg_early(btp, bdev))
1727 goto error; 1545 goto error;
1728 if (xfs_mapping_buftarg(btp, bdev))
1729 goto error;
1730 if (xfs_alloc_delwrite_queue(btp, fsname)) 1546 if (xfs_alloc_delwrite_queue(btp, fsname))
1731 goto error; 1547 goto error;
1732 btp->bt_shrinker.shrink = xfs_buftarg_shrink; 1548 btp->bt_shrinker.shrink = xfs_buftarg_shrink;
@@ -1923,8 +1739,8 @@ xfsbufd(
1923 do { 1739 do {
1924 long age = xfs_buf_age_centisecs * msecs_to_jiffies(10); 1740 long age = xfs_buf_age_centisecs * msecs_to_jiffies(10);
1925 long tout = xfs_buf_timer_centisecs * msecs_to_jiffies(10); 1741 long tout = xfs_buf_timer_centisecs * msecs_to_jiffies(10);
1926 int count = 0;
1927 struct list_head tmp; 1742 struct list_head tmp;
1743 struct blk_plug plug;
1928 1744
1929 if (unlikely(freezing(current))) { 1745 if (unlikely(freezing(current))) {
1930 set_bit(XBT_FORCE_SLEEP, &target->bt_flags); 1746 set_bit(XBT_FORCE_SLEEP, &target->bt_flags);
@@ -1940,16 +1756,15 @@ xfsbufd(
1940 1756
1941 xfs_buf_delwri_split(target, &tmp, age); 1757 xfs_buf_delwri_split(target, &tmp, age);
1942 list_sort(NULL, &tmp, xfs_buf_cmp); 1758 list_sort(NULL, &tmp, xfs_buf_cmp);
1759
1760 blk_start_plug(&plug);
1943 while (!list_empty(&tmp)) { 1761 while (!list_empty(&tmp)) {
1944 struct xfs_buf *bp; 1762 struct xfs_buf *bp;
1945 bp = list_first_entry(&tmp, struct xfs_buf, b_list); 1763 bp = list_first_entry(&tmp, struct xfs_buf, b_list);
1946 list_del_init(&bp->b_list); 1764 list_del_init(&bp->b_list);
1947 xfs_bdstrat_cb(bp); 1765 xfs_bdstrat_cb(bp);
1948 count++;
1949 } 1766 }
1950 if (count) 1767 blk_finish_plug(&plug);
1951 blk_run_address_space(target->bt_mapping);
1952
1953 } while (!kthread_should_stop()); 1768 } while (!kthread_should_stop());
1954 1769
1955 return 0; 1770 return 0;
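The explicit blk_run_address_space() unplugging that xfsbufd used to do is replaced above by an on-stack block plug. A minimal sketch of the pattern, assuming only the standard blk_plug API:

        struct blk_plug plug;

        blk_start_plug(&plug);    /* batch requests on the task's plug list */
        /* ... submit a run of buffers, e.g. xfs_bdstrat_cb(bp) in a loop ... */
        blk_finish_plug(&plug);   /* hand the whole batch to the request queue */

The same pattern is applied in xfs_flush_buftarg() below.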
@@ -1969,6 +1784,7 @@ xfs_flush_buftarg(
1969 int pincount = 0; 1784 int pincount = 0;
1970 LIST_HEAD(tmp_list); 1785 LIST_HEAD(tmp_list);
1971 LIST_HEAD(wait_list); 1786 LIST_HEAD(wait_list);
1787 struct blk_plug plug;
1972 1788
1973 xfs_buf_runall_queues(xfsconvertd_workqueue); 1789 xfs_buf_runall_queues(xfsconvertd_workqueue);
1974 xfs_buf_runall_queues(xfsdatad_workqueue); 1790 xfs_buf_runall_queues(xfsdatad_workqueue);
@@ -1983,6 +1799,8 @@ xfs_flush_buftarg(
1983 * we do that after issuing all the IO. 1799 * we do that after issuing all the IO.
1984 */ 1800 */
1985 list_sort(NULL, &tmp_list, xfs_buf_cmp); 1801 list_sort(NULL, &tmp_list, xfs_buf_cmp);
1802
1803 blk_start_plug(&plug);
1986 while (!list_empty(&tmp_list)) { 1804 while (!list_empty(&tmp_list)) {
1987 bp = list_first_entry(&tmp_list, struct xfs_buf, b_list); 1805 bp = list_first_entry(&tmp_list, struct xfs_buf, b_list);
1988 ASSERT(target == bp->b_target); 1806 ASSERT(target == bp->b_target);
@@ -1993,10 +1811,10 @@ xfs_flush_buftarg(
1993 } 1811 }
1994 xfs_bdstrat_cb(bp); 1812 xfs_bdstrat_cb(bp);
1995 } 1813 }
1814 blk_finish_plug(&plug);
1996 1815
1997 if (wait) { 1816 if (wait) {
1998 /* Expedite and wait for IO to complete. */ 1817 /* Wait for IO to complete. */
1999 blk_run_address_space(target->bt_mapping);
2000 while (!list_empty(&wait_list)) { 1818 while (!list_empty(&wait_list)) {
2001 bp = list_first_entry(&wait_list, struct xfs_buf, b_list); 1819 bp = list_first_entry(&wait_list, struct xfs_buf, b_list);
2002 1820