Diffstat (limited to 'fs/xfs/linux-2.6/xfs_buf.c')
-rw-r--r--  fs/xfs/linux-2.6/xfs_buf.c | 392
1 file changed, 105 insertions, 287 deletions
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index f83a4c830a65..9ef9ed2cfe2e 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -94,75 +94,6 @@ xfs_buf_vmap_len(
94 | } | 94 | } |
95 | 95 | ||
96 | /* | 96 | /* |
97 | * Page Region interfaces. | ||
98 | * | ||
99 | * For pages in filesystems where the blocksize is smaller than the | ||
100 | * pagesize, we use the page->private field (long) to hold a bitmap | ||
101 | * of uptodate regions within the page. | ||
102 | * | ||
103 | * Each such region is "bytes per page / bits per long" bytes long. | ||
104 | * | ||
105 | * NBPPR == number-of-bytes-per-page-region | ||
106 | * BTOPR == bytes-to-page-region (rounded up) | ||
107 | * BTOPRT == bytes-to-page-region-truncated (rounded down) | ||
108 | */ | ||
109 | #if (BITS_PER_LONG == 32) | ||
110 | #define PRSHIFT (PAGE_CACHE_SHIFT - 5) /* (32 == 1<<5) */ | ||
111 | #elif (BITS_PER_LONG == 64) | ||
112 | #define PRSHIFT (PAGE_CACHE_SHIFT - 6) /* (64 == 1<<6) */ | ||
113 | #else | ||
114 | #error BITS_PER_LONG must be 32 or 64 | ||
115 | #endif | ||
116 | #define NBPPR (PAGE_CACHE_SIZE/BITS_PER_LONG) | ||
117 | #define BTOPR(b) (((unsigned int)(b) + (NBPPR - 1)) >> PRSHIFT) | ||
118 | #define BTOPRT(b) (((unsigned int)(b) >> PRSHIFT)) | ||
119 | |||
120 | STATIC unsigned long | ||
121 | page_region_mask( | ||
122 | size_t offset, | ||
123 | size_t length) | ||
124 | { | ||
125 | unsigned long mask; | ||
126 | int first, final; | ||
127 | |||
128 | first = BTOPR(offset); | ||
129 | final = BTOPRT(offset + length - 1); | ||
130 | first = min(first, final); | ||
131 | |||
132 | mask = ~0UL; | ||
133 | mask <<= BITS_PER_LONG - (final - first); | ||
134 | mask >>= BITS_PER_LONG - (final); | ||
135 | |||
136 | ASSERT(offset + length <= PAGE_CACHE_SIZE); | ||
137 | ASSERT((final - first) < BITS_PER_LONG && (final - first) >= 0); | ||
138 | |||
139 | return mask; | ||
140 | } | ||
141 | |||
142 | STATIC void | ||
143 | set_page_region( | ||
144 | struct page *page, | ||
145 | size_t offset, | ||
146 | size_t length) | ||
147 | { | ||
148 | set_page_private(page, | ||
149 | page_private(page) | page_region_mask(offset, length)); | ||
150 | if (page_private(page) == ~0UL) | ||
151 | SetPageUptodate(page); | ||
152 | } | ||
153 | |||
154 | STATIC int | ||
155 | test_page_region( | ||
156 | struct page *page, | ||
157 | size_t offset, | ||
158 | size_t length) | ||
159 | { | ||
160 | unsigned long mask = page_region_mask(offset, length); | ||
161 | |||
162 | return (mask && (page_private(page) & mask) == mask); | ||
163 | } | ||
164 | |||
165 | /* | ||
166 | * xfs_buf_lru_add - add a buffer to the LRU. | 97 | * xfs_buf_lru_add - add a buffer to the LRU. |
167 | * | 98 | * |
168 | * The LRU takes a new reference to the buffer so that it will only be freed | 99 | * The LRU takes a new reference to the buffer so that it will only be freed |
@@ -189,7 +120,7 @@ xfs_buf_lru_add(
189 | * The unlocked check is safe here because it only occurs when there are not | 120 | * The unlocked check is safe here because it only occurs when there are not |
190 | * b_lru_ref counts left on the inode under the pag->pag_buf_lock. it is there | 121 | * b_lru_ref counts left on the inode under the pag->pag_buf_lock. it is there |
191 | * to optimise the shrinker removing the buffer from the LRU and calling | 122 | * to optimise the shrinker removing the buffer from the LRU and calling |
192 | * xfs_buf_free(). i.e. it removes an unneccessary round trip on the | 123 | * xfs_buf_free(). i.e. it removes an unnecessary round trip on the |
193 | * bt_lru_lock. | 124 | * bt_lru_lock. |
194 | */ | 125 | */ |
195 | STATIC void | 126 | STATIC void |
@@ -332,7 +263,7 @@ xfs_buf_free(
332 | 263 | ||
333 | ASSERT(list_empty(&bp->b_lru)); | 264 | ASSERT(list_empty(&bp->b_lru)); |
334 | 265 | ||
335 | if (bp->b_flags & (_XBF_PAGE_CACHE|_XBF_PAGES)) { | 266 | if (bp->b_flags & _XBF_PAGES) { |
336 | uint i; | 267 | uint i; |
337 | 268 | ||
338 | if (xfs_buf_is_vmapped(bp)) | 269 | if (xfs_buf_is_vmapped(bp)) |
@@ -342,56 +273,77 @@ xfs_buf_free(
342 | for (i = 0; i < bp->b_page_count; i++) { | 273 | for (i = 0; i < bp->b_page_count; i++) { |
343 | struct page *page = bp->b_pages[i]; | 274 | struct page *page = bp->b_pages[i]; |
344 | 275 | ||
345 | if (bp->b_flags & _XBF_PAGE_CACHE) | 276 | __free_page(page); |
346 | ASSERT(!PagePrivate(page)); | ||
347 | page_cache_release(page); | ||
348 | } | 277 | } |
349 | } | 278 | } else if (bp->b_flags & _XBF_KMEM) |
279 | kmem_free(bp->b_addr); | ||
350 | _xfs_buf_free_pages(bp); | 280 | _xfs_buf_free_pages(bp); |
351 | xfs_buf_deallocate(bp); | 281 | xfs_buf_deallocate(bp); |
352 | } | 282 | } |
353 | 283 | ||
354 | /* | 284 | /* |
355 | * Finds all pages for buffer in question and builds it's page list. | 285 | * Allocates all the pages for buffer in question and builds it's page list. |
356 | */ | 286 | */ |
357 | STATIC int | 287 | STATIC int |
358 | _xfs_buf_lookup_pages( | 288 | xfs_buf_allocate_memory( |
359 | xfs_buf_t *bp, | 289 | xfs_buf_t *bp, |
360 | uint flags) | 290 | uint flags) |
361 | { | 291 | { |
362 | struct address_space *mapping = bp->b_target->bt_mapping; | ||
363 | size_t blocksize = bp->b_target->bt_bsize; | ||
364 | size_t size = bp->b_count_desired; | 292 | size_t size = bp->b_count_desired; |
365 | size_t nbytes, offset; | 293 | size_t nbytes, offset; |
366 | gfp_t gfp_mask = xb_to_gfp(flags); | 294 | gfp_t gfp_mask = xb_to_gfp(flags); |
367 | unsigned short page_count, i; | 295 | unsigned short page_count, i; |
368 | pgoff_t first; | ||
369 | xfs_off_t end; | 296 | xfs_off_t end; |
370 | int error; | 297 | int error; |
371 | 298 | ||
299 | /* | ||
300 | * for buffers that are contained within a single page, just allocate | ||
301 | * the memory from the heap - there's no need for the complexity of | ||
302 | * page arrays to keep allocation down to order 0. | ||
303 | */ | ||
304 | if (bp->b_buffer_length < PAGE_SIZE) { | ||
305 | bp->b_addr = kmem_alloc(bp->b_buffer_length, xb_to_km(flags)); | ||
306 | if (!bp->b_addr) { | ||
307 | /* low memory - use alloc_page loop instead */ | ||
308 | goto use_alloc_page; | ||
309 | } | ||
310 | |||
311 | if (((unsigned long)(bp->b_addr + bp->b_buffer_length - 1) & | ||
312 | PAGE_MASK) != | ||
313 | ((unsigned long)bp->b_addr & PAGE_MASK)) { | ||
314 | /* b_addr spans two pages - use alloc_page instead */ | ||
315 | kmem_free(bp->b_addr); | ||
316 | bp->b_addr = NULL; | ||
317 | goto use_alloc_page; | ||
318 | } | ||
319 | bp->b_offset = offset_in_page(bp->b_addr); | ||
320 | bp->b_pages = bp->b_page_array; | ||
321 | bp->b_pages[0] = virt_to_page(bp->b_addr); | ||
322 | bp->b_page_count = 1; | ||
323 | bp->b_flags |= XBF_MAPPED | _XBF_KMEM; | ||
324 | return 0; | ||
325 | } | ||
326 | |||
327 | use_alloc_page: | ||
372 | end = bp->b_file_offset + bp->b_buffer_length; | 328 | end = bp->b_file_offset + bp->b_buffer_length; |
373 | page_count = xfs_buf_btoc(end) - xfs_buf_btoct(bp->b_file_offset); | 329 | page_count = xfs_buf_btoc(end) - xfs_buf_btoct(bp->b_file_offset); |
374 | |||
375 | error = _xfs_buf_get_pages(bp, page_count, flags); | 330 | error = _xfs_buf_get_pages(bp, page_count, flags); |
376 | if (unlikely(error)) | 331 | if (unlikely(error)) |
377 | return error; | 332 | return error; |
378 | bp->b_flags |= _XBF_PAGE_CACHE; | ||
379 | 333 | ||
380 | offset = bp->b_offset; | 334 | offset = bp->b_offset; |
381 | first = bp->b_file_offset >> PAGE_CACHE_SHIFT; | 335 | bp->b_flags |= _XBF_PAGES; |
382 | 336 | ||
383 | for (i = 0; i < bp->b_page_count; i++) { | 337 | for (i = 0; i < bp->b_page_count; i++) { |
384 | struct page *page; | 338 | struct page *page; |
385 | uint retries = 0; | 339 | uint retries = 0; |
386 | 340 | retry: | |
387 | retry: | 341 | page = alloc_page(gfp_mask); |
388 | page = find_or_create_page(mapping, first + i, gfp_mask); | ||
389 | if (unlikely(page == NULL)) { | 342 | if (unlikely(page == NULL)) { |
390 | if (flags & XBF_READ_AHEAD) { | 343 | if (flags & XBF_READ_AHEAD) { |
391 | bp->b_page_count = i; | 344 | bp->b_page_count = i; |
392 | for (i = 0; i < bp->b_page_count; i++) | 345 | error = ENOMEM; |
393 | unlock_page(bp->b_pages[i]); | 346 | goto out_free_pages; |
394 | return -ENOMEM; | ||
395 | } | 347 | } |
396 | 348 | ||
397 | /* | 349 | /* |
@@ -401,9 +353,8 @@ _xfs_buf_lookup_pages(
401 | * handle buffer allocation failures we can't do much. | 353 | * handle buffer allocation failures we can't do much. |
402 | */ | 354 | */ |
403 | if (!(++retries % 100)) | 355 | if (!(++retries % 100)) |
404 | printk(KERN_ERR | 356 | xfs_err(NULL, |
405 | "XFS: possible memory allocation " | 357 | "possible memory allocation deadlock in %s (mode:0x%x)", |
406 | "deadlock in %s (mode:0x%x)\n", | ||
407 | __func__, gfp_mask); | 358 | __func__, gfp_mask); |
408 | 359 | ||
409 | XFS_STATS_INC(xb_page_retries); | 360 | XFS_STATS_INC(xb_page_retries); |
@@ -413,52 +364,44 @@ _xfs_buf_lookup_pages(
413 | 364 | ||
414 | XFS_STATS_INC(xb_page_found); | 365 | XFS_STATS_INC(xb_page_found); |
415 | 366 | ||
416 | nbytes = min_t(size_t, size, PAGE_CACHE_SIZE - offset); | 367 | nbytes = min_t(size_t, size, PAGE_SIZE - offset); |
417 | size -= nbytes; | 368 | size -= nbytes; |
418 | |||
419 | ASSERT(!PagePrivate(page)); | ||
420 | if (!PageUptodate(page)) { | ||
421 | page_count--; | ||
422 | if (blocksize >= PAGE_CACHE_SIZE) { | ||
423 | if (flags & XBF_READ) | ||
424 | bp->b_flags |= _XBF_PAGE_LOCKED; | ||
425 | } else if (!PagePrivate(page)) { | ||
426 | if (test_page_region(page, offset, nbytes)) | ||
427 | page_count++; | ||
428 | } | ||
429 | } | ||
430 | |||
431 | bp->b_pages[i] = page; | 369 | bp->b_pages[i] = page; |
432 | offset = 0; | 370 | offset = 0; |
433 | } | 371 | } |
372 | return 0; | ||
434 | 373 | ||
435 | if (!(bp->b_flags & _XBF_PAGE_LOCKED)) { | 374 | out_free_pages: |
436 | for (i = 0; i < bp->b_page_count; i++) | 375 | for (i = 0; i < bp->b_page_count; i++) |
437 | unlock_page(bp->b_pages[i]); | 376 | __free_page(bp->b_pages[i]); |
438 | } | ||
439 | |||
440 | if (page_count == bp->b_page_count) | ||
441 | bp->b_flags |= XBF_DONE; | ||
442 | |||
443 | return error; | 377 | return error; |
444 | } | 378 | } |
445 | 379 | ||
446 | /* | 380 | /* |
447 | * Map buffer into kernel address-space if nessecary. | 381 | * Map buffer into kernel address-space if necessary. |
448 | */ | 382 | */ |
449 | STATIC int | 383 | STATIC int |
450 | _xfs_buf_map_pages( | 384 | _xfs_buf_map_pages( |
451 | xfs_buf_t *bp, | 385 | xfs_buf_t *bp, |
452 | uint flags) | 386 | uint flags) |
453 | { | 387 | { |
454 | /* A single page buffer is always mappable */ | 388 | ASSERT(bp->b_flags & _XBF_PAGES); |
455 | if (bp->b_page_count == 1) { | 389 | if (bp->b_page_count == 1) { |
390 | /* A single page buffer is always mappable */ | ||
456 | bp->b_addr = page_address(bp->b_pages[0]) + bp->b_offset; | 391 | bp->b_addr = page_address(bp->b_pages[0]) + bp->b_offset; |
457 | bp->b_flags |= XBF_MAPPED; | 392 | bp->b_flags |= XBF_MAPPED; |
458 | } else if (flags & XBF_MAPPED) { | 393 | } else if (flags & XBF_MAPPED) { |
459 | bp->b_addr = vm_map_ram(bp->b_pages, bp->b_page_count, | 394 | int retried = 0; |
460 | -1, PAGE_KERNEL); | 395 | |
461 | if (unlikely(bp->b_addr == NULL)) | 396 | do { |
397 | bp->b_addr = vm_map_ram(bp->b_pages, bp->b_page_count, | ||
398 | -1, PAGE_KERNEL); | ||
399 | if (bp->b_addr) | ||
400 | break; | ||
401 | vm_unmap_aliases(); | ||
402 | } while (retried++ <= 1); | ||
403 | |||
404 | if (!bp->b_addr) | ||
462 | return -ENOMEM; | 405 | return -ENOMEM; |
463 | bp->b_addr += bp->b_offset; | 406 | bp->b_addr += bp->b_offset; |
464 | bp->b_flags |= XBF_MAPPED; | 407 | bp->b_flags |= XBF_MAPPED; |
@@ -569,9 +512,14 @@ found:
569 | } | 512 | } |
570 | } | 513 | } |
571 | 514 | ||
515 | /* | ||
516 | * if the buffer is stale, clear all the external state associated with | ||
517 | * it. We need to keep flags such as how we allocated the buffer memory | ||
518 | * intact here. | ||
519 | */ | ||
572 | if (bp->b_flags & XBF_STALE) { | 520 | if (bp->b_flags & XBF_STALE) { |
573 | ASSERT((bp->b_flags & _XBF_DELWRI_Q) == 0); | 521 | ASSERT((bp->b_flags & _XBF_DELWRI_Q) == 0); |
574 | bp->b_flags &= XBF_MAPPED; | 522 | bp->b_flags &= XBF_MAPPED | _XBF_KMEM | _XBF_PAGES; |
575 | } | 523 | } |
576 | 524 | ||
577 | trace_xfs_buf_find(bp, flags, _RET_IP_); | 525 | trace_xfs_buf_find(bp, flags, _RET_IP_); |
@@ -592,7 +540,7 @@ xfs_buf_get(
592 | xfs_buf_flags_t flags) | 540 | xfs_buf_flags_t flags) |
593 | { | 541 | { |
594 | xfs_buf_t *bp, *new_bp; | 542 | xfs_buf_t *bp, *new_bp; |
595 | int error = 0, i; | 543 | int error = 0; |
596 | 544 | ||
597 | new_bp = xfs_buf_allocate(flags); | 545 | new_bp = xfs_buf_allocate(flags); |
598 | if (unlikely(!new_bp)) | 546 | if (unlikely(!new_bp)) |
@@ -600,7 +548,7 @@ xfs_buf_get(
600 | 548 | ||
601 | bp = _xfs_buf_find(target, ioff, isize, flags, new_bp); | 549 | bp = _xfs_buf_find(target, ioff, isize, flags, new_bp); |
602 | if (bp == new_bp) { | 550 | if (bp == new_bp) { |
603 | error = _xfs_buf_lookup_pages(bp, flags); | 551 | error = xfs_buf_allocate_memory(bp, flags); |
604 | if (error) | 552 | if (error) |
605 | goto no_buffer; | 553 | goto no_buffer; |
606 | } else { | 554 | } else { |
@@ -609,14 +557,11 @@ xfs_buf_get(
609 | return NULL; | 557 | return NULL; |
610 | } | 558 | } |
611 | 559 | ||
612 | for (i = 0; i < bp->b_page_count; i++) | ||
613 | mark_page_accessed(bp->b_pages[i]); | ||
614 | |||
615 | if (!(bp->b_flags & XBF_MAPPED)) { | 560 | if (!(bp->b_flags & XBF_MAPPED)) { |
616 | error = _xfs_buf_map_pages(bp, flags); | 561 | error = _xfs_buf_map_pages(bp, flags); |
617 | if (unlikely(error)) { | 562 | if (unlikely(error)) { |
618 | printk(KERN_WARNING "%s: failed to map pages\n", | 563 | xfs_warn(target->bt_mount, |
619 | __func__); | 564 | "%s: failed to map pages\n", __func__); |
620 | goto no_buffer; | 565 | goto no_buffer; |
621 | } | 566 | } |
622 | } | 567 | } |
@@ -710,10 +655,7 @@ xfs_buf_readahead(
710 | xfs_off_t ioff, | 655 | xfs_off_t ioff, |
711 | size_t isize) | 656 | size_t isize) |
712 | { | 657 | { |
713 | struct backing_dev_info *bdi; | 658 | if (bdi_read_congested(target->bt_bdi)) |
714 | |||
715 | bdi = target->bt_mapping->backing_dev_info; | ||
716 | if (bdi_read_congested(bdi)) | ||
717 | return; | 659 | return; |
718 | 660 | ||
719 | xfs_buf_read(target, ioff, isize, | 661 | xfs_buf_read(target, ioff, isize, |
@@ -791,10 +733,10 @@ xfs_buf_associate_memory(
791 | size_t buflen; | 733 | size_t buflen; |
792 | int page_count; | 734 | int page_count; |
793 | 735 | ||
794 | pageaddr = (unsigned long)mem & PAGE_CACHE_MASK; | 736 | pageaddr = (unsigned long)mem & PAGE_MASK; |
795 | offset = (unsigned long)mem - pageaddr; | 737 | offset = (unsigned long)mem - pageaddr; |
796 | buflen = PAGE_CACHE_ALIGN(len + offset); | 738 | buflen = PAGE_ALIGN(len + offset); |
797 | page_count = buflen >> PAGE_CACHE_SHIFT; | 739 | page_count = buflen >> PAGE_SHIFT; |
798 | 740 | ||
799 | /* Free any previous set of page pointers */ | 741 | /* Free any previous set of page pointers */ |
800 | if (bp->b_pages) | 742 | if (bp->b_pages) |
@@ -811,13 +753,12 @@ xfs_buf_associate_memory(
811 | 753 | ||
812 | for (i = 0; i < bp->b_page_count; i++) { | 754 | for (i = 0; i < bp->b_page_count; i++) { |
813 | bp->b_pages[i] = mem_to_page((void *)pageaddr); | 755 | bp->b_pages[i] = mem_to_page((void *)pageaddr); |
814 | pageaddr += PAGE_CACHE_SIZE; | 756 | pageaddr += PAGE_SIZE; |
815 | } | 757 | } |
816 | 758 | ||
817 | bp->b_count_desired = len; | 759 | bp->b_count_desired = len; |
818 | bp->b_buffer_length = buflen; | 760 | bp->b_buffer_length = buflen; |
819 | bp->b_flags |= XBF_MAPPED; | 761 | bp->b_flags |= XBF_MAPPED; |
820 | bp->b_flags &= ~_XBF_PAGE_LOCKED; | ||
821 | 762 | ||
822 | return 0; | 763 | return 0; |
823 | } | 764 | } |
@@ -850,8 +791,8 @@ xfs_buf_get_uncached(
850 | 791 | ||
851 | error = _xfs_buf_map_pages(bp, XBF_MAPPED); | 792 | error = _xfs_buf_map_pages(bp, XBF_MAPPED); |
852 | if (unlikely(error)) { | 793 | if (unlikely(error)) { |
853 | printk(KERN_WARNING "%s: failed to map pages\n", | 794 | xfs_warn(target->bt_mount, |
854 | __func__); | 795 | "%s: failed to map pages\n", __func__); |
855 | goto fail_free_mem; | 796 | goto fail_free_mem; |
856 | } | 797 | } |
857 | 798 | ||
@@ -924,20 +865,7 @@ xfs_buf_rele(
924 | 865 | ||
925 | 866 | ||
926 | /* | 867 | /* |
927 | * Mutual exclusion on buffers. Locking model: | 868 | * Lock a buffer object, if it is not already locked. |
928 | * | ||
929 | * Buffers associated with inodes for which buffer locking | ||
930 | * is not enabled are not protected by semaphores, and are | ||
931 | * assumed to be exclusively owned by the caller. There is a | ||
932 | * spinlock in the buffer, used by the caller when concurrent | ||
933 | * access is possible. | ||
934 | */ | ||
935 | |||
936 | /* | ||
937 | * Locks a buffer object, if it is not already locked. Note that this in | ||
938 | * no way locks the underlying pages, so it is only useful for | ||
939 | * synchronizing concurrent use of buffer objects, not for synchronizing | ||
940 | * independent access to the underlying pages. | ||
941 | * | 869 | * |
942 | * If we come across a stale, pinned, locked buffer, we know that we are | 870 | * If we come across a stale, pinned, locked buffer, we know that we are |
943 | * being asked to lock a buffer that has been reallocated. Because it is | 871 | * being asked to lock a buffer that has been reallocated. Because it is |
@@ -971,10 +899,7 @@ xfs_buf_lock_value(
971 | } | 899 | } |
972 | 900 | ||
973 | /* | 901 | /* |
974 | * Locks a buffer object. | 902 | * Lock a buffer object. |
975 | * Note that this in no way locks the underlying pages, so it is only | ||
976 | * useful for synchronizing concurrent use of buffer objects, not for | ||
977 | * synchronizing independent access to the underlying pages. | ||
978 | * | 903 | * |
979 | * If we come across a stale, pinned, locked buffer, we know that we | 904 | * If we come across a stale, pinned, locked buffer, we know that we |
980 | * are being asked to lock a buffer that has been reallocated. Because | 905 | * are being asked to lock a buffer that has been reallocated. Because |
@@ -990,8 +915,6 @@ xfs_buf_lock(
990 | 915 | ||
991 | if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE)) | 916 | if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE)) |
992 | xfs_log_force(bp->b_target->bt_mount, 0); | 917 | xfs_log_force(bp->b_target->bt_mount, 0); |
993 | if (atomic_read(&bp->b_io_remaining)) | ||
994 | blk_run_address_space(bp->b_target->bt_mapping); | ||
995 | down(&bp->b_sema); | 918 | down(&bp->b_sema); |
996 | XB_SET_OWNER(bp); | 919 | XB_SET_OWNER(bp); |
997 | 920 | ||
@@ -1035,9 +958,7 @@ xfs_buf_wait_unpin(
1035 | set_current_state(TASK_UNINTERRUPTIBLE); | 958 | set_current_state(TASK_UNINTERRUPTIBLE); |
1036 | if (atomic_read(&bp->b_pin_count) == 0) | 959 | if (atomic_read(&bp->b_pin_count) == 0) |
1037 | break; | 960 | break; |
1038 | if (atomic_read(&bp->b_io_remaining)) | 961 | io_schedule(); |
1039 | blk_run_address_space(bp->b_target->bt_mapping); | ||
1040 | schedule(); | ||
1041 | } | 962 | } |
1042 | remove_wait_queue(&bp->b_waiters, &wait); | 963 | remove_wait_queue(&bp->b_waiters, &wait); |
1043 | set_current_state(TASK_RUNNING); | 964 | set_current_state(TASK_RUNNING); |
@@ -1249,10 +1170,8 @@ _xfs_buf_ioend(
1249 | xfs_buf_t *bp, | 1170 | xfs_buf_t *bp, |
1250 | int schedule) | 1171 | int schedule) |
1251 | { | 1172 | { |
1252 | if (atomic_dec_and_test(&bp->b_io_remaining) == 1) { | 1173 | if (atomic_dec_and_test(&bp->b_io_remaining) == 1) |
1253 | bp->b_flags &= ~_XBF_PAGE_LOCKED; | ||
1254 | xfs_buf_ioend(bp, schedule); | 1174 | xfs_buf_ioend(bp, schedule); |
1255 | } | ||
1256 | } | 1175 | } |
1257 | 1176 | ||
1258 | STATIC void | 1177 | STATIC void |
@@ -1261,35 +1180,12 @@ xfs_buf_bio_end_io(
1261 | int error) | 1180 | int error) |
1262 | { | 1181 | { |
1263 | xfs_buf_t *bp = (xfs_buf_t *)bio->bi_private; | 1182 | xfs_buf_t *bp = (xfs_buf_t *)bio->bi_private; |
1264 | unsigned int blocksize = bp->b_target->bt_bsize; | ||
1265 | struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; | ||
1266 | 1183 | ||
1267 | xfs_buf_ioerror(bp, -error); | 1184 | xfs_buf_ioerror(bp, -error); |
1268 | 1185 | ||
1269 | if (!error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ)) | 1186 | if (!error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ)) |
1270 | invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp)); | 1187 | invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp)); |
1271 | 1188 | ||
1272 | do { | ||
1273 | struct page *page = bvec->bv_page; | ||
1274 | |||
1275 | ASSERT(!PagePrivate(page)); | ||
1276 | if (unlikely(bp->b_error)) { | ||
1277 | if (bp->b_flags & XBF_READ) | ||
1278 | ClearPageUptodate(page); | ||
1279 | } else if (blocksize >= PAGE_CACHE_SIZE) { | ||
1280 | SetPageUptodate(page); | ||
1281 | } else if (!PagePrivate(page) && | ||
1282 | (bp->b_flags & _XBF_PAGE_CACHE)) { | ||
1283 | set_page_region(page, bvec->bv_offset, bvec->bv_len); | ||
1284 | } | ||
1285 | |||
1286 | if (--bvec >= bio->bi_io_vec) | ||
1287 | prefetchw(&bvec->bv_page->flags); | ||
1288 | |||
1289 | if (bp->b_flags & _XBF_PAGE_LOCKED) | ||
1290 | unlock_page(page); | ||
1291 | } while (bvec >= bio->bi_io_vec); | ||
1292 | |||
1293 | _xfs_buf_ioend(bp, 1); | 1189 | _xfs_buf_ioend(bp, 1); |
1294 | bio_put(bio); | 1190 | bio_put(bio); |
1295 | } | 1191 | } |
@@ -1303,7 +1199,6 @@ _xfs_buf_ioapply(
1303 | int offset = bp->b_offset; | 1199 | int offset = bp->b_offset; |
1304 | int size = bp->b_count_desired; | 1200 | int size = bp->b_count_desired; |
1305 | sector_t sector = bp->b_bn; | 1201 | sector_t sector = bp->b_bn; |
1306 | unsigned int blocksize = bp->b_target->bt_bsize; | ||
1307 | 1202 | ||
1308 | total_nr_pages = bp->b_page_count; | 1203 | total_nr_pages = bp->b_page_count; |
1309 | map_i = 0; | 1204 | map_i = 0; |
@@ -1324,29 +1219,6 @@ _xfs_buf_ioapply(
1324 | (bp->b_flags & XBF_READ_AHEAD) ? READA : READ; | 1219 | (bp->b_flags & XBF_READ_AHEAD) ? READA : READ; |
1325 | } | 1220 | } |
1326 | 1221 | ||
1327 | /* Special code path for reading a sub page size buffer in -- | ||
1328 | * we populate up the whole page, and hence the other metadata | ||
1329 | * in the same page. This optimization is only valid when the | ||
1330 | * filesystem block size is not smaller than the page size. | ||
1331 | */ | ||
1332 | if ((bp->b_buffer_length < PAGE_CACHE_SIZE) && | ||
1333 | ((bp->b_flags & (XBF_READ|_XBF_PAGE_LOCKED)) == | ||
1334 | (XBF_READ|_XBF_PAGE_LOCKED)) && | ||
1335 | (blocksize >= PAGE_CACHE_SIZE)) { | ||
1336 | bio = bio_alloc(GFP_NOIO, 1); | ||
1337 | |||
1338 | bio->bi_bdev = bp->b_target->bt_bdev; | ||
1339 | bio->bi_sector = sector - (offset >> BBSHIFT); | ||
1340 | bio->bi_end_io = xfs_buf_bio_end_io; | ||
1341 | bio->bi_private = bp; | ||
1342 | |||
1343 | bio_add_page(bio, bp->b_pages[0], PAGE_CACHE_SIZE, 0); | ||
1344 | size = 0; | ||
1345 | |||
1346 | atomic_inc(&bp->b_io_remaining); | ||
1347 | |||
1348 | goto submit_io; | ||
1349 | } | ||
1350 | 1222 | ||
1351 | next_chunk: | 1223 | next_chunk: |
1352 | atomic_inc(&bp->b_io_remaining); | 1224 | atomic_inc(&bp->b_io_remaining); |
@@ -1360,8 +1232,9 @@ next_chunk:
1360 | bio->bi_end_io = xfs_buf_bio_end_io; | 1232 | bio->bi_end_io = xfs_buf_bio_end_io; |
1361 | bio->bi_private = bp; | 1233 | bio->bi_private = bp; |
1362 | 1234 | ||
1235 | |||
1363 | for (; size && nr_pages; nr_pages--, map_i++) { | 1236 | for (; size && nr_pages; nr_pages--, map_i++) { |
1364 | int rbytes, nbytes = PAGE_CACHE_SIZE - offset; | 1237 | int rbytes, nbytes = PAGE_SIZE - offset; |
1365 | 1238 | ||
1366 | if (nbytes > size) | 1239 | if (nbytes > size) |
1367 | nbytes = size; | 1240 | nbytes = size; |
@@ -1376,7 +1249,6 @@ next_chunk:
1376 | total_nr_pages--; | 1249 | total_nr_pages--; |
1377 | } | 1250 | } |
1378 | 1251 | ||
1379 | submit_io: | ||
1380 | if (likely(bio->bi_size)) { | 1252 | if (likely(bio->bi_size)) { |
1381 | if (xfs_buf_is_vmapped(bp)) { | 1253 | if (xfs_buf_is_vmapped(bp)) { |
1382 | flush_kernel_vmap_range(bp->b_addr, | 1254 | flush_kernel_vmap_range(bp->b_addr, |
@@ -1386,18 +1258,7 @@ submit_io:
1386 | if (size) | 1258 | if (size) |
1387 | goto next_chunk; | 1259 | goto next_chunk; |
1388 | } else { | 1260 | } else { |
1389 | /* | ||
1390 | * if we get here, no pages were added to the bio. However, | ||
1391 | * we can't just error out here - if the pages are locked then | ||
1392 | * we have to unlock them otherwise we can hang on a later | ||
1393 | * access to the page. | ||
1394 | */ | ||
1395 | xfs_buf_ioerror(bp, EIO); | 1261 | xfs_buf_ioerror(bp, EIO); |
1396 | if (bp->b_flags & _XBF_PAGE_LOCKED) { | ||
1397 | int i; | ||
1398 | for (i = 0; i < bp->b_page_count; i++) | ||
1399 | unlock_page(bp->b_pages[i]); | ||
1400 | } | ||
1401 | bio_put(bio); | 1262 | bio_put(bio); |
1402 | } | 1263 | } |
1403 | } | 1264 | } |
@@ -1442,8 +1303,6 @@ xfs_buf_iowait(
1442 | { | 1303 | { |
1443 | trace_xfs_buf_iowait(bp, _RET_IP_); | 1304 | trace_xfs_buf_iowait(bp, _RET_IP_); |
1444 | 1305 | ||
1445 | if (atomic_read(&bp->b_io_remaining)) | ||
1446 | blk_run_address_space(bp->b_target->bt_mapping); | ||
1447 | wait_for_completion(&bp->b_iowait); | 1306 | wait_for_completion(&bp->b_iowait); |
1448 | 1307 | ||
1449 | trace_xfs_buf_iowait_done(bp, _RET_IP_); | 1308 | trace_xfs_buf_iowait_done(bp, _RET_IP_); |
@@ -1461,8 +1320,8 @@ xfs_buf_offset(
1461 | return XFS_BUF_PTR(bp) + offset; | 1320 | return XFS_BUF_PTR(bp) + offset; |
1462 | 1321 | ||
1463 | offset += bp->b_offset; | 1322 | offset += bp->b_offset; |
1464 | page = bp->b_pages[offset >> PAGE_CACHE_SHIFT]; | 1323 | page = bp->b_pages[offset >> PAGE_SHIFT]; |
1465 | return (xfs_caddr_t)page_address(page) + (offset & (PAGE_CACHE_SIZE-1)); | 1324 | return (xfs_caddr_t)page_address(page) + (offset & (PAGE_SIZE-1)); |
1466 | } | 1325 | } |
1467 | 1326 | ||
1468 | /* | 1327 | /* |
@@ -1484,9 +1343,9 @@ xfs_buf_iomove(
1484 | page = bp->b_pages[xfs_buf_btoct(boff + bp->b_offset)]; | 1343 | page = bp->b_pages[xfs_buf_btoct(boff + bp->b_offset)]; |
1485 | cpoff = xfs_buf_poff(boff + bp->b_offset); | 1344 | cpoff = xfs_buf_poff(boff + bp->b_offset); |
1486 | csize = min_t(size_t, | 1345 | csize = min_t(size_t, |
1487 | PAGE_CACHE_SIZE-cpoff, bp->b_count_desired-boff); | 1346 | PAGE_SIZE-cpoff, bp->b_count_desired-boff); |
1488 | 1347 | ||
1489 | ASSERT(((csize + cpoff) <= PAGE_CACHE_SIZE)); | 1348 | ASSERT(((csize + cpoff) <= PAGE_SIZE)); |
1490 | 1349 | ||
1491 | switch (mode) { | 1350 | switch (mode) { |
1492 | case XBRW_ZERO: | 1351 | case XBRW_ZERO: |
@@ -1599,7 +1458,6 @@ xfs_free_buftarg(
1599 | xfs_flush_buftarg(btp, 1); | 1458 | xfs_flush_buftarg(btp, 1); |
1600 | if (mp->m_flags & XFS_MOUNT_BARRIER) | 1459 | if (mp->m_flags & XFS_MOUNT_BARRIER) |
1601 | xfs_blkdev_issue_flush(btp); | 1460 | xfs_blkdev_issue_flush(btp); |
1602 | iput(btp->bt_mapping->host); | ||
1603 | 1461 | ||
1604 | kthread_stop(btp->bt_task); | 1462 | kthread_stop(btp->bt_task); |
1605 | kmem_free(btp); | 1463 | kmem_free(btp); |
@@ -1617,21 +1475,12 @@ xfs_setsize_buftarg_flags(
1617 | btp->bt_smask = sectorsize - 1; | 1475 | btp->bt_smask = sectorsize - 1; |
1618 | 1476 | ||
1619 | if (set_blocksize(btp->bt_bdev, sectorsize)) { | 1477 | if (set_blocksize(btp->bt_bdev, sectorsize)) { |
1620 | printk(KERN_WARNING | 1478 | xfs_warn(btp->bt_mount, |
1621 | "XFS: Cannot set_blocksize to %u on device %s\n", | 1479 | "Cannot set_blocksize to %u on device %s\n", |
1622 | sectorsize, XFS_BUFTARG_NAME(btp)); | 1480 | sectorsize, XFS_BUFTARG_NAME(btp)); |
1623 | return EINVAL; | 1481 | return EINVAL; |
1624 | } | 1482 | } |
1625 | 1483 | ||
1626 | if (verbose && | ||
1627 | (PAGE_CACHE_SIZE / BITS_PER_LONG) > sectorsize) { | ||
1628 | printk(KERN_WARNING | ||
1629 | "XFS: %u byte sectors in use on device %s. " | ||
1630 | "This is suboptimal; %u or greater is ideal.\n", | ||
1631 | sectorsize, XFS_BUFTARG_NAME(btp), | ||
1632 | (unsigned int)PAGE_CACHE_SIZE / BITS_PER_LONG); | ||
1633 | } | ||
1634 | |||
1635 | return 0; | 1484 | return 0; |
1636 | } | 1485 | } |
1637 | 1486 | ||
@@ -1646,7 +1495,7 @@ xfs_setsize_buftarg_early(
1646 | struct block_device *bdev) | 1495 | struct block_device *bdev) |
1647 | { | 1496 | { |
1648 | return xfs_setsize_buftarg_flags(btp, | 1497 | return xfs_setsize_buftarg_flags(btp, |
1649 | PAGE_CACHE_SIZE, bdev_logical_block_size(bdev), 0); | 1498 | PAGE_SIZE, bdev_logical_block_size(bdev), 0); |
1650 | } | 1499 | } |
1651 | 1500 | ||
1652 | int | 1501 | int |
@@ -1659,41 +1508,6 @@ xfs_setsize_buftarg(
1659 | } | 1508 | } |
1660 | 1509 | ||
1661 | STATIC int | 1510 | STATIC int |
1662 | xfs_mapping_buftarg( | ||
1663 | xfs_buftarg_t *btp, | ||
1664 | struct block_device *bdev) | ||
1665 | { | ||
1666 | struct backing_dev_info *bdi; | ||
1667 | struct inode *inode; | ||
1668 | struct address_space *mapping; | ||
1669 | static const struct address_space_operations mapping_aops = { | ||
1670 | .sync_page = block_sync_page, | ||
1671 | .migratepage = fail_migrate_page, | ||
1672 | }; | ||
1673 | |||
1674 | inode = new_inode(bdev->bd_inode->i_sb); | ||
1675 | if (!inode) { | ||
1676 | printk(KERN_WARNING | ||
1677 | "XFS: Cannot allocate mapping inode for device %s\n", | ||
1678 | XFS_BUFTARG_NAME(btp)); | ||
1679 | return ENOMEM; | ||
1680 | } | ||
1681 | inode->i_ino = get_next_ino(); | ||
1682 | inode->i_mode = S_IFBLK; | ||
1683 | inode->i_bdev = bdev; | ||
1684 | inode->i_rdev = bdev->bd_dev; | ||
1685 | bdi = blk_get_backing_dev_info(bdev); | ||
1686 | if (!bdi) | ||
1687 | bdi = &default_backing_dev_info; | ||
1688 | mapping = &inode->i_data; | ||
1689 | mapping->a_ops = &mapping_aops; | ||
1690 | mapping->backing_dev_info = bdi; | ||
1691 | mapping_set_gfp_mask(mapping, GFP_NOFS); | ||
1692 | btp->bt_mapping = mapping; | ||
1693 | return 0; | ||
1694 | } | ||
1695 | |||
1696 | STATIC int | ||
1697 | xfs_alloc_delwrite_queue( | 1511 | xfs_alloc_delwrite_queue( |
1698 | xfs_buftarg_t *btp, | 1512 | xfs_buftarg_t *btp, |
1699 | const char *fsname) | 1513 | const char *fsname) |
@@ -1721,12 +1535,14 @@ xfs_alloc_buftarg(
1721 | btp->bt_mount = mp; | 1535 | btp->bt_mount = mp; |
1722 | btp->bt_dev = bdev->bd_dev; | 1536 | btp->bt_dev = bdev->bd_dev; |
1723 | btp->bt_bdev = bdev; | 1537 | btp->bt_bdev = bdev; |
1538 | btp->bt_bdi = blk_get_backing_dev_info(bdev); | ||
1539 | if (!btp->bt_bdi) | ||
1540 | goto error; | ||
1541 | |||
1724 | INIT_LIST_HEAD(&btp->bt_lru); | 1542 | INIT_LIST_HEAD(&btp->bt_lru); |
1725 | spin_lock_init(&btp->bt_lru_lock); | 1543 | spin_lock_init(&btp->bt_lru_lock); |
1726 | if (xfs_setsize_buftarg_early(btp, bdev)) | 1544 | if (xfs_setsize_buftarg_early(btp, bdev)) |
1727 | goto error; | 1545 | goto error; |
1728 | if (xfs_mapping_buftarg(btp, bdev)) | ||
1729 | goto error; | ||
1730 | if (xfs_alloc_delwrite_queue(btp, fsname)) | 1546 | if (xfs_alloc_delwrite_queue(btp, fsname)) |
1731 | goto error; | 1547 | goto error; |
1732 | btp->bt_shrinker.shrink = xfs_buftarg_shrink; | 1548 | btp->bt_shrinker.shrink = xfs_buftarg_shrink; |
@@ -1923,8 +1739,8 @@ xfsbufd(
1923 | do { | 1739 | do { |
1924 | long age = xfs_buf_age_centisecs * msecs_to_jiffies(10); | 1740 | long age = xfs_buf_age_centisecs * msecs_to_jiffies(10); |
1925 | long tout = xfs_buf_timer_centisecs * msecs_to_jiffies(10); | 1741 | long tout = xfs_buf_timer_centisecs * msecs_to_jiffies(10); |
1926 | int count = 0; | ||
1927 | struct list_head tmp; | 1742 | struct list_head tmp; |
1743 | struct blk_plug plug; | ||
1928 | 1744 | ||
1929 | if (unlikely(freezing(current))) { | 1745 | if (unlikely(freezing(current))) { |
1930 | set_bit(XBT_FORCE_SLEEP, &target->bt_flags); | 1746 | set_bit(XBT_FORCE_SLEEP, &target->bt_flags); |
@@ -1940,16 +1756,15 @@ xfsbufd(
1940 | 1756 | ||
1941 | xfs_buf_delwri_split(target, &tmp, age); | 1757 | xfs_buf_delwri_split(target, &tmp, age); |
1942 | list_sort(NULL, &tmp, xfs_buf_cmp); | 1758 | list_sort(NULL, &tmp, xfs_buf_cmp); |
1759 | |||
1760 | blk_start_plug(&plug); | ||
1943 | while (!list_empty(&tmp)) { | 1761 | while (!list_empty(&tmp)) { |
1944 | struct xfs_buf *bp; | 1762 | struct xfs_buf *bp; |
1945 | bp = list_first_entry(&tmp, struct xfs_buf, b_list); | 1763 | bp = list_first_entry(&tmp, struct xfs_buf, b_list); |
1946 | list_del_init(&bp->b_list); | 1764 | list_del_init(&bp->b_list); |
1947 | xfs_bdstrat_cb(bp); | 1765 | xfs_bdstrat_cb(bp); |
1948 | count++; | ||
1949 | } | 1766 | } |
1950 | if (count) | 1767 | blk_finish_plug(&plug); |
1951 | blk_run_address_space(target->bt_mapping); | ||
1952 | |||
1953 | } while (!kthread_should_stop()); | 1768 | } while (!kthread_should_stop()); |
1954 | 1769 | ||
1955 | return 0; | 1770 | return 0; |
@@ -1969,6 +1784,7 @@ xfs_flush_buftarg(
1969 | int pincount = 0; | 1784 | int pincount = 0; |
1970 | LIST_HEAD(tmp_list); | 1785 | LIST_HEAD(tmp_list); |
1971 | LIST_HEAD(wait_list); | 1786 | LIST_HEAD(wait_list); |
1787 | struct blk_plug plug; | ||
1972 | 1788 | ||
1973 | xfs_buf_runall_queues(xfsconvertd_workqueue); | 1789 | xfs_buf_runall_queues(xfsconvertd_workqueue); |
1974 | xfs_buf_runall_queues(xfsdatad_workqueue); | 1790 | xfs_buf_runall_queues(xfsdatad_workqueue); |
@@ -1983,6 +1799,8 @@ xfs_flush_buftarg(
1983 | * we do that after issuing all the IO. | 1799 | * we do that after issuing all the IO. |
1984 | */ | 1800 | */ |
1985 | list_sort(NULL, &tmp_list, xfs_buf_cmp); | 1801 | list_sort(NULL, &tmp_list, xfs_buf_cmp); |
1802 | |||
1803 | blk_start_plug(&plug); | ||
1986 | while (!list_empty(&tmp_list)) { | 1804 | while (!list_empty(&tmp_list)) { |
1987 | bp = list_first_entry(&tmp_list, struct xfs_buf, b_list); | 1805 | bp = list_first_entry(&tmp_list, struct xfs_buf, b_list); |
1988 | ASSERT(target == bp->b_target); | 1806 | ASSERT(target == bp->b_target); |
@@ -1993,10 +1811,10 @@ xfs_flush_buftarg(
1993 | } | 1811 | } |
1994 | xfs_bdstrat_cb(bp); | 1812 | xfs_bdstrat_cb(bp); |
1995 | } | 1813 | } |
1814 | blk_finish_plug(&plug); | ||
1996 | 1815 | ||
1997 | if (wait) { | 1816 | if (wait) { |
1998 | /* Expedite and wait for IO to complete. */ | 1817 | /* Wait for IO to complete. */ |
1999 | blk_run_address_space(target->bt_mapping); | ||
2000 | while (!list_empty(&wait_list)) { | 1818 | while (!list_empty(&wait_list)) { |
2001 | bp = list_first_entry(&wait_list, struct xfs_buf, b_list); | 1819 | bp = list_first_entry(&wait_list, struct xfs_buf, b_list); |
2002 | 1820 | ||