-rw-r--r--  fs/xfs/linux-2.6/xfs_buf.c  341
-rw-r--r--  fs/xfs/linux-2.6/xfs_buf.h   40
2 files changed, 84 insertions(+), 297 deletions(-)
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index a5a260fab824..d45b2cdee6c4 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -94,75 +94,6 @@ xfs_buf_vmap_len(
94} 94}
95 95
96/* 96/*
97 * Page Region interfaces.
98 *
99 * For pages in filesystems where the blocksize is smaller than the
100 * pagesize, we use the page->private field (long) to hold a bitmap
101 * of uptodate regions within the page.
102 *
103 * Each such region is "bytes per page / bits per long" bytes long.
104 *
105 * NBPPR == number-of-bytes-per-page-region
106 * BTOPR == bytes-to-page-region (rounded up)
107 * BTOPRT == bytes-to-page-region-truncated (rounded down)
108 */
109#if (BITS_PER_LONG == 32)
110#define PRSHIFT (PAGE_CACHE_SHIFT - 5) /* (32 == 1<<5) */
111#elif (BITS_PER_LONG == 64)
112#define PRSHIFT (PAGE_CACHE_SHIFT - 6) /* (64 == 1<<6) */
113#else
114#error BITS_PER_LONG must be 32 or 64
115#endif
116#define NBPPR (PAGE_CACHE_SIZE/BITS_PER_LONG)
117#define BTOPR(b) (((unsigned int)(b) + (NBPPR - 1)) >> PRSHIFT)
118#define BTOPRT(b) (((unsigned int)(b) >> PRSHIFT))
119
120STATIC unsigned long
121page_region_mask(
122 size_t offset,
123 size_t length)
124{
125 unsigned long mask;
126 int first, final;
127
128 first = BTOPR(offset);
129 final = BTOPRT(offset + length - 1);
130 first = min(first, final);
131
132 mask = ~0UL;
133 mask <<= BITS_PER_LONG - (final - first);
134 mask >>= BITS_PER_LONG - (final);
135
136 ASSERT(offset + length <= PAGE_CACHE_SIZE);
137 ASSERT((final - first) < BITS_PER_LONG && (final - first) >= 0);
138
139 return mask;
140}
141
142STATIC void
143set_page_region(
144 struct page *page,
145 size_t offset,
146 size_t length)
147{
148 set_page_private(page,
149 page_private(page) | page_region_mask(offset, length));
150 if (page_private(page) == ~0UL)
151 SetPageUptodate(page);
152}
153
154STATIC int
155test_page_region(
156 struct page *page,
157 size_t offset,
158 size_t length)
159{
160 unsigned long mask = page_region_mask(offset, length);
161
162 return (mask && (page_private(page) & mask) == mask);
163}
164
165/*
166 * xfs_buf_lru_add - add a buffer to the LRU. 97 * xfs_buf_lru_add - add a buffer to the LRU.
167 * 98 *
168 * The LRU takes a new reference to the buffer so that it will only be freed 99 * The LRU takes a new reference to the buffer so that it will only be freed
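The hunk above removes the sub-page "uptodate region" bookkeeping: a bitmap held in page->private where each bit covered PAGE_SIZE / BITS_PER_LONG bytes of the page. As a rough, standalone illustration of that arithmetic (assuming a 4096-byte page and 64-bit longs; this is a sketch, not the kernel code):

#include <stdio.h>

#define PAGE_SIZE	4096UL
#define BITS_PER_LONG	64UL
#define NBPPR		(PAGE_SIZE / BITS_PER_LONG)	/* bytes per region */

int main(void)
{
	size_t offset = 512, length = 1024;	/* one 1k block inside the page */
	size_t first = offset / NBPPR;		/* first region touched */
	size_t last = (offset + length - 1) / NBPPR;	/* last region touched */

	printf("%lu regions of %lu bytes track one page\n",
	       PAGE_SIZE / NBPPR, NBPPR);
	printf("bytes [%zu, %zu) map to uptodate bits %zu..%zu\n",
	       offset, offset + length, first, last);
	return 0;
}

Once buffers stop sharing page cache pages, no two buffers can land in the same page, so there is no partial-page uptodate state left to track and the whole scheme can go.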
@@ -332,7 +263,7 @@ xfs_buf_free(
332 263
333 ASSERT(list_empty(&bp->b_lru)); 264 ASSERT(list_empty(&bp->b_lru));
334 265
335 if (bp->b_flags & (_XBF_PAGE_CACHE|_XBF_PAGES)) { 266 if (bp->b_flags & _XBF_PAGES) {
336 uint i; 267 uint i;
337 268
338 if (xfs_buf_is_vmapped(bp)) 269 if (xfs_buf_is_vmapped(bp))
@@ -342,25 +273,22 @@ xfs_buf_free(
342 for (i = 0; i < bp->b_page_count; i++) { 273 for (i = 0; i < bp->b_page_count; i++) {
343 struct page *page = bp->b_pages[i]; 274 struct page *page = bp->b_pages[i];
344 275
345 if (bp->b_flags & _XBF_PAGE_CACHE) 276 __free_page(page);
346 ASSERT(!PagePrivate(page));
347 page_cache_release(page);
348 } 277 }
349 } 278 } else if (bp->b_flags & _XBF_KMEM)
279 kmem_free(bp->b_addr);
350 _xfs_buf_free_pages(bp); 280 _xfs_buf_free_pages(bp);
351 xfs_buf_deallocate(bp); 281 xfs_buf_deallocate(bp);
352} 282}
353 283
354/* 284/*
355 * Finds all pages for buffer in question and builds it's page list. 285 * Allocates all the pages for the buffer in question and builds its page list.
356 */ 286 */
357STATIC int 287STATIC int
358_xfs_buf_lookup_pages( 288xfs_buf_allocate_memory(
359 xfs_buf_t *bp, 289 xfs_buf_t *bp,
360 uint flags) 290 uint flags)
361{ 291{
362 struct address_space *mapping = bp->b_target->bt_mapping;
363 size_t blocksize = bp->b_target->bt_bsize;
364 size_t size = bp->b_count_desired; 292 size_t size = bp->b_count_desired;
365 size_t nbytes, offset; 293 size_t nbytes, offset;
366 gfp_t gfp_mask = xb_to_gfp(flags); 294 gfp_t gfp_mask = xb_to_gfp(flags);
@@ -369,29 +297,55 @@ _xfs_buf_lookup_pages(
369 xfs_off_t end; 297 xfs_off_t end;
370 int error; 298 int error;
371 299
300 /*
301 * for buffers that are contained within a single page, just allocate
302 * the memory from the heap - there's no need for the complexity of
303 * page arrays to keep allocation down to order 0.
304 */
305 if (bp->b_buffer_length < PAGE_SIZE) {
306 bp->b_addr = kmem_alloc(bp->b_buffer_length, xb_to_km(flags));
307 if (!bp->b_addr) {
308 /* low memory - use alloc_page loop instead */
309 goto use_alloc_page;
310 }
311
312 if (((unsigned long)(bp->b_addr + bp->b_buffer_length - 1) &
313 PAGE_MASK) !=
314 ((unsigned long)bp->b_addr & PAGE_MASK)) {
315 /* b_addr spans two pages - use alloc_page instead */
316 kmem_free(bp->b_addr);
317 bp->b_addr = NULL;
318 goto use_alloc_page;
319 }
320 bp->b_offset = offset_in_page(bp->b_addr);
321 bp->b_pages = bp->b_page_array;
322 bp->b_pages[0] = virt_to_page(bp->b_addr);
323 bp->b_page_count = 1;
324 bp->b_flags |= XBF_MAPPED | _XBF_KMEM;
325 return 0;
326 }
327
328use_alloc_page:
372 end = bp->b_file_offset + bp->b_buffer_length; 329 end = bp->b_file_offset + bp->b_buffer_length;
373 page_count = xfs_buf_btoc(end) - xfs_buf_btoct(bp->b_file_offset); 330 page_count = xfs_buf_btoc(end) - xfs_buf_btoct(bp->b_file_offset);
374
375 error = _xfs_buf_get_pages(bp, page_count, flags); 331 error = _xfs_buf_get_pages(bp, page_count, flags);
376 if (unlikely(error)) 332 if (unlikely(error))
377 return error; 333 return error;
378 bp->b_flags |= _XBF_PAGE_CACHE;
379 334
380 offset = bp->b_offset; 335 offset = bp->b_offset;
381 first = bp->b_file_offset >> PAGE_CACHE_SHIFT; 336 first = bp->b_file_offset >> PAGE_SHIFT;
337 bp->b_flags |= _XBF_PAGES;
382 338
383 for (i = 0; i < bp->b_page_count; i++) { 339 for (i = 0; i < bp->b_page_count; i++) {
384 struct page *page; 340 struct page *page;
385 uint retries = 0; 341 uint retries = 0;
386 342retry:
387 retry: 343 page = alloc_page(gfp_mask);
388 page = find_or_create_page(mapping, first + i, gfp_mask);
389 if (unlikely(page == NULL)) { 344 if (unlikely(page == NULL)) {
390 if (flags & XBF_READ_AHEAD) { 345 if (flags & XBF_READ_AHEAD) {
391 bp->b_page_count = i; 346 bp->b_page_count = i;
392 for (i = 0; i < bp->b_page_count; i++) 347 error = ENOMEM;
393 unlock_page(bp->b_pages[i]); 348 goto out_free_pages;
394 return -ENOMEM;
395 } 349 }
396 350
397 /* 351 /*
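The new kmem_alloc() fast path above only works when the returned buffer can be described by a single b_pages[] entry, which is what the PAGE_MASK comparison on b_addr is checking. A minimal userspace sketch of that test (the MY_* names and the 4K page size are assumptions for illustration, not the kernel definitions):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define MY_PAGE_SHIFT	12
#define MY_PAGE_MASK	(~(((uintptr_t)1 << MY_PAGE_SHIFT) - 1))

/* true if [addr, addr + len) touches two different page frames, in which
 * case one page pointer cannot describe the buffer and the allocation
 * falls back to the alloc_page() loop */
static bool spans_two_pages(const void *addr, size_t len)
{
	uintptr_t first_byte = (uintptr_t)addr;
	uintptr_t last_byte = first_byte + len - 1;

	return (first_byte & MY_PAGE_MASK) != (last_byte & MY_PAGE_MASK);
}

int main(void)
{
	static char buf[8192] __attribute__((aligned(4096)));

	printf("%d\n", spans_two_pages(buf + 4000, 200));	/* 1: straddles */
	printf("%d\n", spans_two_pages(buf + 100, 200));	/* 0: fits */
	return 0;
}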
@@ -412,33 +366,16 @@ _xfs_buf_lookup_pages(
412 366
413 XFS_STATS_INC(xb_page_found); 367 XFS_STATS_INC(xb_page_found);
414 368
415 nbytes = min_t(size_t, size, PAGE_CACHE_SIZE - offset); 369 nbytes = min_t(size_t, size, PAGE_SIZE - offset);
416 size -= nbytes; 370 size -= nbytes;
417
418 ASSERT(!PagePrivate(page));
419 if (!PageUptodate(page)) {
420 page_count--;
421 if (blocksize >= PAGE_CACHE_SIZE) {
422 if (flags & XBF_READ)
423 bp->b_flags |= _XBF_PAGE_LOCKED;
424 } else if (!PagePrivate(page)) {
425 if (test_page_region(page, offset, nbytes))
426 page_count++;
427 }
428 }
429
430 bp->b_pages[i] = page; 371 bp->b_pages[i] = page;
431 offset = 0; 372 offset = 0;
432 } 373 }
374 return 0;
433 375
434 if (!(bp->b_flags & _XBF_PAGE_LOCKED)) { 376out_free_pages:
435 for (i = 0; i < bp->b_page_count; i++) 377 for (i = 0; i < bp->b_page_count; i++)
436 unlock_page(bp->b_pages[i]); 378 __free_page(bp->b_pages[i]);
437 }
438
439 if (page_count == bp->b_page_count)
440 bp->b_flags |= XBF_DONE;
441
442 return error; 379 return error;
443} 380}
444 381
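When alloc_page() fails for a readahead buffer, the rewritten loop above trims b_page_count to the number of pages it actually got, frees those, and returns ENOMEM (positive, per the XFS convention visible in the hunk) rather than sleeping and retrying. A rough userspace stand-in for that unwind pattern, with malloc/free in place of alloc_page()/__free_page():

#include <errno.h>
#include <stdlib.h>

#define SLOT_SIZE	4096

/* allocate count slots; on failure free only what was allocated so far */
static int alloc_slots(void **slots, int count)
{
	int i;

	for (i = 0; i < count; i++) {
		slots[i] = malloc(SLOT_SIZE);
		if (!slots[i])
			goto out_free;
	}
	return 0;

out_free:
	while (--i >= 0)
		free(slots[i]);
	return ENOMEM;
}

int main(void)
{
	void *slots[4];
	int i, error = alloc_slots(slots, 4);

	if (!error)
		for (i = 0; i < 4; i++)
			free(slots[i]);
	return error ? 1 : 0;
}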
@@ -450,8 +387,9 @@ _xfs_buf_map_pages(
450 xfs_buf_t *bp, 387 xfs_buf_t *bp,
451 uint flags) 388 uint flags)
452{ 389{
453 /* A single page buffer is always mappable */ 390 ASSERT(bp->b_flags & _XBF_PAGES);
454 if (bp->b_page_count == 1) { 391 if (bp->b_page_count == 1) {
392 /* A single page buffer is always mappable */
455 bp->b_addr = page_address(bp->b_pages[0]) + bp->b_offset; 393 bp->b_addr = page_address(bp->b_pages[0]) + bp->b_offset;
456 bp->b_flags |= XBF_MAPPED; 394 bp->b_flags |= XBF_MAPPED;
457 } else if (flags & XBF_MAPPED) { 395 } else if (flags & XBF_MAPPED) {
@@ -576,9 +514,14 @@ found:
576 } 514 }
577 } 515 }
578 516
517 /*
 518 * if the buffer is stale, clear all the external state associated with
 519 * it. We need to keep the flags that record how the buffer memory was
 520 * allocated intact here.
521 */
579 if (bp->b_flags & XBF_STALE) { 522 if (bp->b_flags & XBF_STALE) {
580 ASSERT((bp->b_flags & _XBF_DELWRI_Q) == 0); 523 ASSERT((bp->b_flags & _XBF_DELWRI_Q) == 0);
581 bp->b_flags &= XBF_MAPPED; 524 bp->b_flags &= XBF_MAPPED | _XBF_KMEM | _XBF_PAGES;
582 } 525 }
583 526
584 trace_xfs_buf_find(bp, flags, _RET_IP_); 527 trace_xfs_buf_find(bp, flags, _RET_IP_);
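The comment added in this hunk carries the key behavioural point: with buffers now backed by either heap memory or private pages, _xfs_buf_find() must preserve the bits that say how the memory was allocated when it recycles a stale buffer. A toy sketch of the masking (the helper name and flag values here are invented for illustration; only the set of preserved flags mirrors the hunk):

/* illustrative flag bits -- not the kernel's actual values */
#define XBF_MAPPED	(1u << 0)	/* b_addr is valid */
#define XBF_STALE	(1u << 1)	/* contents are no longer valid */
#define _XBF_PAGES	(1u << 2)	/* backed by alloc_page() pages */
#define _XBF_KMEM	(1u << 3)	/* backed by kmem_alloc() memory */
#define _XBF_DELWRI_Q	(1u << 4)	/* queued for delayed write */

/* hypothetical helper: on reuse of a stale buffer, drop everything except
 * the mapping/backing state */
static unsigned int buf_reuse_flags(unsigned int b_flags)
{
	return b_flags & (XBF_MAPPED | _XBF_KMEM | _XBF_PAGES);
}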
@@ -599,7 +542,7 @@ xfs_buf_get(
599 xfs_buf_flags_t flags) 542 xfs_buf_flags_t flags)
600{ 543{
601 xfs_buf_t *bp, *new_bp; 544 xfs_buf_t *bp, *new_bp;
602 int error = 0, i; 545 int error = 0;
603 546
604 new_bp = xfs_buf_allocate(flags); 547 new_bp = xfs_buf_allocate(flags);
605 if (unlikely(!new_bp)) 548 if (unlikely(!new_bp))
@@ -607,7 +550,7 @@ xfs_buf_get(
607 550
608 bp = _xfs_buf_find(target, ioff, isize, flags, new_bp); 551 bp = _xfs_buf_find(target, ioff, isize, flags, new_bp);
609 if (bp == new_bp) { 552 if (bp == new_bp) {
610 error = _xfs_buf_lookup_pages(bp, flags); 553 error = xfs_buf_allocate_memory(bp, flags);
611 if (error) 554 if (error)
612 goto no_buffer; 555 goto no_buffer;
613 } else { 556 } else {
@@ -616,9 +559,6 @@ xfs_buf_get(
616 return NULL; 559 return NULL;
617 } 560 }
618 561
619 for (i = 0; i < bp->b_page_count; i++)
620 mark_page_accessed(bp->b_pages[i]);
621
622 if (!(bp->b_flags & XBF_MAPPED)) { 562 if (!(bp->b_flags & XBF_MAPPED)) {
623 error = _xfs_buf_map_pages(bp, flags); 563 error = _xfs_buf_map_pages(bp, flags);
624 if (unlikely(error)) { 564 if (unlikely(error)) {
@@ -719,8 +659,7 @@ xfs_buf_readahead(
719{ 659{
720 struct backing_dev_info *bdi; 660 struct backing_dev_info *bdi;
721 661
722 bdi = target->bt_mapping->backing_dev_info; 662 if (bdi_read_congested(target->bt_bdi))
723 if (bdi_read_congested(bdi))
724 return; 663 return;
725 664
726 xfs_buf_read(target, ioff, isize, 665 xfs_buf_read(target, ioff, isize,
@@ -798,10 +737,10 @@ xfs_buf_associate_memory(
798 size_t buflen; 737 size_t buflen;
799 int page_count; 738 int page_count;
800 739
801 pageaddr = (unsigned long)mem & PAGE_CACHE_MASK; 740 pageaddr = (unsigned long)mem & PAGE_MASK;
802 offset = (unsigned long)mem - pageaddr; 741 offset = (unsigned long)mem - pageaddr;
803 buflen = PAGE_CACHE_ALIGN(len + offset); 742 buflen = PAGE_ALIGN(len + offset);
804 page_count = buflen >> PAGE_CACHE_SHIFT; 743 page_count = buflen >> PAGE_SHIFT;
805 744
806 /* Free any previous set of page pointers */ 745 /* Free any previous set of page pointers */
807 if (bp->b_pages) 746 if (bp->b_pages)
@@ -818,13 +757,12 @@ xfs_buf_associate_memory(
818 757
819 for (i = 0; i < bp->b_page_count; i++) { 758 for (i = 0; i < bp->b_page_count; i++) {
820 bp->b_pages[i] = mem_to_page((void *)pageaddr); 759 bp->b_pages[i] = mem_to_page((void *)pageaddr);
821 pageaddr += PAGE_CACHE_SIZE; 760 pageaddr += PAGE_SIZE;
822 } 761 }
823 762
824 bp->b_count_desired = len; 763 bp->b_count_desired = len;
825 bp->b_buffer_length = buflen; 764 bp->b_buffer_length = buflen;
826 bp->b_flags |= XBF_MAPPED; 765 bp->b_flags |= XBF_MAPPED;
827 bp->b_flags &= ~_XBF_PAGE_LOCKED;
828 766
829 return 0; 767 return 0;
830} 768}
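xfs_buf_associate_memory() keeps the same shape but now works in PAGE_SIZE units. The alignment arithmetic is easy to sanity-check in isolation; a sketch assuming 4K pages (the macros mirror the kernel ones but are redefined here so the snippet stands alone):

#include <stdio.h>

#define PAGE_SHIFT	12
#define PAGE_SIZE	(1UL << PAGE_SHIFT)
#define PAGE_MASK	(~(PAGE_SIZE - 1))
#define PAGE_ALIGN(x)	(((x) + PAGE_SIZE - 1) & PAGE_MASK)

int main(void)
{
	unsigned long mem = 0x1000f00UL;	/* arbitrary, not page aligned */
	unsigned long len = 8192;

	unsigned long pageaddr = mem & PAGE_MASK;
	unsigned long offset = mem - pageaddr;
	unsigned long buflen = PAGE_ALIGN(len + offset);
	unsigned long page_count = buflen >> PAGE_SHIFT;

	/* expect: offset 3840, buflen 12288, page_count 3 */
	printf("offset %lu, buflen %lu, %lu pages\n", offset, buflen, page_count);
	return 0;
}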
@@ -931,20 +869,7 @@ xfs_buf_rele(
931 869
932 870
933/* 871/*
934 * Mutual exclusion on buffers. Locking model: 872 * Lock a buffer object, if it is not already locked.
935 *
936 * Buffers associated with inodes for which buffer locking
937 * is not enabled are not protected by semaphores, and are
938 * assumed to be exclusively owned by the caller. There is a
939 * spinlock in the buffer, used by the caller when concurrent
940 * access is possible.
941 */
942
943/*
944 * Locks a buffer object, if it is not already locked. Note that this in
945 * no way locks the underlying pages, so it is only useful for
946 * synchronizing concurrent use of buffer objects, not for synchronizing
947 * independent access to the underlying pages.
948 * 873 *
949 * If we come across a stale, pinned, locked buffer, we know that we are 874 * If we come across a stale, pinned, locked buffer, we know that we are
950 * being asked to lock a buffer that has been reallocated. Because it is 875 * being asked to lock a buffer that has been reallocated. Because it is
@@ -978,10 +903,7 @@ xfs_buf_lock_value(
978} 903}
979 904
980/* 905/*
981 * Locks a buffer object. 906 * Lock a buffer object.
982 * Note that this in no way locks the underlying pages, so it is only
983 * useful for synchronizing concurrent use of buffer objects, not for
984 * synchronizing independent access to the underlying pages.
985 * 907 *
986 * If we come across a stale, pinned, locked buffer, we know that we 908 * If we come across a stale, pinned, locked buffer, we know that we
987 * are being asked to lock a buffer that has been reallocated. Because 909 * are being asked to lock a buffer that has been reallocated. Because
@@ -998,7 +920,7 @@ xfs_buf_lock(
998 if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE)) 920 if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE))
999 xfs_log_force(bp->b_target->bt_mount, 0); 921 xfs_log_force(bp->b_target->bt_mount, 0);
1000 if (atomic_read(&bp->b_io_remaining)) 922 if (atomic_read(&bp->b_io_remaining))
1001 blk_run_address_space(bp->b_target->bt_mapping); 923 blk_run_backing_dev(bp->b_target->bt_bdi, NULL);
1002 down(&bp->b_sema); 924 down(&bp->b_sema);
1003 XB_SET_OWNER(bp); 925 XB_SET_OWNER(bp);
1004 926
@@ -1043,7 +965,7 @@ xfs_buf_wait_unpin(
1043 if (atomic_read(&bp->b_pin_count) == 0) 965 if (atomic_read(&bp->b_pin_count) == 0)
1044 break; 966 break;
1045 if (atomic_read(&bp->b_io_remaining)) 967 if (atomic_read(&bp->b_io_remaining))
1046 blk_run_address_space(bp->b_target->bt_mapping); 968 blk_run_backing_dev(bp->b_target->bt_bdi, NULL);
1047 schedule(); 969 schedule();
1048 } 970 }
1049 remove_wait_queue(&bp->b_waiters, &wait); 971 remove_wait_queue(&bp->b_waiters, &wait);
@@ -1256,10 +1178,8 @@ _xfs_buf_ioend(
1256 xfs_buf_t *bp, 1178 xfs_buf_t *bp,
1257 int schedule) 1179 int schedule)
1258{ 1180{
1259 if (atomic_dec_and_test(&bp->b_io_remaining) == 1) { 1181 if (atomic_dec_and_test(&bp->b_io_remaining) == 1)
1260 bp->b_flags &= ~_XBF_PAGE_LOCKED;
1261 xfs_buf_ioend(bp, schedule); 1182 xfs_buf_ioend(bp, schedule);
1262 }
1263} 1183}
1264 1184
1265STATIC void 1185STATIC void
@@ -1268,35 +1188,12 @@ xfs_buf_bio_end_io(
1268 int error) 1188 int error)
1269{ 1189{
1270 xfs_buf_t *bp = (xfs_buf_t *)bio->bi_private; 1190 xfs_buf_t *bp = (xfs_buf_t *)bio->bi_private;
1271 unsigned int blocksize = bp->b_target->bt_bsize;
1272 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
1273 1191
1274 xfs_buf_ioerror(bp, -error); 1192 xfs_buf_ioerror(bp, -error);
1275 1193
1276 if (!error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ)) 1194 if (!error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ))
1277 invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp)); 1195 invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp));
1278 1196
1279 do {
1280 struct page *page = bvec->bv_page;
1281
1282 ASSERT(!PagePrivate(page));
1283 if (unlikely(bp->b_error)) {
1284 if (bp->b_flags & XBF_READ)
1285 ClearPageUptodate(page);
1286 } else if (blocksize >= PAGE_CACHE_SIZE) {
1287 SetPageUptodate(page);
1288 } else if (!PagePrivate(page) &&
1289 (bp->b_flags & _XBF_PAGE_CACHE)) {
1290 set_page_region(page, bvec->bv_offset, bvec->bv_len);
1291 }
1292
1293 if (--bvec >= bio->bi_io_vec)
1294 prefetchw(&bvec->bv_page->flags);
1295
1296 if (bp->b_flags & _XBF_PAGE_LOCKED)
1297 unlock_page(page);
1298 } while (bvec >= bio->bi_io_vec);
1299
1300 _xfs_buf_ioend(bp, 1); 1197 _xfs_buf_ioend(bp, 1);
1301 bio_put(bio); 1198 bio_put(bio);
1302} 1199}
@@ -1310,7 +1207,6 @@ _xfs_buf_ioapply(
1310 int offset = bp->b_offset; 1207 int offset = bp->b_offset;
1311 int size = bp->b_count_desired; 1208 int size = bp->b_count_desired;
1312 sector_t sector = bp->b_bn; 1209 sector_t sector = bp->b_bn;
1313 unsigned int blocksize = bp->b_target->bt_bsize;
1314 1210
1315 total_nr_pages = bp->b_page_count; 1211 total_nr_pages = bp->b_page_count;
1316 map_i = 0; 1212 map_i = 0;
@@ -1331,29 +1227,6 @@ _xfs_buf_ioapply(
1331 (bp->b_flags & XBF_READ_AHEAD) ? READA : READ; 1227 (bp->b_flags & XBF_READ_AHEAD) ? READA : READ;
1332 } 1228 }
1333 1229
1334 /* Special code path for reading a sub page size buffer in --
1335 * we populate up the whole page, and hence the other metadata
1336 * in the same page. This optimization is only valid when the
1337 * filesystem block size is not smaller than the page size.
1338 */
1339 if ((bp->b_buffer_length < PAGE_CACHE_SIZE) &&
1340 ((bp->b_flags & (XBF_READ|_XBF_PAGE_LOCKED)) ==
1341 (XBF_READ|_XBF_PAGE_LOCKED)) &&
1342 (blocksize >= PAGE_CACHE_SIZE)) {
1343 bio = bio_alloc(GFP_NOIO, 1);
1344
1345 bio->bi_bdev = bp->b_target->bt_bdev;
1346 bio->bi_sector = sector - (offset >> BBSHIFT);
1347 bio->bi_end_io = xfs_buf_bio_end_io;
1348 bio->bi_private = bp;
1349
1350 bio_add_page(bio, bp->b_pages[0], PAGE_CACHE_SIZE, 0);
1351 size = 0;
1352
1353 atomic_inc(&bp->b_io_remaining);
1354
1355 goto submit_io;
1356 }
1357 1230
1358next_chunk: 1231next_chunk:
1359 atomic_inc(&bp->b_io_remaining); 1232 atomic_inc(&bp->b_io_remaining);
@@ -1367,8 +1240,9 @@ next_chunk:
1367 bio->bi_end_io = xfs_buf_bio_end_io; 1240 bio->bi_end_io = xfs_buf_bio_end_io;
1368 bio->bi_private = bp; 1241 bio->bi_private = bp;
1369 1242
1243
1370 for (; size && nr_pages; nr_pages--, map_i++) { 1244 for (; size && nr_pages; nr_pages--, map_i++) {
1371 int rbytes, nbytes = PAGE_CACHE_SIZE - offset; 1245 int rbytes, nbytes = PAGE_SIZE - offset;
1372 1246
1373 if (nbytes > size) 1247 if (nbytes > size)
1374 nbytes = size; 1248 nbytes = size;
@@ -1383,7 +1257,6 @@ next_chunk:
1383 total_nr_pages--; 1257 total_nr_pages--;
1384 } 1258 }
1385 1259
1386submit_io:
1387 if (likely(bio->bi_size)) { 1260 if (likely(bio->bi_size)) {
1388 if (xfs_buf_is_vmapped(bp)) { 1261 if (xfs_buf_is_vmapped(bp)) {
1389 flush_kernel_vmap_range(bp->b_addr, 1262 flush_kernel_vmap_range(bp->b_addr,
@@ -1393,18 +1266,7 @@ submit_io:
1393 if (size) 1266 if (size)
1394 goto next_chunk; 1267 goto next_chunk;
1395 } else { 1268 } else {
1396 /*
1397 * if we get here, no pages were added to the bio. However,
1398 * we can't just error out here - if the pages are locked then
1399 * we have to unlock them otherwise we can hang on a later
1400 * access to the page.
1401 */
1402 xfs_buf_ioerror(bp, EIO); 1269 xfs_buf_ioerror(bp, EIO);
1403 if (bp->b_flags & _XBF_PAGE_LOCKED) {
1404 int i;
1405 for (i = 0; i < bp->b_page_count; i++)
1406 unlock_page(bp->b_pages[i]);
1407 }
1408 bio_put(bio); 1270 bio_put(bio);
1409 } 1271 }
1410} 1272}
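The per-page sizing inside the bio-building loop of _xfs_buf_ioapply() is unchanged in spirit, just expressed in PAGE_SIZE now: the first page may start at b_offset, every later page starts at zero, and the last page takes whatever remains. A standalone walk-through of that arithmetic (assuming 4K pages):

#include <stdio.h>

#define PAGE_SIZE	4096u

int main(void)
{
	unsigned int offset = 512;	/* b_offset into the first page */
	unsigned int size = 10000;	/* bytes still to queue for I/O */
	int page = 0;

	while (size) {
		unsigned int nbytes = PAGE_SIZE - offset;

		if (nbytes > size)
			nbytes = size;
		printf("page %d: offset %u, %u bytes\n", page++, offset, nbytes);
		size -= nbytes;
		offset = 0;	/* only the first page has a non-zero offset */
	}
	return 0;
}

(Output: 3584 bytes from the first page, a full 4096 from the second, and the remaining 2320 from the third.)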
@@ -1450,7 +1312,7 @@ xfs_buf_iowait(
1450 trace_xfs_buf_iowait(bp, _RET_IP_); 1312 trace_xfs_buf_iowait(bp, _RET_IP_);
1451 1313
1452 if (atomic_read(&bp->b_io_remaining)) 1314 if (atomic_read(&bp->b_io_remaining))
1453 blk_run_address_space(bp->b_target->bt_mapping); 1315 blk_run_backing_dev(bp->b_target->bt_bdi, NULL);
1454 wait_for_completion(&bp->b_iowait); 1316 wait_for_completion(&bp->b_iowait);
1455 1317
1456 trace_xfs_buf_iowait_done(bp, _RET_IP_); 1318 trace_xfs_buf_iowait_done(bp, _RET_IP_);
@@ -1468,8 +1330,8 @@ xfs_buf_offset(
1468 return XFS_BUF_PTR(bp) + offset; 1330 return XFS_BUF_PTR(bp) + offset;
1469 1331
1470 offset += bp->b_offset; 1332 offset += bp->b_offset;
1471 page = bp->b_pages[offset >> PAGE_CACHE_SHIFT]; 1333 page = bp->b_pages[offset >> PAGE_SHIFT];
1472 return (xfs_caddr_t)page_address(page) + (offset & (PAGE_CACHE_SIZE-1)); 1334 return (xfs_caddr_t)page_address(page) + (offset & (PAGE_SIZE-1));
1473} 1335}
1474 1336
1475/* 1337/*
@@ -1491,9 +1353,9 @@ xfs_buf_iomove(
1491 page = bp->b_pages[xfs_buf_btoct(boff + bp->b_offset)]; 1353 page = bp->b_pages[xfs_buf_btoct(boff + bp->b_offset)];
1492 cpoff = xfs_buf_poff(boff + bp->b_offset); 1354 cpoff = xfs_buf_poff(boff + bp->b_offset);
1493 csize = min_t(size_t, 1355 csize = min_t(size_t,
1494 PAGE_CACHE_SIZE-cpoff, bp->b_count_desired-boff); 1356 PAGE_SIZE-cpoff, bp->b_count_desired-boff);
1495 1357
1496 ASSERT(((csize + cpoff) <= PAGE_CACHE_SIZE)); 1358 ASSERT(((csize + cpoff) <= PAGE_SIZE));
1497 1359
1498 switch (mode) { 1360 switch (mode) {
1499 case XBRW_ZERO: 1361 case XBRW_ZERO:
@@ -1606,7 +1468,6 @@ xfs_free_buftarg(
1606 xfs_flush_buftarg(btp, 1); 1468 xfs_flush_buftarg(btp, 1);
1607 if (mp->m_flags & XFS_MOUNT_BARRIER) 1469 if (mp->m_flags & XFS_MOUNT_BARRIER)
1608 xfs_blkdev_issue_flush(btp); 1470 xfs_blkdev_issue_flush(btp);
1609 iput(btp->bt_mapping->host);
1610 1471
1611 kthread_stop(btp->bt_task); 1472 kthread_stop(btp->bt_task);
1612 kmem_free(btp); 1473 kmem_free(btp);
@@ -1630,15 +1491,6 @@ xfs_setsize_buftarg_flags(
1630 return EINVAL; 1491 return EINVAL;
1631 } 1492 }
1632 1493
1633 if (verbose &&
1634 (PAGE_CACHE_SIZE / BITS_PER_LONG) > sectorsize) {
1635 printk(KERN_WARNING
1636 "XFS: %u byte sectors in use on device %s. "
1637 "This is suboptimal; %u or greater is ideal.\n",
1638 sectorsize, XFS_BUFTARG_NAME(btp),
1639 (unsigned int)PAGE_CACHE_SIZE / BITS_PER_LONG);
1640 }
1641
1642 return 0; 1494 return 0;
1643} 1495}
1644 1496
@@ -1653,7 +1505,7 @@ xfs_setsize_buftarg_early(
1653 struct block_device *bdev) 1505 struct block_device *bdev)
1654{ 1506{
1655 return xfs_setsize_buftarg_flags(btp, 1507 return xfs_setsize_buftarg_flags(btp,
1656 PAGE_CACHE_SIZE, bdev_logical_block_size(bdev), 0); 1508 PAGE_SIZE, bdev_logical_block_size(bdev), 0);
1657} 1509}
1658 1510
1659int 1511int
@@ -1666,41 +1518,6 @@ xfs_setsize_buftarg(
1666} 1518}
1667 1519
1668STATIC int 1520STATIC int
1669xfs_mapping_buftarg(
1670 xfs_buftarg_t *btp,
1671 struct block_device *bdev)
1672{
1673 struct backing_dev_info *bdi;
1674 struct inode *inode;
1675 struct address_space *mapping;
1676 static const struct address_space_operations mapping_aops = {
1677 .sync_page = block_sync_page,
1678 .migratepage = fail_migrate_page,
1679 };
1680
1681 inode = new_inode(bdev->bd_inode->i_sb);
1682 if (!inode) {
1683 printk(KERN_WARNING
1684 "XFS: Cannot allocate mapping inode for device %s\n",
1685 XFS_BUFTARG_NAME(btp));
1686 return ENOMEM;
1687 }
1688 inode->i_ino = get_next_ino();
1689 inode->i_mode = S_IFBLK;
1690 inode->i_bdev = bdev;
1691 inode->i_rdev = bdev->bd_dev;
1692 bdi = blk_get_backing_dev_info(bdev);
1693 if (!bdi)
1694 bdi = &default_backing_dev_info;
1695 mapping = &inode->i_data;
1696 mapping->a_ops = &mapping_aops;
1697 mapping->backing_dev_info = bdi;
1698 mapping_set_gfp_mask(mapping, GFP_NOFS);
1699 btp->bt_mapping = mapping;
1700 return 0;
1701}
1702
1703STATIC int
1704xfs_alloc_delwrite_queue( 1521xfs_alloc_delwrite_queue(
1705 xfs_buftarg_t *btp, 1522 xfs_buftarg_t *btp,
1706 const char *fsname) 1523 const char *fsname)
@@ -1728,12 +1545,14 @@ xfs_alloc_buftarg(
1728 btp->bt_mount = mp; 1545 btp->bt_mount = mp;
1729 btp->bt_dev = bdev->bd_dev; 1546 btp->bt_dev = bdev->bd_dev;
1730 btp->bt_bdev = bdev; 1547 btp->bt_bdev = bdev;
1548 btp->bt_bdi = blk_get_backing_dev_info(bdev);
1549 if (!btp->bt_bdi)
1550 goto error;
1551
1731 INIT_LIST_HEAD(&btp->bt_lru); 1552 INIT_LIST_HEAD(&btp->bt_lru);
1732 spin_lock_init(&btp->bt_lru_lock); 1553 spin_lock_init(&btp->bt_lru_lock);
1733 if (xfs_setsize_buftarg_early(btp, bdev)) 1554 if (xfs_setsize_buftarg_early(btp, bdev))
1734 goto error; 1555 goto error;
1735 if (xfs_mapping_buftarg(btp, bdev))
1736 goto error;
1737 if (xfs_alloc_delwrite_queue(btp, fsname)) 1556 if (xfs_alloc_delwrite_queue(btp, fsname))
1738 goto error; 1557 goto error;
1739 btp->bt_shrinker.shrink = xfs_buftarg_shrink; 1558 btp->bt_shrinker.shrink = xfs_buftarg_shrink;
@@ -1955,7 +1774,7 @@ xfsbufd(
1955 count++; 1774 count++;
1956 } 1775 }
1957 if (count) 1776 if (count)
1958 blk_run_address_space(target->bt_mapping); 1777 blk_run_backing_dev(target->bt_bdi, NULL);
1959 1778
1960 } while (!kthread_should_stop()); 1779 } while (!kthread_should_stop());
1961 1780
@@ -2003,7 +1822,7 @@ xfs_flush_buftarg(
2003 1822
2004 if (wait) { 1823 if (wait) {
2005 /* Expedite and wait for IO to complete. */ 1824 /* Expedite and wait for IO to complete. */
2006 blk_run_address_space(target->bt_mapping); 1825 blk_run_backing_dev(target->bt_bdi, NULL);
2007 while (!list_empty(&wait_list)) { 1826 while (!list_empty(&wait_list)) {
2008 bp = list_first_entry(&wait_list, struct xfs_buf, b_list); 1827 bp = list_first_entry(&wait_list, struct xfs_buf, b_list);
2009 1828
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index cbe65950e524..a9a1c4512645 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -61,30 +61,11 @@ typedef enum {
61#define XBF_DONT_BLOCK (1 << 16)/* do not block in current thread */ 61#define XBF_DONT_BLOCK (1 << 16)/* do not block in current thread */
62 62
63/* flags used only internally */ 63/* flags used only internally */
64#define _XBF_PAGE_CACHE (1 << 17)/* backed by pagecache */
65#define _XBF_PAGES (1 << 18)/* backed by refcounted pages */ 64#define _XBF_PAGES (1 << 18)/* backed by refcounted pages */
66#define _XBF_RUN_QUEUES (1 << 19)/* run block device task queue */ 65#define _XBF_RUN_QUEUES (1 << 19)/* run block device task queue */
66#define _XBF_KMEM (1 << 20)/* backed by heap memory */
67#define _XBF_DELWRI_Q (1 << 21)/* buffer on delwri queue */ 67#define _XBF_DELWRI_Q (1 << 21)/* buffer on delwri queue */
68 68
69/*
70 * Special flag for supporting metadata blocks smaller than a FSB.
71 *
72 * In this case we can have multiple xfs_buf_t on a single page and
73 * need to lock out concurrent xfs_buf_t readers as they only
74 * serialise access to the buffer.
75 *
76 * If the FSB size >= PAGE_CACHE_SIZE case, we have no serialisation
77 * between reads of the page. Hence we can have one thread read the
78 * page and modify it, but then race with another thread that thinks
79 * the page is not up-to-date and hence reads it again.
80 *
81 * The result is that the first modifcation to the page is lost.
82 * This sort of AGF/AGI reading race can happen when unlinking inodes
83 * that require truncation and results in the AGI unlinked list
84 * modifications being lost.
85 */
86#define _XBF_PAGE_LOCKED (1 << 22)
87
88typedef unsigned int xfs_buf_flags_t; 69typedef unsigned int xfs_buf_flags_t;
89 70
90#define XFS_BUF_FLAGS \ 71#define XFS_BUF_FLAGS \
@@ -100,12 +81,10 @@ typedef unsigned int xfs_buf_flags_t;
100 { XBF_LOCK, "LOCK" }, /* should never be set */\ 81 { XBF_LOCK, "LOCK" }, /* should never be set */\
101 { XBF_TRYLOCK, "TRYLOCK" }, /* ditto */\ 82 { XBF_TRYLOCK, "TRYLOCK" }, /* ditto */\
102 { XBF_DONT_BLOCK, "DONT_BLOCK" }, /* ditto */\ 83 { XBF_DONT_BLOCK, "DONT_BLOCK" }, /* ditto */\
103 { _XBF_PAGE_CACHE, "PAGE_CACHE" }, \
104 { _XBF_PAGES, "PAGES" }, \ 84 { _XBF_PAGES, "PAGES" }, \
105 { _XBF_RUN_QUEUES, "RUN_QUEUES" }, \ 85 { _XBF_RUN_QUEUES, "RUN_QUEUES" }, \
106 { _XBF_DELWRI_Q, "DELWRI_Q" }, \ 86 { _XBF_KMEM, "KMEM" }, \
107 { _XBF_PAGE_LOCKED, "PAGE_LOCKED" } 87 { _XBF_DELWRI_Q, "DELWRI_Q" }
108
109 88
110typedef enum { 89typedef enum {
111 XBT_FORCE_SLEEP = 0, 90 XBT_FORCE_SLEEP = 0,
@@ -120,7 +99,7 @@ typedef struct xfs_bufhash {
120typedef struct xfs_buftarg { 99typedef struct xfs_buftarg {
121 dev_t bt_dev; 100 dev_t bt_dev;
122 struct block_device *bt_bdev; 101 struct block_device *bt_bdev;
123 struct address_space *bt_mapping; 102 struct backing_dev_info *bt_bdi;
124 struct xfs_mount *bt_mount; 103 struct xfs_mount *bt_mount;
125 unsigned int bt_bsize; 104 unsigned int bt_bsize;
126 unsigned int bt_sshift; 105 unsigned int bt_sshift;
@@ -139,17 +118,6 @@ typedef struct xfs_buftarg {
139 unsigned int bt_lru_nr; 118 unsigned int bt_lru_nr;
140} xfs_buftarg_t; 119} xfs_buftarg_t;
141 120
142/*
143 * xfs_buf_t: Buffer structure for pagecache-based buffers
144 *
145 * This buffer structure is used by the pagecache buffer management routines
146 * to refer to an assembly of pages forming a logical buffer.
147 *
148 * The buffer structure is used on a temporary basis only, and discarded when
149 * released. The real data storage is recorded in the pagecache. Buffers are
150 * hashed to the block device on which the file system resides.
151 */
152
153struct xfs_buf; 121struct xfs_buf;
154typedef void (*xfs_buf_iodone_t)(struct xfs_buf *); 122typedef void (*xfs_buf_iodone_t)(struct xfs_buf *);
155 123