Diffstat (limited to 'fs/xfs/xfs_buf.c')
-rw-r--r--	fs/xfs/xfs_buf.c	240
1 file changed, 176 insertions, 64 deletions
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 269b35c084da..d7a9dd735e1e 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -164,14 +164,49 @@ xfs_buf_stale(
 	ASSERT(atomic_read(&bp->b_hold) >= 1);
 }
 
+static int
+xfs_buf_get_maps(
+	struct xfs_buf		*bp,
+	int			map_count)
+{
+	ASSERT(bp->b_maps == NULL);
+	bp->b_map_count = map_count;
+
+	if (map_count == 1) {
+		bp->b_maps = &bp->b_map;
+		return 0;
+	}
+
+	bp->b_maps = kmem_zalloc(map_count * sizeof(struct xfs_buf_map),
+				KM_NOFS);
+	if (!bp->b_maps)
+		return ENOMEM;
+	return 0;
+}
+
+/*
+ * Frees b_maps if it was allocated.
+ */
+static void
+xfs_buf_free_maps(
+	struct xfs_buf	*bp)
+{
+	if (bp->b_maps != &bp->b_map) {
+		kmem_free(bp->b_maps);
+		bp->b_maps = NULL;
+	}
+}
+
 struct xfs_buf *
-xfs_buf_alloc(
+_xfs_buf_alloc(
 	struct xfs_buftarg	*target,
-	xfs_daddr_t		blkno,
-	size_t			numblks,
+	struct xfs_buf_map	*map,
+	int			nmaps,
 	xfs_buf_flags_t		flags)
 {
 	struct xfs_buf		*bp;
+	int			error;
+	int			i;
 
 	bp = kmem_zone_zalloc(xfs_buf_zone, KM_NOFS);
 	if (unlikely(!bp))
@@ -192,16 +227,28 @@ xfs_buf_alloc(
 	sema_init(&bp->b_sema, 0); /* held, no waiters */
 	XB_SET_OWNER(bp);
 	bp->b_target = target;
+	bp->b_flags = flags;
 
 	/*
 	 * Set length and io_length to the same value initially.
 	 * I/O routines should use io_length, which will be the same in
 	 * most cases but may be reset (e.g. XFS recovery).
 	 */
-	bp->b_length = numblks;
-	bp->b_io_length = numblks;
-	bp->b_flags = flags;
-	bp->b_bn = blkno;
+	error = xfs_buf_get_maps(bp, nmaps);
+	if (error) {
+		kmem_zone_free(xfs_buf_zone, bp);
+		return NULL;
+	}
+
+	bp->b_bn = map[0].bm_bn;
+	bp->b_length = 0;
+	for (i = 0; i < nmaps; i++) {
+		bp->b_maps[i].bm_bn = map[i].bm_bn;
+		bp->b_maps[i].bm_len = map[i].bm_len;
+		bp->b_length += map[i].bm_len;
+	}
+	bp->b_io_length = bp->b_length;
+
 	atomic_set(&bp->b_pin_count, 0);
 	init_waitqueue_head(&bp->b_waiters);
 
@@ -280,6 +327,7 @@ xfs_buf_free(
 	} else if (bp->b_flags & _XBF_KMEM)
 		kmem_free(bp->b_addr);
 	_xfs_buf_free_pages(bp);
+	xfs_buf_free_maps(bp);
 	kmem_zone_free(xfs_buf_zone, bp);
 }
 
@@ -327,8 +375,9 @@ xfs_buf_allocate_memory(
 	}
 
 use_alloc_page:
-	start = BBTOB(bp->b_bn) >> PAGE_SHIFT;
-	end = (BBTOB(bp->b_bn + bp->b_length) + PAGE_SIZE - 1) >> PAGE_SHIFT;
+	start = BBTOB(bp->b_map.bm_bn) >> PAGE_SHIFT;
+	end = (BBTOB(bp->b_map.bm_bn + bp->b_length) + PAGE_SIZE - 1)
+							>> PAGE_SHIFT;
 	page_count = end - start;
 	error = _xfs_buf_get_pages(bp, page_count, flags);
 	if (unlikely(error))
@@ -425,8 +474,8 @@ _xfs_buf_map_pages(
 xfs_buf_t *
 _xfs_buf_find(
 	struct xfs_buftarg	*btp,
-	xfs_daddr_t		blkno,
-	size_t			numblks,
+	struct xfs_buf_map	*map,
+	int			nmaps,
 	xfs_buf_flags_t		flags,
 	xfs_buf_t		*new_bp)
 {
@@ -435,7 +484,12 @@ _xfs_buf_find(
 	struct rb_node		**rbp;
 	struct rb_node		*parent;
 	xfs_buf_t		*bp;
+	xfs_daddr_t		blkno = map[0].bm_bn;
+	int			numblks = 0;
+	int			i;
 
+	for (i = 0; i < nmaps; i++)
+		numblks += map[i].bm_len;
 	numbytes = BBTOB(numblks);
 
 	/* Check for IOs smaller than the sector size / not sector aligned */
@@ -527,31 +581,31 @@ found:
  * more hits than misses.
  */
 struct xfs_buf *
-xfs_buf_get(
-	xfs_buftarg_t		*target,
-	xfs_daddr_t		blkno,
-	size_t			numblks,
+xfs_buf_get_map(
+	struct xfs_buftarg	*target,
+	struct xfs_buf_map	*map,
+	int			nmaps,
 	xfs_buf_flags_t		flags)
 {
 	struct xfs_buf		*bp;
 	struct xfs_buf		*new_bp;
 	int			error = 0;
 
-	bp = _xfs_buf_find(target, blkno, numblks, flags, NULL);
+	bp = _xfs_buf_find(target, map, nmaps, flags, NULL);
 	if (likely(bp))
 		goto found;
 
-	new_bp = xfs_buf_alloc(target, blkno, numblks, flags);
+	new_bp = _xfs_buf_alloc(target, map, nmaps, flags);
 	if (unlikely(!new_bp))
 		return NULL;
 
 	error = xfs_buf_allocate_memory(new_bp, flags);
 	if (error) {
-		kmem_zone_free(xfs_buf_zone, new_bp);
+		xfs_buf_free(new_bp);
 		return NULL;
 	}
 
-	bp = _xfs_buf_find(target, blkno, numblks, flags, new_bp);
+	bp = _xfs_buf_find(target, map, nmaps, flags, new_bp);
 	if (!bp) {
 		xfs_buf_free(new_bp);
 		return NULL;
@@ -560,8 +614,6 @@ xfs_buf_get(
 	if (bp != new_bp)
 		xfs_buf_free(new_bp);
 
-	bp->b_io_length = bp->b_length;
-
 found:
 	if (!bp->b_addr) {
 		error = _xfs_buf_map_pages(bp, flags);
@@ -584,7 +636,7 @@ _xfs_buf_read(
 	xfs_buf_flags_t		flags)
 {
 	ASSERT(!(flags & XBF_WRITE));
-	ASSERT(bp->b_bn != XFS_BUF_DADDR_NULL);
+	ASSERT(bp->b_map.bm_bn != XFS_BUF_DADDR_NULL);
 
 	bp->b_flags &= ~(XBF_WRITE | XBF_ASYNC | XBF_READ_AHEAD);
 	bp->b_flags |= flags & (XBF_READ | XBF_ASYNC | XBF_READ_AHEAD);
@@ -596,17 +648,17 @@ _xfs_buf_read(
 }
 
 xfs_buf_t *
-xfs_buf_read(
-	xfs_buftarg_t		*target,
-	xfs_daddr_t		blkno,
-	size_t			numblks,
+xfs_buf_read_map(
+	struct xfs_buftarg	*target,
+	struct xfs_buf_map	*map,
+	int			nmaps,
 	xfs_buf_flags_t		flags)
 {
-	xfs_buf_t		*bp;
+	struct xfs_buf		*bp;
 
 	flags |= XBF_READ;
 
-	bp = xfs_buf_get(target, blkno, numblks, flags);
+	bp = xfs_buf_get_map(target, map, nmaps, flags);
 	if (bp) {
 		trace_xfs_buf_read(bp, flags, _RET_IP_);
 
@@ -634,15 +686,15 @@ xfs_buf_read(
  * safe manner.
  */
 void
-xfs_buf_readahead(
-	xfs_buftarg_t		*target,
-	xfs_daddr_t		blkno,
-	size_t			numblks)
+xfs_buf_readahead_map(
+	struct xfs_buftarg	*target,
+	struct xfs_buf_map	*map,
+	int			nmaps)
 {
 	if (bdi_read_congested(target->bt_bdi))
 		return;
 
-	xfs_buf_read(target, blkno, numblks,
+	xfs_buf_read_map(target, map, nmaps,
 		     XBF_TRYLOCK|XBF_ASYNC|XBF_READ_AHEAD);
 }
 
@@ -665,8 +717,10 @@ xfs_buf_read_uncached(
 		return NULL;
 
 	/* set up the buffer for a read IO */
-	XFS_BUF_SET_ADDR(bp, daddr);
-	XFS_BUF_READ(bp);
+	ASSERT(bp->b_map_count == 1);
+	bp->b_bn = daddr;
+	bp->b_maps[0].bm_bn = daddr;
+	bp->b_flags |= XBF_READ;
 
 	xfsbdstrat(target->bt_mount, bp);
 	error = xfs_buf_iowait(bp);
@@ -694,7 +748,11 @@ xfs_buf_set_empty(
 	bp->b_addr = NULL;
 	bp->b_length = numblks;
 	bp->b_io_length = numblks;
+
+	ASSERT(bp->b_map_count == 1);
 	bp->b_bn = XFS_BUF_DADDR_NULL;
+	bp->b_maps[0].bm_bn = XFS_BUF_DADDR_NULL;
+	bp->b_maps[0].bm_len = bp->b_length;
 }
 
 static inline struct page *
@@ -758,9 +816,10 @@ xfs_buf_get_uncached(
 {
 	unsigned long		page_count;
 	int			error, i;
-	xfs_buf_t		*bp;
+	struct xfs_buf		*bp;
+	DEFINE_SINGLE_BUF_MAP(map, XFS_BUF_DADDR_NULL, numblks);
 
-	bp = xfs_buf_alloc(target, XFS_BUF_DADDR_NULL, numblks, 0);
+	bp = _xfs_buf_alloc(target, &map, 1, 0);
 	if (unlikely(bp == NULL))
 		goto fail;
 
@@ -791,6 +850,7 @@ xfs_buf_get_uncached(
 		__free_page(bp->b_pages[i]);
 	_xfs_buf_free_pages(bp);
  fail_free_buf:
+	xfs_buf_free_maps(bp);
 	kmem_zone_free(xfs_buf_zone, bp);
  fail:
 	return NULL;
@@ -1144,36 +1204,39 @@ xfs_buf_bio_end_io(
 	bio_put(bio);
 }
 
-STATIC void
-_xfs_buf_ioapply(
-	xfs_buf_t	*bp)
+static void
+xfs_buf_ioapply_map(
+	struct xfs_buf	*bp,
+	int		map,
+	int		*buf_offset,
+	int		*count,
+	int		rw)
 {
-	int		rw, map_i, total_nr_pages, nr_pages;
-	struct bio	*bio;
-	int		offset = bp->b_offset;
-	int		size = BBTOB(bp->b_io_length);
-	sector_t	sector = bp->b_bn;
+	int		page_index;
+	int		total_nr_pages = bp->b_page_count;
+	int		nr_pages;
+	struct bio	*bio;
+	sector_t	sector = bp->b_maps[map].bm_bn;
+	int		size;
+	int		offset;
 
 	total_nr_pages = bp->b_page_count;
-	map_i = 0;
 
-	if (bp->b_flags & XBF_WRITE) {
-		if (bp->b_flags & XBF_SYNCIO)
-			rw = WRITE_SYNC;
-		else
-			rw = WRITE;
-		if (bp->b_flags & XBF_FUA)
-			rw |= REQ_FUA;
-		if (bp->b_flags & XBF_FLUSH)
-			rw |= REQ_FLUSH;
-	} else if (bp->b_flags & XBF_READ_AHEAD) {
-		rw = READA;
-	} else {
-		rw = READ;
+	/* skip the pages in the buffer before the start offset */
+	page_index = 0;
+	offset = *buf_offset;
+	while (offset >= PAGE_SIZE) {
+		page_index++;
+		offset -= PAGE_SIZE;
 	}
 
-	/* we only use the buffer cache for meta-data */
-	rw |= REQ_META;
+	/*
+	 * Limit the IO size to the length of the current vector, and update the
+	 * remaining IO count for the next time around.
+	 */
+	size = min_t(int, BBTOB(bp->b_maps[map].bm_len), *count);
+	*count -= size;
+	*buf_offset += size;
 
 next_chunk:
 	atomic_inc(&bp->b_io_remaining);
@@ -1188,13 +1251,14 @@ next_chunk:
 	bio->bi_private = bp;
 
 
-	for (; size && nr_pages; nr_pages--, map_i++) {
+	for (; size && nr_pages; nr_pages--, page_index++) {
 		int	rbytes, nbytes = PAGE_SIZE - offset;
 
 		if (nbytes > size)
 			nbytes = size;
 
-		rbytes = bio_add_page(bio, bp->b_pages[map_i], nbytes, offset);
+		rbytes = bio_add_page(bio, bp->b_pages[page_index], nbytes,
+				      offset);
 		if (rbytes < nbytes)
 			break;
 
@@ -1216,6 +1280,54 @@ next_chunk:
 		xfs_buf_ioerror(bp, EIO);
 		bio_put(bio);
 	}
+
+}
+
+STATIC void
+_xfs_buf_ioapply(
+	struct xfs_buf	*bp)
+{
+	struct blk_plug	plug;
+	int		rw;
+	int		offset;
+	int		size;
+	int		i;
+
+	if (bp->b_flags & XBF_WRITE) {
+		if (bp->b_flags & XBF_SYNCIO)
+			rw = WRITE_SYNC;
+		else
+			rw = WRITE;
+		if (bp->b_flags & XBF_FUA)
+			rw |= REQ_FUA;
+		if (bp->b_flags & XBF_FLUSH)
+			rw |= REQ_FLUSH;
+	} else if (bp->b_flags & XBF_READ_AHEAD) {
+		rw = READA;
+	} else {
+		rw = READ;
+	}
+
+	/* we only use the buffer cache for meta-data */
+	rw |= REQ_META;
+
+	/*
+	 * Walk all the vectors issuing IO on them. Set up the initial offset
+	 * into the buffer and the desired IO size before we start -
+	 * xfs_buf_ioapply_map() will modify them appropriately for each
+	 * subsequent call.
+	 */
+	offset = bp->b_offset;
+	size = BBTOB(bp->b_io_length);
+	blk_start_plug(&plug);
+	for (i = 0; i < bp->b_map_count; i++) {
+		xfs_buf_ioapply_map(bp, i, &offset, &size, rw);
+		if (bp->b_error)
+			break;
+		if (size <= 0)
+			break;	/* all done */
+	}
+	blk_finish_plug(&plug);
 }
 
 void
@@ -1557,7 +1669,7 @@ xfs_buf_cmp(
 	struct xfs_buf	*bp = container_of(b, struct xfs_buf, b_list);
 	xfs_daddr_t		diff;
 
-	diff = ap->b_bn - bp->b_bn;
+	diff = ap->b_map.bm_bn - bp->b_map.bm_bn;
 	if (diff < 0)
 		return -1;
 	if (diff > 0)
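
Note on the new interface: the patch replaces the single (blkno, numblks) description of a buffer with an array of struct xfs_buf_map vectors, each carrying a bm_bn/bm_len pair. _xfs_buf_alloc() keys the cache entry on map[0].bm_bn and sums the vector lengths into b_length/b_io_length, and _xfs_buf_ioapply() now issues one bio chain per vector. The sketch below shows how a caller might read a buffer backed by two discontiguous extents through the new xfs_buf_read_map() entry point. It is a hypothetical illustration, not part of the patch: the helper name, the target argument and the block numbers/lengths are invented; only struct xfs_buf_map, xfs_buf_read_map() and xfs_buf_relse() come from the XFS code above.

/*
 * Hypothetical caller sketch (not part of this patch): read one logically
 * contiguous buffer backed by two discontiguous extents on disk.  Block
 * numbers and lengths are made up for illustration.
 */
static struct xfs_buf *
read_split_buffer(
	struct xfs_buftarg	*target)
{
	struct xfs_buf_map	map[2] = {
		{ .bm_bn = 64,  .bm_len = 8 },	/* 8 BBs at daddr 64 */
		{ .bm_bn = 512, .bm_len = 8 },	/* 8 BBs at daddr 512 */
	};
	struct xfs_buf		*bp;

	/* b_bn is keyed on map[0].bm_bn; b_length becomes the 16 BB total */
	bp = xfs_buf_read_map(target, map, 2, 0);
	if (!bp)
		return NULL;
	if (bp->b_error) {
		xfs_buf_relse(bp);
		return NULL;
	}
	return bp;
}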