Diffstat (limited to 'fs/xfs/xfs_buf.c')

-rw-r--r--	fs/xfs/xfs_buf.c	240
1 file changed, 176 insertions(+), 64 deletions(-)
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 269b35c084da..d7a9dd735e1e 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -164,14 +164,49 @@ xfs_buf_stale(
 	ASSERT(atomic_read(&bp->b_hold) >= 1);
 }
 
+static int
+xfs_buf_get_maps(
+	struct xfs_buf		*bp,
+	int			map_count)
+{
+	ASSERT(bp->b_maps == NULL);
+	bp->b_map_count = map_count;
+
+	if (map_count == 1) {
+		bp->b_maps = &bp->b_map;
+		return 0;
+	}
+
+	bp->b_maps = kmem_zalloc(map_count * sizeof(struct xfs_buf_map),
+				KM_NOFS);
+	if (!bp->b_maps)
+		return ENOMEM;
+	return 0;
+}
+
+/*
+ *	Frees b_maps if it was allocated.
+ */
+static void
+xfs_buf_free_maps(
+	struct xfs_buf	*bp)
+{
+	if (bp->b_maps != &bp->b_map) {
+		kmem_free(bp->b_maps);
+		bp->b_maps = NULL;
+	}
+}
+
 struct xfs_buf *
-xfs_buf_alloc(
+_xfs_buf_alloc(
 	struct xfs_buftarg	*target,
-	xfs_daddr_t		blkno,
-	size_t			numblks,
+	struct xfs_buf_map	*map,
+	int			nmaps,
 	xfs_buf_flags_t		flags)
 {
 	struct xfs_buf		*bp;
+	int			error;
+	int			i;
 
 	bp = kmem_zone_zalloc(xfs_buf_zone, KM_NOFS);
 	if (unlikely(!bp))
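
The struct xfs_buf_map type and the DEFINE_SINGLE_BUF_MAP() helper used by the new code are supplied by the companion xfs_buf.h change, not by this file. As a rough sketch of what this diff assumes from that header:

	struct xfs_buf_map {
		xfs_daddr_t	bm_bn;	/* block number for I/O */
		int		bm_len;	/* size of I/O, in basic blocks */
	};

	/* declare a single-vector map on the stack */
	#define DEFINE_SINGLE_BUF_MAP(map, blkno, numblk) \
		struct xfs_buf_map (map) = { .bm_bn = (blkno), .bm_len = (numblk) };
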
@@ -192,16 +227,28 @@ xfs_buf_alloc(
 	sema_init(&bp->b_sema, 0); /* held, no waiters */
 	XB_SET_OWNER(bp);
 	bp->b_target = target;
+	bp->b_flags = flags;
 
 	/*
 	 * Set length and io_length to the same value initially.
 	 * I/O routines should use io_length, which will be the same in
 	 * most cases but may be reset (e.g. XFS recovery).
 	 */
-	bp->b_length = numblks;
-	bp->b_io_length = numblks;
-	bp->b_flags = flags;
-	bp->b_bn = blkno;
+	error = xfs_buf_get_maps(bp, nmaps);
+	if (error) {
+		kmem_zone_free(xfs_buf_zone, bp);
+		return NULL;
+	}
+
+	bp->b_bn = map[0].bm_bn;
+	bp->b_length = 0;
+	for (i = 0; i < nmaps; i++) {
+		bp->b_maps[i].bm_bn = map[i].bm_bn;
+		bp->b_maps[i].bm_len = map[i].bm_len;
+		bp->b_length += map[i].bm_len;
+	}
+	bp->b_io_length = bp->b_length;
+
 	atomic_set(&bp->b_pin_count, 0);
 	init_waitqueue_head(&bp->b_waiters);
 
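
To see what the aggregation loop in _xfs_buf_alloc() does, consider a hypothetical caller building a buffer from two discontiguous extents (the disk addresses here are invented for illustration):

	struct xfs_buf_map map[2] = {
		{ .bm_bn = 64,  .bm_len = 8 },	/* 8 basic blocks at daddr 64 */
		{ .bm_bn = 512, .bm_len = 8 },	/* 8 basic blocks at daddr 512 */
	};
	struct xfs_buf	*bp;

	bp = _xfs_buf_alloc(target, map, 2, 0);
	/* on success: bp->b_bn == 64, bp->b_map_count == 2,
	 * bp->b_length == 16 and bp->b_io_length == 16 */

Note that b_io_length is now initialised here as well, which is why the equivalent assignment in xfs_buf_get_map() can be dropped further down.
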
@@ -280,6 +327,7 @@ xfs_buf_free(
 	} else if (bp->b_flags & _XBF_KMEM)
 		kmem_free(bp->b_addr);
 	_xfs_buf_free_pages(bp);
+	xfs_buf_free_maps(bp);
 	kmem_zone_free(xfs_buf_zone, bp);
 }
 
@@ -327,8 +375,9 @@ xfs_buf_allocate_memory(
 	}
 
 use_alloc_page:
-	start = BBTOB(bp->b_bn) >> PAGE_SHIFT;
-	end = (BBTOB(bp->b_bn + bp->b_length) + PAGE_SIZE - 1) >> PAGE_SHIFT;
+	start = BBTOB(bp->b_map.bm_bn) >> PAGE_SHIFT;
+	end = (BBTOB(bp->b_map.bm_bn + bp->b_length) + PAGE_SIZE - 1)
+								>> PAGE_SHIFT;
 	page_count = end - start;
 	error = _xfs_buf_get_pages(bp, page_count, flags);
 	if (unlikely(error))
@@ -425,8 +474,8 @@ _xfs_buf_map_pages(
 xfs_buf_t *
 _xfs_buf_find(
 	struct xfs_buftarg	*btp,
-	xfs_daddr_t		blkno,
-	size_t			numblks,
+	struct xfs_buf_map	*map,
+	int			nmaps,
 	xfs_buf_flags_t		flags,
 	xfs_buf_t		*new_bp)
 {
@@ -435,7 +484,12 @@ _xfs_buf_find(
 	struct rb_node		**rbp;
 	struct rb_node		*parent;
 	xfs_buf_t		*bp;
+	xfs_daddr_t		blkno = map[0].bm_bn;
+	int			numblks = 0;
+	int			i;
 
+	for (i = 0; i < nmaps; i++)
+		numblks += map[i].bm_len;
 	numbytes = BBTOB(numblks);
 
 	/* Check for IOs smaller than the sector size / not sector aligned */
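
The cache lookup itself keeps its structure: the rbtree is still keyed on a single disk address, now taken from the first vector, while numblks becomes the sum of all vector lengths, so a discontiguous buffer occupies one cache entry. A single-extent lookup, as the old blkno/numblks interface would issue, reduces to this (hypothetical caller, for illustration):

	DEFINE_SINGLE_BUF_MAP(map, blkno, numblks);
	bp = _xfs_buf_find(btp, &map, 1, flags, NULL);
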
@@ -527,31 +581,31 @@ found:
 	 * more hits than misses.
 	 */
 struct xfs_buf *
-xfs_buf_get(
-	xfs_buftarg_t		*target,
-	xfs_daddr_t		blkno,
-	size_t			numblks,
+xfs_buf_get_map(
+	struct xfs_buftarg	*target,
+	struct xfs_buf_map	*map,
+	int			nmaps,
 	xfs_buf_flags_t		flags)
 {
 	struct xfs_buf		*bp;
 	struct xfs_buf		*new_bp;
 	int			error = 0;
 
-	bp = _xfs_buf_find(target, blkno, numblks, flags, NULL);
+	bp = _xfs_buf_find(target, map, nmaps, flags, NULL);
 	if (likely(bp))
 		goto found;
 
-	new_bp = xfs_buf_alloc(target, blkno, numblks, flags);
+	new_bp = _xfs_buf_alloc(target, map, nmaps, flags);
 	if (unlikely(!new_bp))
 		return NULL;
 
 	error = xfs_buf_allocate_memory(new_bp, flags);
 	if (error) {
-		kmem_zone_free(xfs_buf_zone, new_bp);
+		xfs_buf_free(new_bp);
 		return NULL;
 	}
 
-	bp = _xfs_buf_find(target, blkno, numblks, flags, new_bp);
+	bp = _xfs_buf_find(target, map, nmaps, flags, new_bp);
 	if (!bp) {
 		xfs_buf_free(new_bp);
 		return NULL;
@@ -560,8 +614,6 @@ xfs_buf_get(
 	if (bp != new_bp)
 		xfs_buf_free(new_bp);
 
-	bp->b_io_length = bp->b_length;
-
 found:
 	if (!bp->b_addr) {
 		error = _xfs_buf_map_pages(bp, flags);
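
Existing single-extent callers keep their old signatures; the companion xfs_buf.h change is expected to wrap them around the new _map variants along these lines (sketch, not part of this diff):

	static inline struct xfs_buf *
	xfs_buf_get(
		struct xfs_buftarg	*target,
		xfs_daddr_t		blkno,
		size_t			numblks,
		xfs_buf_flags_t		flags)
	{
		DEFINE_SINGLE_BUF_MAP(map, blkno, numblks);
		return xfs_buf_get_map(target, &map, 1, flags);
	}

xfs_buf_read() and xfs_buf_readahead() get equivalent inline wrappers around xfs_buf_read_map() and xfs_buf_readahead_map() below.
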
@@ -584,7 +636,7 @@ _xfs_buf_read(
 	xfs_buf_flags_t		flags)
 {
 	ASSERT(!(flags & XBF_WRITE));
-	ASSERT(bp->b_bn != XFS_BUF_DADDR_NULL);
+	ASSERT(bp->b_map.bm_bn != XFS_BUF_DADDR_NULL);
 
 	bp->b_flags &= ~(XBF_WRITE | XBF_ASYNC | XBF_READ_AHEAD);
 	bp->b_flags |= flags & (XBF_READ | XBF_ASYNC | XBF_READ_AHEAD);
@@ -596,17 +648,17 @@ _xfs_buf_read(
 }
 
 xfs_buf_t *
-xfs_buf_read(
-	xfs_buftarg_t		*target,
-	xfs_daddr_t		blkno,
-	size_t			numblks,
+xfs_buf_read_map(
+	struct xfs_buftarg	*target,
+	struct xfs_buf_map	*map,
+	int			nmaps,
 	xfs_buf_flags_t		flags)
 {
-	xfs_buf_t		*bp;
+	struct xfs_buf		*bp;
 
 	flags |= XBF_READ;
 
-	bp = xfs_buf_get(target, blkno, numblks, flags);
+	bp = xfs_buf_get_map(target, map, nmaps, flags);
 	if (bp) {
 		trace_xfs_buf_read(bp, flags, _RET_IP_);
 
@@ -634,15 +686,15 @@ xfs_buf_read(
  *	safe manner.
  */
 void
-xfs_buf_readahead(
-	xfs_buftarg_t		*target,
-	xfs_daddr_t		blkno,
-	size_t			numblks)
+xfs_buf_readahead_map(
+	struct xfs_buftarg	*target,
+	struct xfs_buf_map	*map,
+	int			nmaps)
 {
 	if (bdi_read_congested(target->bt_bdi))
 		return;
 
-	xfs_buf_read(target, blkno, numblks,
+	xfs_buf_read_map(target, map, nmaps,
 		     XBF_TRYLOCK|XBF_ASYNC|XBF_READ_AHEAD);
 }
 
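
A discontiguous read through the new interface would then look roughly like this (illustrative sketch with invented addresses; real multi-map callers arrive with later patches in this series):

	struct xfs_buf_map map[2] = {
		{ .bm_bn = daddr1, .bm_len = 8 },	/* first extent */
		{ .bm_bn = daddr2, .bm_len = 8 },	/* second extent */
	};
	struct xfs_buf	*bp;

	bp = xfs_buf_read_map(target, map, 2, 0);
	if (bp) {
		/* ... use the buffer ... */
		xfs_buf_relse(bp);
	}
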
@@ -665,8 +717,10 @@ xfs_buf_read_uncached(
 		return NULL;
 
 	/* set up the buffer for a read IO */
-	XFS_BUF_SET_ADDR(bp, daddr);
-	XFS_BUF_READ(bp);
+	ASSERT(bp->b_map_count == 1);
+	bp->b_bn = daddr;
+	bp->b_maps[0].bm_bn = daddr;
+	bp->b_flags |= XBF_READ;
 
 	xfsbdstrat(target->bt_mount, bp);
 	error = xfs_buf_iowait(bp);
@@ -694,7 +748,11 @@ xfs_buf_set_empty(
 	bp->b_addr = NULL;
 	bp->b_length = numblks;
 	bp->b_io_length = numblks;
+
+	ASSERT(bp->b_map_count == 1);
 	bp->b_bn = XFS_BUF_DADDR_NULL;
+	bp->b_maps[0].bm_bn = XFS_BUF_DADDR_NULL;
+	bp->b_maps[0].bm_len = bp->b_length;
 }
 
 static inline struct page *
@@ -758,9 +816,10 @@ xfs_buf_get_uncached(
 {
 	unsigned long		page_count;
 	int			error, i;
-	xfs_buf_t		*bp;
+	struct xfs_buf		*bp;
+	DEFINE_SINGLE_BUF_MAP(map, XFS_BUF_DADDR_NULL, numblks);
 
-	bp = xfs_buf_alloc(target, XFS_BUF_DADDR_NULL, numblks, 0);
+	bp = _xfs_buf_alloc(target, &map, 1, 0);
 	if (unlikely(bp == NULL))
 		goto fail;
 
@@ -791,6 +850,7 @@ xfs_buf_get_uncached(
 		__free_page(bp->b_pages[i]);
 	_xfs_buf_free_pages(bp);
  fail_free_buf:
+	xfs_buf_free_maps(bp);
 	kmem_zone_free(xfs_buf_zone, bp);
  fail:
 	return NULL;
@@ -1144,36 +1204,39 @@ xfs_buf_bio_end_io(
 	bio_put(bio);
 }
 
-STATIC void
-_xfs_buf_ioapply(
-	xfs_buf_t	*bp)
+static void
+xfs_buf_ioapply_map(
+	struct xfs_buf	*bp,
+	int		map,
+	int		*buf_offset,
+	int		*count,
+	int		rw)
 {
-	int		rw, map_i, total_nr_pages, nr_pages;
-	struct bio	*bio;
-	int		offset = bp->b_offset;
-	int		size = BBTOB(bp->b_io_length);
-	sector_t	sector = bp->b_bn;
+	int		page_index;
+	int		total_nr_pages = bp->b_page_count;
+	int		nr_pages;
+	struct bio	*bio;
+	sector_t	sector = bp->b_maps[map].bm_bn;
+	int		size;
+	int		offset;
 
 	total_nr_pages = bp->b_page_count;
-	map_i = 0;
 
-	if (bp->b_flags & XBF_WRITE) {
-		if (bp->b_flags & XBF_SYNCIO)
-			rw = WRITE_SYNC;
-		else
-			rw = WRITE;
-		if (bp->b_flags & XBF_FUA)
-			rw |= REQ_FUA;
-		if (bp->b_flags & XBF_FLUSH)
-			rw |= REQ_FLUSH;
-	} else if (bp->b_flags & XBF_READ_AHEAD) {
-		rw = READA;
-	} else {
-		rw = READ;
+	/* skip the pages in the buffer before the start offset */
+	page_index = 0;
+	offset = *buf_offset;
+	while (offset >= PAGE_SIZE) {
+		page_index++;
+		offset -= PAGE_SIZE;
 	}
 
-	/* we only use the buffer cache for meta-data */
-	rw |= REQ_META;
+	/*
+	 * Limit the IO size to the length of the current vector, and update the
+	 * remaining IO count for the next time around.
+	 */
+	size = min_t(int, BBTOB(bp->b_maps[map].bm_len), *count);
+	*count -= size;
+	*buf_offset += size;
 
 next_chunk:
 	atomic_inc(&bp->b_io_remaining);
@@ -1188,13 +1251,14 @@ next_chunk:
 	bio->bi_private = bp;
 
 
-	for (; size && nr_pages; nr_pages--, map_i++) {
+	for (; size && nr_pages; nr_pages--, page_index++) {
 		int	rbytes, nbytes = PAGE_SIZE - offset;
 
 		if (nbytes > size)
 			nbytes = size;
 
-		rbytes = bio_add_page(bio, bp->b_pages[map_i], nbytes, offset);
+		rbytes = bio_add_page(bio, bp->b_pages[page_index], nbytes,
+				      offset);
 		if (rbytes < nbytes)
 			break;
 
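
The page-skip loop at the top of xfs_buf_ioapply_map() is a division in disguise: *buf_offset carries the number of buffer bytes already issued for earlier vectors, so with 4096-byte pages and *buf_offset == 9216 the loop leaves page_index == 2 and offset == 1024, i.e. this map's IO starts 1024 bytes into the third page. Since PAGE_SIZE is a power of two, an equivalent closed form would be:

	page_index = *buf_offset >> PAGE_SHIFT;
	offset = *buf_offset & (PAGE_SIZE - 1);
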
@@ -1216,6 +1280,54 @@ next_chunk:
 		xfs_buf_ioerror(bp, EIO);
 		bio_put(bio);
 	}
+
+}
+
+STATIC void
+_xfs_buf_ioapply(
+	struct xfs_buf	*bp)
+{
+	struct blk_plug	plug;
+	int		rw;
+	int		offset;
+	int		size;
+	int		i;
+
+	if (bp->b_flags & XBF_WRITE) {
+		if (bp->b_flags & XBF_SYNCIO)
+			rw = WRITE_SYNC;
+		else
+			rw = WRITE;
+		if (bp->b_flags & XBF_FUA)
+			rw |= REQ_FUA;
+		if (bp->b_flags & XBF_FLUSH)
+			rw |= REQ_FLUSH;
+	} else if (bp->b_flags & XBF_READ_AHEAD) {
+		rw = READA;
+	} else {
+		rw = READ;
+	}
+
+	/* we only use the buffer cache for meta-data */
+	rw |= REQ_META;
+
+	/*
+	 * Walk all the vectors issuing IO on them. Set up the initial offset
+	 * into the buffer and the desired IO size before we start -
+	 * xfs_buf_ioapply_map() will modify them appropriately for each
+	 * subsequent call.
+	 */
+	offset = bp->b_offset;
+	size = BBTOB(bp->b_io_length);
+	blk_start_plug(&plug);
+	for (i = 0; i < bp->b_map_count; i++) {
+		xfs_buf_ioapply_map(bp, i, &offset, &size, rw);
+		if (bp->b_error)
+			break;
+		if (size <= 0)
+			break;	/* all done */
+	}
+	blk_finish_plug(&plug);
 }
 
 void
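
To sanity-check the offset/size hand-off between _xfs_buf_ioapply() and xfs_buf_ioapply_map(), here is a minimal stand-alone model of just that bookkeeping (userspace C with invented vector lengths; only the arithmetic mirrors the kernel code):

	#include <stdio.h>

	#define BBTOB(bbs)	((bbs) << 9)	/* 512-byte basic blocks to bytes */

	struct map { int bm_len; };		/* vector length in basic blocks */

	/* mirrors the clamp-and-advance done by xfs_buf_ioapply_map() */
	static void ioapply_map(const struct map *m, int *buf_offset, int *count)
	{
		int size = BBTOB(m->bm_len);

		if (size > *count)
			size = *count;	/* min_t(int, ...) in the kernel code */
		printf("map: start byte %d, %d bytes\n", *buf_offset, size);
		*count -= size;
		*buf_offset += size;
	}

	int main(void)
	{
		struct map maps[3] = { { 8 }, { 8 }, { 4 } };	/* 20 blocks */
		int offset = 0;			/* bp->b_offset */
		int count = BBTOB(20);		/* BBTOB(bp->b_io_length) */
		int i;

		for (i = 0; i < 3 && count > 0; i++)
			ioapply_map(&maps[i], &offset, &count);
		return 0;
	}

With these values it prints three lines covering bytes 0-4095, 4096-8191 and 8192-10239, matching how the loop in _xfs_buf_ioapply() stops once size reaches zero.
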
@@ -1557,7 +1669,7 @@ xfs_buf_cmp(
 	struct xfs_buf	*bp = container_of(b, struct xfs_buf, b_list);
 	xfs_daddr_t		diff;
 
-	diff = ap->b_bn - bp->b_bn;
+	diff = ap->b_map.bm_bn - bp->b_map.bm_bn;
 	if (diff < 0)
 		return -1;
 	if (diff > 0)