Diffstat (limited to 'fs/bio.c')
-rw-r--r--	fs/bio.c	361
1 file changed, 269 insertions(+), 92 deletions(-)
diff --git a/fs/bio.c b/fs/bio.c
index 77a55bcceedb..062299acbccd 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -26,9 +26,16 @@
 #include <linux/mempool.h>
 #include <linux/workqueue.h>
 #include <linux/blktrace_api.h>
+#include <trace/block.h>
 #include <scsi/sg.h>		/* for struct sg_iovec */
 
-static struct kmem_cache *bio_slab __read_mostly;
+DEFINE_TRACE(block_split);
+
+/*
+ * Test patch to inline a certain number of bi_io_vec's inside the bio
+ * itself, to shrink a bio data allocation from two mempool calls to one
+ */
+#define BIO_INLINE_VECS		4
 
 static mempool_t *bio_split_pool __read_mostly;
 
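The BIO_INLINE_VECS define added above captures the idea in the new comment: a small bio carries its bio_vec array inside the bio allocation itself, so one slab allocation replaces the former bio + biovec pair. A minimal userspace sketch of that layout follows; the structures and names are illustrative stand-ins, not the kernel's:

	#include <stdio.h>
	#include <stdlib.h>

	/* Illustrative stand-ins, not the kernel's struct bio/bio_vec. */
	struct demo_vec {
		void		*page;
		unsigned int	len;
		unsigned int	offset;
	};

	#define DEMO_INLINE_VECS 4

	struct demo_bio {
		unsigned short	vcnt;		/* vecs in use */
		unsigned short	max_vecs;	/* capacity */
		struct demo_vec	*io_vec;	/* points at inline_vecs for small bios */
		struct demo_vec	inline_vecs[];	/* flexible array, must stay last */
	};

	/* One allocation covers the bio and up to DEMO_INLINE_VECS vecs. */
	static struct demo_bio *demo_bio_alloc(void)
	{
		struct demo_bio *bio;

		bio = calloc(1, sizeof(*bio) +
				DEMO_INLINE_VECS * sizeof(struct demo_vec));
		if (!bio)
			return NULL;
		bio->max_vecs = DEMO_INLINE_VECS;
		bio->io_vec = bio->inline_vecs;
		return bio;
	}

	int main(void)
	{
		struct demo_bio *bio = demo_bio_alloc();

		if (bio) {
			printf("one allocation, %u inline vecs\n", bio->max_vecs);
			free(bio);
		}
		return 0;
	}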
@@ -37,9 +44,8 @@ static mempool_t *bio_split_pool __read_mostly;
  * break badly! cannot be bigger than what you can fit into an
  * unsigned short
  */
-
 #define BV(x) { .nr_vecs = x, .name = "biovec-"__stringify(x) }
-static struct biovec_slab bvec_slabs[BIOVEC_NR_POOLS] __read_mostly = {
+struct biovec_slab bvec_slabs[BIOVEC_NR_POOLS] __read_mostly = {
 	BV(1), BV(4), BV(16), BV(64), BV(128), BV(BIO_MAX_PAGES),
 };
 #undef BV
@@ -50,12 +56,121 @@ static struct biovec_slab bvec_slabs[BIOVEC_NR_POOLS] __read_mostly = {
  */
 struct bio_set *fs_bio_set;
 
+/*
+ * Our slab pool management
+ */
+struct bio_slab {
+	struct kmem_cache *slab;
+	unsigned int slab_ref;
+	unsigned int slab_size;
+	char name[8];
+};
+static DEFINE_MUTEX(bio_slab_lock);
+static struct bio_slab *bio_slabs;
+static unsigned int bio_slab_nr, bio_slab_max;
+
+static struct kmem_cache *bio_find_or_create_slab(unsigned int extra_size)
+{
+	unsigned int sz = sizeof(struct bio) + extra_size;
+	struct kmem_cache *slab = NULL;
+	struct bio_slab *bslab;
+	unsigned int i, entry = -1;
+
+	mutex_lock(&bio_slab_lock);
+
+	i = 0;
+	while (i < bio_slab_nr) {
+		struct bio_slab *bslab = &bio_slabs[i];
+
+		if (!bslab->slab && entry == -1)
+			entry = i;
+		else if (bslab->slab_size == sz) {
+			slab = bslab->slab;
+			bslab->slab_ref++;
+			break;
+		}
+		i++;
+	}
+
+	if (slab)
+		goto out_unlock;
+
+	if (bio_slab_nr == bio_slab_max && entry == -1) {
+		bio_slab_max <<= 1;
+		bio_slabs = krealloc(bio_slabs,
+				     bio_slab_max * sizeof(struct bio_slab),
+				     GFP_KERNEL);
+		if (!bio_slabs)
+			goto out_unlock;
+	}
+	if (entry == -1)
+		entry = bio_slab_nr++;
+
+	bslab = &bio_slabs[entry];
+
+	snprintf(bslab->name, sizeof(bslab->name), "bio-%d", entry);
+	slab = kmem_cache_create(bslab->name, sz, 0, SLAB_HWCACHE_ALIGN, NULL);
+	if (!slab)
+		goto out_unlock;
+
+	printk("bio: create slab <%s> at %d\n", bslab->name, entry);
+	bslab->slab = slab;
+	bslab->slab_ref = 1;
+	bslab->slab_size = sz;
+out_unlock:
+	mutex_unlock(&bio_slab_lock);
+	return slab;
+}
+
+static void bio_put_slab(struct bio_set *bs)
+{
+	struct bio_slab *bslab = NULL;
+	unsigned int i;
+
+	mutex_lock(&bio_slab_lock);
+
+	for (i = 0; i < bio_slab_nr; i++) {
+		if (bs->bio_slab == bio_slabs[i].slab) {
+			bslab = &bio_slabs[i];
+			break;
+		}
+	}
+
+	if (WARN(!bslab, KERN_ERR "bio: unable to find slab!\n"))
+		goto out;
+
+	WARN_ON(!bslab->slab_ref);
+
+	if (--bslab->slab_ref)
+		goto out;
+
+	kmem_cache_destroy(bslab->slab);
+	bslab->slab = NULL;
+
+out:
+	mutex_unlock(&bio_slab_lock);
+}
+
 unsigned int bvec_nr_vecs(unsigned short idx)
 {
 	return bvec_slabs[idx].nr_vecs;
 }
 
-struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned long *idx, struct bio_set *bs)
+void bvec_free_bs(struct bio_set *bs, struct bio_vec *bv, unsigned int idx)
+{
+	BIO_BUG_ON(idx >= BIOVEC_NR_POOLS);
+
+	if (idx == BIOVEC_MAX_IDX)
+		mempool_free(bv, bs->bvec_pool);
+	else {
+		struct biovec_slab *bvs = bvec_slabs + idx;
+
+		kmem_cache_free(bvs->slab, bv);
+	}
+}
+
+struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned long *idx,
+			      struct bio_set *bs)
 {
 	struct bio_vec *bvl;
 
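bio_find_or_create_slab() keys the per-bio_set caches purely by object size (sizeof(struct bio) plus the requested padding), refcounts them in slab_ref so bio_sets asking for the same size share one cache, and grows its lookup table by doubling through krealloc(). The same find-or-create-with-refcount pattern in a standalone userspace sketch (hypothetical names, plain realloc standing in for the slab machinery):

	#include <stdio.h>
	#include <stdlib.h>

	struct size_cache {
		size_t		size;	/* object size this cache serves */
		unsigned int	refs;	/* how many users share it */
		char		name[16];
	};

	static struct size_cache *caches;
	static unsigned int nr_caches, max_caches = 2;

	/* Return a cache for @size, reusing an existing one when possible. */
	static struct size_cache *find_or_create(size_t size)
	{
		unsigned int i;

		for (i = 0; i < nr_caches; i++) {
			if (caches[i].size == size) {
				caches[i].refs++;
				return &caches[i];
			}
		}

		if (nr_caches == max_caches) {
			max_caches <<= 1;	/* grow the table by doubling */
			caches = realloc(caches, max_caches * sizeof(*caches));
			if (!caches)
				return NULL;
		}

		caches[nr_caches].size = size;
		caches[nr_caches].refs = 1;
		snprintf(caches[nr_caches].name, sizeof(caches[nr_caches].name),
			 "bio-%u", nr_caches);
		return &caches[nr_caches++];
	}

	int main(void)
	{
		caches = calloc(max_caches, sizeof(*caches));
		if (!caches)
			return 1;

		struct size_cache *a = find_or_create(96);	/* e.g. bio + no pad */
		struct size_cache *b = find_or_create(96);	/* same size: shared */

		printf("%s shared: %s, refs=%u\n", a->name,
		       a == b ? "yes" : "no", a->refs);
		return 0;
	}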
@@ -64,60 +179,85 @@ struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned long *idx, struct
  * If not, this is a bio_kmalloc() allocation and just do a
  * kzalloc() for the exact number of vecs right away.
  */
-	if (bs) {
+	if (!bs)
+		bvl = kmalloc(nr * sizeof(struct bio_vec), gfp_mask);
+
+	/*
+	 * see comment near bvec_array define!
+	 */
+	switch (nr) {
+	case 1:
+		*idx = 0;
+		break;
+	case 2 ... 4:
+		*idx = 1;
+		break;
+	case 5 ... 16:
+		*idx = 2;
+		break;
+	case 17 ... 64:
+		*idx = 3;
+		break;
+	case 65 ... 128:
+		*idx = 4;
+		break;
+	case 129 ... BIO_MAX_PAGES:
+		*idx = 5;
+		break;
+	default:
+		return NULL;
+	}
+
+	/*
+	 * idx now points to the pool we want to allocate from. only the
+	 * 1-vec entry pool is mempool backed.
+	 */
+	if (*idx == BIOVEC_MAX_IDX) {
+fallback:
+		bvl = mempool_alloc(bs->bvec_pool, gfp_mask);
+	} else {
+		struct biovec_slab *bvs = bvec_slabs + *idx;
+		gfp_t __gfp_mask = gfp_mask & ~(__GFP_WAIT | __GFP_IO);
+
 		/*
-		 * see comment near bvec_array define!
+		 * Make this allocation restricted and don't dump info on
+		 * allocation failures, since we'll fallback to the mempool
+		 * in case of failure.
 		 */
-		switch (nr) {
-		case 1:
-			*idx = 0;
-			break;
-		case 2 ... 4:
-			*idx = 1;
-			break;
-		case 5 ... 16:
-			*idx = 2;
-			break;
-		case 17 ... 64:
-			*idx = 3;
-			break;
-		case 65 ... 128:
-			*idx = 4;
-			break;
-		case 129 ... BIO_MAX_PAGES:
-			*idx = 5;
-			break;
-		default:
-			return NULL;
-		}
+		__gfp_mask |= __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN;
 
 		/*
-		 * idx now points to the pool we want to allocate from
+		 * Try a slab allocation. If this fails and __GFP_WAIT
+		 * is set, retry with the 1-entry mempool
 		 */
-		bvl = mempool_alloc(bs->bvec_pools[*idx], gfp_mask);
-		if (bvl)
-			memset(bvl, 0,
-				bvec_nr_vecs(*idx) * sizeof(struct bio_vec));
-	} else
-		bvl = kzalloc(nr * sizeof(struct bio_vec), gfp_mask);
+		bvl = kmem_cache_alloc(bvs->slab, __gfp_mask);
+		if (unlikely(!bvl && (gfp_mask & __GFP_WAIT))) {
+			*idx = BIOVEC_MAX_IDX;
+			goto fallback;
+		}
+	}
 
 	return bvl;
 }
 
-void bio_free(struct bio *bio, struct bio_set *bio_set)
+void bio_free(struct bio *bio, struct bio_set *bs)
 {
-	if (bio->bi_io_vec) {
-		const int pool_idx = BIO_POOL_IDX(bio);
+	void *p;
 
-		BIO_BUG_ON(pool_idx >= BIOVEC_NR_POOLS);
-
-		mempool_free(bio->bi_io_vec, bio_set->bvec_pools[pool_idx]);
-	}
+	if (bio_has_allocated_vec(bio))
+		bvec_free_bs(bs, bio->bi_io_vec, BIO_POOL_IDX(bio));
 
 	if (bio_integrity(bio))
-		bio_integrity_free(bio, bio_set);
+		bio_integrity_free(bio, bs);
 
-	mempool_free(bio, bio_set->bio_pool);
+	/*
+	 * If we have front padding, adjust the bio pointer before freeing
+	 */
+	p = bio;
+	if (bs->front_pad)
+		p -= bs->front_pad;
+
+	mempool_free(p, bs->bio_pool);
 }
 
 /*
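In the rewritten bvec_alloc_bs(), only the largest biovec pool (BIOVEC_MAX_IDX) remains mempool-backed; the other sizes first try a cheap kmem_cache_alloc() with __GFP_WAIT and __GFP_IO cleared and __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN added, and only when that fails and the caller allowed blocking does the code fall back to the mempool. A userspace sketch of that try-fast-then-fall-back-to-a-reserve pattern (illustrative only; a static buffer stands in for the mempool):

	#include <stdio.h>
	#include <stdlib.h>
	#include <stdbool.h>

	/* A one-slot "reserve pool" standing in for the mempool-backed pool. */
	static char reserve[4096];
	static bool reserve_in_use;

	static void *reserve_alloc(void)
	{
		if (reserve_in_use)
			return NULL;
		reserve_in_use = true;
		return reserve;
	}

	/*
	 * Try an ordinary allocation first; if it fails and the caller can
	 * tolerate falling back, dip into the reserve instead of failing.
	 */
	static void *alloc_with_fallback(size_t size, bool may_block)
	{
		void *p = malloc(size);		/* the optimistic attempt */

		if (!p && may_block)
			p = reserve_alloc();	/* guaranteed-progress path */
		return p;
	}

	int main(void)
	{
		void *p = alloc_with_fallback(512, true);

		printf("got %p (%s)\n", p,
		       p == (void *)reserve ? "from reserve" : "from malloc");
		if (p && p != (void *)reserve)
			free(p);
		return 0;
	}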
@@ -130,7 +270,8 @@ static void bio_fs_destructor(struct bio *bio)
 
 static void bio_kmalloc_destructor(struct bio *bio)
 {
-	kfree(bio->bi_io_vec);
+	if (bio_has_allocated_vec(bio))
+		kfree(bio->bi_io_vec);
 	kfree(bio);
 }
 
@@ -154,16 +295,20 @@ void bio_init(struct bio *bio)
  * for a &struct bio to become free. If a %NULL @bs is passed in, we will
  * fall back to just using @kmalloc to allocate the required memory.
  *
- * allocate bio and iovecs from the memory pools specified by the
- * bio_set structure, or @kmalloc if none given.
+ * Note that the caller must set ->bi_destructor on succesful return
+ * of a bio, to do the appropriate freeing of the bio once the reference
+ * count drops to zero.
  **/
 struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
 {
-	struct bio *bio;
+	struct bio *bio = NULL;
 
-	if (bs)
-		bio = mempool_alloc(bs->bio_pool, gfp_mask);
-	else
+	if (bs) {
+		void *p = mempool_alloc(bs->bio_pool, gfp_mask);
+
+		if (p)
+			bio = p + bs->front_pad;
+	} else
 		bio = kmalloc(sizeof(*bio), gfp_mask);
 
 	if (likely(bio)) {
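The updated kerneldoc above also warns that the caller of bio_alloc_bioset() must set ->bi_destructor, because only the caller knows how the bio was obtained (front-padded bio_set mempool versus plain kmalloc). A minimal sketch of such a caller-supplied destructor driven by a reference count (made-up names, not the kernel API):

	#include <stdio.h>
	#include <stdlib.h>

	struct demo_obj {
		int	refcount;
		void	(*destructor)(struct demo_obj *);	/* set by the caller */
	};

	static void demo_put(struct demo_obj *obj)
	{
		if (--obj->refcount == 0)
			obj->destructor(obj);	/* caller decides how to free */
	}

	static void kfree_style_destructor(struct demo_obj *obj)
	{
		printf("freeing plain allocation\n");
		free(obj);
	}

	int main(void)
	{
		struct demo_obj *obj = malloc(sizeof(*obj));

		if (!obj)
			return 1;
		obj->refcount = 1;
		obj->destructor = kfree_style_destructor;	/* caller's duty */
		demo_put(obj);
		return 0;
	}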
@@ -173,7 +318,15 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
 		if (likely(nr_iovecs)) {
 			unsigned long uninitialized_var(idx);
 
-			bvl = bvec_alloc_bs(gfp_mask, nr_iovecs, &idx, bs);
+			if (nr_iovecs <= BIO_INLINE_VECS) {
+				idx = 0;
+				bvl = bio->bi_inline_vecs;
+				nr_iovecs = BIO_INLINE_VECS;
+			} else {
+				bvl = bvec_alloc_bs(gfp_mask, nr_iovecs, &idx,
+							bs);
+				nr_iovecs = bvec_nr_vecs(idx);
+			}
 			if (unlikely(!bvl)) {
 				if (bs)
 					mempool_free(bio, bs->bio_pool);
@@ -183,7 +336,7 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
 				goto out;
 			}
 			bio->bi_flags |= idx << BIO_POOL_OFFSET;
-			bio->bi_max_vecs = bvec_nr_vecs(idx);
+			bio->bi_max_vecs = nr_iovecs;
 		}
 		bio->bi_io_vec = bvl;
 	}
@@ -635,6 +788,7 @@ struct bio *bio_copy_user_iov(struct request_queue *q,
 	int i, ret;
 	int nr_pages = 0;
 	unsigned int len = 0;
+	unsigned int offset = map_data ? map_data->offset & ~PAGE_MASK : 0;
 
 	for (i = 0; i < iov_count; i++) {
 		unsigned long uaddr;
@@ -661,35 +815,42 @@ struct bio *bio_copy_user_iov(struct request_queue *q,
 	bio->bi_rw |= (!write_to_vm << BIO_RW);
 
 	ret = 0;
-	i = 0;
+
+	if (map_data) {
+		nr_pages = 1 << map_data->page_order;
+		i = map_data->offset / PAGE_SIZE;
+	}
 	while (len) {
-		unsigned int bytes;
+		unsigned int bytes = PAGE_SIZE;
 
-		if (map_data)
-			bytes = 1U << (PAGE_SHIFT + map_data->page_order);
-		else
-			bytes = PAGE_SIZE;
+		bytes -= offset;
 
 		if (bytes > len)
 			bytes = len;
 
 		if (map_data) {
-			if (i == map_data->nr_entries) {
+			if (i == map_data->nr_entries * nr_pages) {
 				ret = -ENOMEM;
 				break;
 			}
-			page = map_data->pages[i++];
-		} else
+
+			page = map_data->pages[i / nr_pages];
+			page += (i % nr_pages);
+
+			i++;
+		} else {
 			page = alloc_page(q->bounce_gfp | gfp_mask);
-		if (!page) {
-			ret = -ENOMEM;
-			break;
+			if (!page) {
+				ret = -ENOMEM;
+				break;
+			}
 		}
 
-		if (bio_add_pc_page(q, bio, page, bytes, 0) < bytes)
+		if (bio_add_pc_page(q, bio, page, bytes, offset) < bytes)
 			break;
 
 		len -= bytes;
+		offset = 0;
 	}
 
 	if (ret)
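With a struct rq_map_data the caller hands in nr_entries higher-order pages, each spanning nr_pages = 1 << page_order small pages. The loop above walks them with one running index i: i / nr_pages picks the entry, i % nr_pages the page inside it, and offset (map_data->offset masked to within a page) is consumed only by the first chunk. A standalone worked example of that arithmetic with made-up sizes:

	#include <stdio.h>

	#define PAGE_SIZE	4096UL
	#define PAGE_MASK	(~(PAGE_SIZE - 1))

	int main(void)
	{
		unsigned int page_order = 2;			/* each entry = 4 pages */
		unsigned int nr_pages = 1 << page_order;
		unsigned long map_offset = 5 * PAGE_SIZE + 100;	/* arbitrary start */
		unsigned long len = 3 * PAGE_SIZE;		/* bytes to map */

		unsigned int i = map_offset / PAGE_SIZE;	/* starting small-page index */
		unsigned long offset = map_offset & ~PAGE_MASK;	/* offset into first page */

		while (len) {
			unsigned long bytes = PAGE_SIZE - offset;

			if (bytes > len)
				bytes = len;

			printf("entry %u, page %u, offset %lu, bytes %lu\n",
			       i / nr_pages, i % nr_pages, offset, bytes);

			i++;
			len -= bytes;
			offset = 0;	/* only the first chunk starts mid-page */
		}
		return 0;
	}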
@@ -698,7 +859,7 @@ struct bio *bio_copy_user_iov(struct request_queue *q,
 	/*
 	 * success
 	 */
-	if (!write_to_vm) {
+	if (!write_to_vm && (!map_data || !map_data->null_mapped)) {
 		ret = __bio_copy_iov(bio, bio->bi_io_vec, iov, iov_count, 0, 0);
 		if (ret)
 			goto cleanup;
@@ -1263,7 +1424,7 @@ struct bio_pair *bio_split(struct bio *bi, int first_sectors)
 	if (!bp)
 		return bp;
 
-	blk_add_trace_pdu_int(bdev_get_queue(bi->bi_bdev), BLK_TA_SPLIT, bi,
+	trace_block_split(bdev_get_queue(bi->bi_bdev), bi,
 				bi->bi_sector + first_sectors);
 
 	BUG_ON(bi->bi_vcnt != 1);
@@ -1343,30 +1504,18 @@ EXPORT_SYMBOL(bio_sector_offset);
  */
 static int biovec_create_pools(struct bio_set *bs, int pool_entries)
 {
-	int i;
+	struct biovec_slab *bp = bvec_slabs + BIOVEC_MAX_IDX;
 
-	for (i = 0; i < BIOVEC_NR_POOLS; i++) {
-		struct biovec_slab *bp = bvec_slabs + i;
-		mempool_t **bvp = bs->bvec_pools + i;
+	bs->bvec_pool = mempool_create_slab_pool(pool_entries, bp->slab);
+	if (!bs->bvec_pool)
+		return -ENOMEM;
 
-		*bvp = mempool_create_slab_pool(pool_entries, bp->slab);
-		if (!*bvp)
-			return -ENOMEM;
-	}
 	return 0;
 }
 
 static void biovec_free_pools(struct bio_set *bs)
 {
-	int i;
-
-	for (i = 0; i < BIOVEC_NR_POOLS; i++) {
-		mempool_t *bvp = bs->bvec_pools[i];
-
-		if (bvp)
-			mempool_destroy(bvp);
-	}
-
+	mempool_destroy(bs->bvec_pool);
 }
 
 void bioset_free(struct bio_set *bs)
@@ -1376,25 +1525,49 @@ void bioset_free(struct bio_set *bs)
 
 	bioset_integrity_free(bs);
 	biovec_free_pools(bs);
+	bio_put_slab(bs);
 
 	kfree(bs);
 }
 
-struct bio_set *bioset_create(int bio_pool_size, int bvec_pool_size)
+/**
+ * bioset_create  - Create a bio_set
+ * @pool_size:	Number of bio and bio_vecs to cache in the mempool
+ * @front_pad:	Number of bytes to allocate in front of the returned bio
+ *
+ * Description:
+ *    Set up a bio_set to be used with @bio_alloc_bioset. Allows the caller
+ *    to ask for a number of bytes to be allocated in front of the bio.
+ *    Front pad allocation is useful for embedding the bio inside
+ *    another structure, to avoid allocating extra data to go with the bio.
+ *    Note that the bio must be embedded at the END of that structure always,
+ *    or things will break badly.
+ */
+struct bio_set *bioset_create(unsigned int pool_size, unsigned int front_pad)
 {
-	struct bio_set *bs = kzalloc(sizeof(*bs), GFP_KERNEL);
+	unsigned int back_pad = BIO_INLINE_VECS * sizeof(struct bio_vec);
+	struct bio_set *bs;
 
+	bs = kzalloc(sizeof(*bs), GFP_KERNEL);
 	if (!bs)
 		return NULL;
 
-	bs->bio_pool = mempool_create_slab_pool(bio_pool_size, bio_slab);
+	bs->front_pad = front_pad;
+
+	bs->bio_slab = bio_find_or_create_slab(front_pad + back_pad);
+	if (!bs->bio_slab) {
+		kfree(bs);
+		return NULL;
+	}
+
+	bs->bio_pool = mempool_create_slab_pool(pool_size, bs->bio_slab);
 	if (!bs->bio_pool)
 		goto bad;
 
-	if (bioset_integrity_create(bs, bio_pool_size))
+	if (bioset_integrity_create(bs, pool_size))
 		goto bad;
 
-	if (!biovec_create_pools(bs, bvec_pool_size))
+	if (!biovec_create_pools(bs, pool_size))
 		return bs;
 
 bad:
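The reworked bioset_create() takes a front_pad so a driver can embed a bio at the end of its own per-request structure and have that structure come out of the same allocation: pass front_pad equal to the offset of the bio member, and the bio returned by bio_alloc_bioset() sits front_pad bytes into the mempool object, which bio_free() steps back over before freeing (see the bio_free() hunk earlier). A userspace sketch of the layout arithmetic, with invented structure names:

	#include <stdio.h>
	#include <stdlib.h>
	#include <stddef.h>

	/* Stand-in for struct bio. */
	struct demo_bio {
		unsigned long sector;
	};

	/* A driver-private request with the bio embedded at the END. */
	struct demo_request {
		int		private_state;
		struct demo_bio	bio;		/* must be the last member */
	};

	int main(void)
	{
		size_t front_pad = offsetof(struct demo_request, bio);

		/* What the mempool would hand out: front_pad bytes + the bio. */
		void *p = calloc(1, front_pad + sizeof(struct demo_bio));
		if (!p)
			return 1;

		/* Allocation side: the caller only ever sees the bio pointer. */
		struct demo_bio *bio = (struct demo_bio *)((char *)p + front_pad);

		/* The container comes back with plain pointer arithmetic. */
		struct demo_request *req =
			(struct demo_request *)((char *)bio - front_pad);

		printf("front_pad=%zu, container recovered: %s\n", front_pad,
		       (void *)req == p ? "yes" : "no");

		/* Free side: step back over the padding first, like bio_free(). */
		free((char *)bio - front_pad);
		return 0;
	}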
@@ -1418,12 +1591,16 @@ static void __init biovec_init_slabs(void)
 
 static int __init init_bio(void)
 {
-	bio_slab = KMEM_CACHE(bio, SLAB_HWCACHE_ALIGN|SLAB_PANIC);
+	bio_slab_max = 2;
+	bio_slab_nr = 0;
+	bio_slabs = kzalloc(bio_slab_max * sizeof(struct bio_slab), GFP_KERNEL);
+	if (!bio_slabs)
+		panic("bio: can't allocate bios\n");
 
 	bio_integrity_init_slab();
 	biovec_init_slabs();
 
-	fs_bio_set = bioset_create(BIO_POOL_SIZE, 2);
+	fs_bio_set = bioset_create(BIO_POOL_SIZE, 0);
 	if (!fs_bio_set)
 		panic("bio: can't allocate bios\n");
 