diff options
Diffstat (limited to 'drivers/md/bcache/io.c')
-rw-r--r-- | drivers/md/bcache/io.c | 390 |
1 files changed, 390 insertions, 0 deletions
diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c new file mode 100644 index 000000000000..f565512f6fac --- /dev/null +++ b/drivers/md/bcache/io.c | |||
@@ -0,0 +1,390 @@ | |||
1 | /* | ||
2 | * Some low level IO code, and hacks for various block layer limitations | ||
3 | * | ||
4 | * Copyright 2010, 2011 Kent Overstreet <kent.overstreet@gmail.com> | ||
5 | * Copyright 2012 Google, Inc. | ||
6 | */ | ||
7 | |||
8 | #include "bcache.h" | ||
9 | #include "bset.h" | ||
10 | #include "debug.h" | ||
11 | |||
12 | static void bch_bi_idx_hack_endio(struct bio *bio, int error) | ||
13 | { | ||
14 | struct bio *p = bio->bi_private; | ||
15 | |||
16 | bio_endio(p, error); | ||
17 | bio_put(bio); | ||
18 | } | ||
19 | |||
20 | static void bch_generic_make_request_hack(struct bio *bio) | ||
21 | { | ||
22 | if (bio->bi_idx) { | ||
23 | struct bio *clone = bio_alloc(GFP_NOIO, bio_segments(bio)); | ||
24 | |||
25 | memcpy(clone->bi_io_vec, | ||
26 | bio_iovec(bio), | ||
27 | bio_segments(bio) * sizeof(struct bio_vec)); | ||
28 | |||
29 | clone->bi_sector = bio->bi_sector; | ||
30 | clone->bi_bdev = bio->bi_bdev; | ||
31 | clone->bi_rw = bio->bi_rw; | ||
32 | clone->bi_vcnt = bio_segments(bio); | ||
33 | clone->bi_size = bio->bi_size; | ||
34 | |||
35 | clone->bi_private = bio; | ||
36 | clone->bi_end_io = bch_bi_idx_hack_endio; | ||
37 | |||
38 | bio = clone; | ||
39 | } | ||
40 | |||
41 | generic_make_request(bio); | ||
42 | } | ||
43 | |||
44 | /** | ||
45 | * bch_bio_split - split a bio | ||
46 | * @bio: bio to split | ||
47 | * @sectors: number of sectors to split from the front of @bio | ||
48 | * @gfp: gfp mask | ||
49 | * @bs: bio set to allocate from | ||
50 | * | ||
51 | * Allocates and returns a new bio which represents @sectors from the start of | ||
52 | * @bio, and updates @bio to represent the remaining sectors. | ||
53 | * | ||
54 | * If bio_sectors(@bio) was less than or equal to @sectors, returns @bio | ||
55 | * unchanged. | ||
56 | * | ||
57 | * The newly allocated bio will point to @bio's bi_io_vec, if the split was on a | ||
58 | * bvec boundary; it is the caller's responsibility to ensure that @bio is not | ||
59 | * freed before the split. | ||
60 | * | ||
61 | * If bch_bio_split() is running under generic_make_request(), it's not safe to | ||
62 | * allocate more than one bio from the same bio set. Therefore, if it is running | ||
63 | * under generic_make_request() it masks out __GFP_WAIT when doing the | ||
64 | * allocation. The caller must check for failure if there's any possibility of | ||
65 | * it being called from under generic_make_request(); it is then the caller's | ||
66 | * responsibility to retry from a safe context (by e.g. punting to workqueue). | ||
67 | */ | ||
68 | struct bio *bch_bio_split(struct bio *bio, int sectors, | ||
69 | gfp_t gfp, struct bio_set *bs) | ||
70 | { | ||
71 | unsigned idx = bio->bi_idx, vcnt = 0, nbytes = sectors << 9; | ||
72 | struct bio_vec *bv; | ||
73 | struct bio *ret = NULL; | ||
74 | |||
75 | BUG_ON(sectors <= 0); | ||
76 | |||
77 | /* | ||
78 | * If we're being called from underneath generic_make_request() and we | ||
79 | * already allocated any bios from this bio set, we risk deadlock if we | ||
80 | * use the mempool. So instead, we possibly fail and let the caller punt | ||
81 | * to workqueue or somesuch and retry in a safe context. | ||
82 | */ | ||
83 | if (current->bio_list) | ||
84 | gfp &= ~__GFP_WAIT; | ||
85 | |||
86 | if (sectors >= bio_sectors(bio)) | ||
87 | return bio; | ||
88 | |||
89 | if (bio->bi_rw & REQ_DISCARD) { | ||
90 | ret = bio_alloc_bioset(gfp, 1, bs); | ||
91 | idx = 0; | ||
92 | goto out; | ||
93 | } | ||
94 | |||
95 | bio_for_each_segment(bv, bio, idx) { | ||
96 | vcnt = idx - bio->bi_idx; | ||
97 | |||
98 | if (!nbytes) { | ||
99 | ret = bio_alloc_bioset(gfp, vcnt, bs); | ||
100 | if (!ret) | ||
101 | return NULL; | ||
102 | |||
103 | memcpy(ret->bi_io_vec, bio_iovec(bio), | ||
104 | sizeof(struct bio_vec) * vcnt); | ||
105 | |||
106 | break; | ||
107 | } else if (nbytes < bv->bv_len) { | ||
108 | ret = bio_alloc_bioset(gfp, ++vcnt, bs); | ||
109 | if (!ret) | ||
110 | return NULL; | ||
111 | |||
112 | memcpy(ret->bi_io_vec, bio_iovec(bio), | ||
113 | sizeof(struct bio_vec) * vcnt); | ||
114 | |||
115 | ret->bi_io_vec[vcnt - 1].bv_len = nbytes; | ||
116 | bv->bv_offset += nbytes; | ||
117 | bv->bv_len -= nbytes; | ||
118 | break; | ||
119 | } | ||
120 | |||
121 | nbytes -= bv->bv_len; | ||
122 | } | ||
123 | out: | ||
124 | ret->bi_bdev = bio->bi_bdev; | ||
125 | ret->bi_sector = bio->bi_sector; | ||
126 | ret->bi_size = sectors << 9; | ||
127 | ret->bi_rw = bio->bi_rw; | ||
128 | ret->bi_vcnt = vcnt; | ||
129 | ret->bi_max_vecs = vcnt; | ||
130 | |||
131 | bio->bi_sector += sectors; | ||
132 | bio->bi_size -= sectors << 9; | ||
133 | bio->bi_idx = idx; | ||
134 | |||
135 | if (bio_integrity(bio)) { | ||
136 | if (bio_integrity_clone(ret, bio, gfp)) { | ||
137 | bio_put(ret); | ||
138 | return NULL; | ||
139 | } | ||
140 | |||
141 | bio_integrity_trim(ret, 0, bio_sectors(ret)); | ||
142 | bio_integrity_trim(bio, bio_sectors(ret), bio_sectors(bio)); | ||
143 | } | ||
144 | |||
145 | return ret; | ||
146 | } | ||
147 | |||
/*
 * Return the largest number of sectors (starting at bio's current
 * position) that the underlying queue can take in a single bio, honoring
 * max_discard_sectors, max_segments, merge_bvec_fn and max_sectors.
 * Used by bch_generic_make_request() to decide where to split.
 */
static unsigned bch_bio_max_sectors(struct bio *bio)
{
	unsigned ret = bio_sectors(bio);
	struct request_queue *q = bdev_get_queue(bio->bi_bdev);
	/* Never walk more bvecs than the queue allows segments. */
	struct bio_vec *bv, *end = bio_iovec(bio) +
		min_t(int, bio_segments(bio), queue_max_segments(q));

	struct bvec_merge_data bvm = {
		.bi_bdev	= bio->bi_bdev,
		.bi_sector	= bio->bi_sector,
		.bi_size	= 0,
		.bi_rw		= bio->bi_rw,
	};

	/* Discards carry no data; only the discard limit applies. */
	if (bio->bi_rw & REQ_DISCARD)
		return min(ret, q->limits.max_discard_sectors);

	if (bio_segments(bio) > queue_max_segments(q) ||
	    q->merge_bvec_fn) {
		/* Re-count from zero: accumulate bvecs the queue accepts. */
		ret = 0;

		for (bv = bio_iovec(bio); bv < end; bv++) {
			/*
			 * Stop as soon as the driver's merge_bvec_fn would
			 * refuse to take this bvec in full.
			 */
			if (q->merge_bvec_fn &&
			    q->merge_bvec_fn(q, &bvm, bv) < (int) bv->bv_len)
				break;

			ret += bv->bv_len >> 9;
			bvm.bi_size += bv->bv_len;
		}

		/* Cap at what a single bio can address. */
		if (ret >= (BIO_MAX_PAGES * PAGE_SIZE) >> 9)
			return (BIO_MAX_PAGES * PAGE_SIZE) >> 9;
	}

	ret = min(ret, queue_max_sectors(q));

	WARN_ON(!ret);
	/*
	 * Guarantee forward progress: never return less than the first
	 * bvec, or the split loop could spin without consuming anything.
	 */
	ret = max_t(int, ret, bio_iovec(bio)->bv_len >> 9);

	return ret;
}
189 | |||
190 | static void bch_bio_submit_split_done(struct closure *cl) | ||
191 | { | ||
192 | struct bio_split_hook *s = container_of(cl, struct bio_split_hook, cl); | ||
193 | |||
194 | s->bio->bi_end_io = s->bi_end_io; | ||
195 | s->bio->bi_private = s->bi_private; | ||
196 | bio_endio(s->bio, 0); | ||
197 | |||
198 | closure_debug_destroy(&s->cl); | ||
199 | mempool_free(s, s->p->bio_split_hook); | ||
200 | } | ||
201 | |||
/*
 * Per-fragment completion for bios split by bch_generic_make_request().
 *
 * An error from any fragment clears BIO_UPTODATE on the original bio,
 * so the error is sticky; the original is only completed (in
 * bch_bio_submit_split_done()) after the last fragment drops its
 * closure ref here.
 */
static void bch_bio_submit_split_endio(struct bio *bio, int error)
{
	struct closure *cl = bio->bi_private;
	struct bio_split_hook *s = container_of(cl, struct bio_split_hook, cl);

	if (error)
		clear_bit(BIO_UPTODATE, &s->bio->bi_flags);

	bio_put(bio);		/* drop the fragment */
	closure_put(cl);	/* ref taken in __bch_bio_submit_split() */
}
213 | |||
/*
 * Split s->bio into device-acceptable chunks and submit each one.
 *
 * bch_bio_split() returns the original bio itself for the final chunk,
 * which terminates the loop. If a split allocation fails (possible while
 * running under generic_make_request(), where __GFP_WAIT is masked),
 * continue_at() reschedules this function on system_wq — note that
 * continue_at() returns from the current function.
 */
static void __bch_bio_submit_split(struct closure *cl)
{
	struct bio_split_hook *s = container_of(cl, struct bio_split_hook, cl);
	struct bio *bio = s->bio, *n;

	do {
		n = bch_bio_split(bio, bch_bio_max_sectors(bio),
				  GFP_NOIO, s->p->bio_split);
		if (!n)
			continue_at(cl, __bch_bio_submit_split, system_wq);

		n->bi_end_io	= bch_bio_submit_split_endio;
		n->bi_private	= cl;

		/* Ref per in-flight chunk; dropped in the chunk's endio. */
		closure_get(cl);
		bch_generic_make_request_hack(n);
	} while (n != bio);

	/* Fires once every chunk has completed and dropped its ref. */
	continue_at(cl, bch_bio_submit_split_done, NULL);
}
234 | |||
235 | void bch_generic_make_request(struct bio *bio, struct bio_split_pool *p) | ||
236 | { | ||
237 | struct bio_split_hook *s; | ||
238 | |||
239 | if (!bio_has_data(bio) && !(bio->bi_rw & REQ_DISCARD)) | ||
240 | goto submit; | ||
241 | |||
242 | if (bio_sectors(bio) <= bch_bio_max_sectors(bio)) | ||
243 | goto submit; | ||
244 | |||
245 | s = mempool_alloc(p->bio_split_hook, GFP_NOIO); | ||
246 | |||
247 | s->bio = bio; | ||
248 | s->p = p; | ||
249 | s->bi_end_io = bio->bi_end_io; | ||
250 | s->bi_private = bio->bi_private; | ||
251 | bio_get(bio); | ||
252 | |||
253 | closure_call(&s->cl, __bch_bio_submit_split, NULL, NULL); | ||
254 | return; | ||
255 | submit: | ||
256 | bch_generic_make_request_hack(bio); | ||
257 | } | ||
258 | |||
259 | /* Bios with headers */ | ||
260 | |||
261 | void bch_bbio_free(struct bio *bio, struct cache_set *c) | ||
262 | { | ||
263 | struct bbio *b = container_of(bio, struct bbio, bio); | ||
264 | mempool_free(b, c->bio_meta); | ||
265 | } | ||
266 | |||
/*
 * Allocate a bbio (bio + bkey header) from the cache_set's metadata
 * mempool and initialize its embedded bio to use the bio's inline vec
 * array, sized for a bucket's worth of pages.
 *
 * Must be freed with bch_bbio_free(), which returns it to c->bio_meta.
 */
struct bio *bch_bbio_alloc(struct cache_set *c)
{
	struct bbio *b = mempool_alloc(c->bio_meta, GFP_NOIO);
	struct bio *bio = &b->bio;

	bio_init(bio);
	/*
	 * NOTE(review): presumably marks the bio as belonging to no bio
	 * pool so bio_put() won't try to return it to a bio_set — confirm
	 * against BIO_POOL_NONE / bio_put() semantics for this kernel.
	 */
	bio->bi_flags |= BIO_POOL_NONE << BIO_POOL_OFFSET;
	bio->bi_max_vecs = bucket_pages(c);
	bio->bi_io_vec = bio->bi_inline_vecs;

	return bio;
}
279 | |||
/*
 * Submit a bbio to the cache device named by the first pointer of its
 * key: the device offset comes from PTR_OFFSET(), the target bdev from
 * PTR_CACHE(). Records the submit timestamp used for latency/congestion
 * accounting in bch_bbio_count_io_errors().
 */
void __bch_submit_bbio(struct bio *bio, struct cache_set *c)
{
	struct bbio *b = container_of(bio, struct bbio, bio);

	bio->bi_sector	= PTR_OFFSET(&b->key, 0);
	bio->bi_bdev	= PTR_CACHE(c, &b->key, 0)->bdev;

	b->submit_time_us = local_clock_us();
	closure_bio_submit(bio, bio->bi_private, PTR_CACHE(c, &b->key, 0));
}
290 | |||
291 | void bch_submit_bbio(struct bio *bio, struct cache_set *c, | ||
292 | struct bkey *k, unsigned ptr) | ||
293 | { | ||
294 | struct bbio *b = container_of(bio, struct bbio, bio); | ||
295 | bch_bkey_copy_single_ptr(&b->key, k, ptr); | ||
296 | __bch_submit_bbio(bio, c); | ||
297 | } | ||
298 | |||
299 | /* IO errors */ | ||
300 | |||
/*
 * Count an IO completion against @ca, decaying the error count over time
 * so that old errors eventually stop counting, and escalating to
 * bch_cache_set_error() once the (decayed) error count passes the
 * configured limit. @m is a human-readable description of the operation
 * for the log message. Lock-free: both counters are maintained with
 * cmpxchg loops so this is safe from IO completion context.
 */
void bch_count_io_errors(struct cache *ca, int error, const char *m)
{
	/*
	 * The halflife of an error is:
	 * log2(1/2)/log2(127/128) * refresh ~= 88 * refresh
	 */

	if (ca->set->error_decay) {
		unsigned count = atomic_inc_return(&ca->io_count);

		while (count > ca->set->error_decay) {
			unsigned errors;
			unsigned old = count;
			unsigned new = count - ca->set->error_decay;

			/*
			 * First we subtract refresh from count; each time we
			 * successfully do so, we rescale the errors once:
			 */

			count = atomic_cmpxchg(&ca->io_count, old, new);

			if (count == old) {
				/* We won the race: rescale errors by 127/128. */
				count = new;

				errors = atomic_read(&ca->io_errors);
				do {
					old = errors;
					new = ((uint64_t) errors * 127) / 128;
					errors = atomic_cmpxchg(&ca->io_errors,
								old, new);
				} while (old != errors);
			}
		}
	}

	if (error) {
		char buf[BDEVNAME_SIZE];
		/*
		 * Errors are kept shifted up by IO_ERROR_SHIFT so the decay
		 * above has fractional precision; shift back down to compare
		 * against the user-visible limit.
		 */
		unsigned errors = atomic_add_return(1 << IO_ERROR_SHIFT,
						    &ca->io_errors);
		errors >>= IO_ERROR_SHIFT;

		if (errors < ca->set->error_limit)
			pr_err("%s: IO error on %s, recovering",
			       bdevname(ca->bdev, buf), m);
		else
			bch_cache_set_error(ca->set,
					    "%s: too many IO errors %s",
					    bdevname(ca->bdev, buf), m);
	}
}
352 | |||
/*
 * Account a completed bbio: feed its latency into the cache_set's
 * congestion counter (used to throttle cache traffic when the device is
 * slow) and count any IO error against the cache device it targeted.
 */
void bch_bbio_count_io_errors(struct cache_set *c, struct bio *bio,
			      int error, const char *m)
{
	struct bbio *b = container_of(bio, struct bbio, bio);
	struct cache *ca = PTR_CACHE(c, &b->key, 0);

	/* Reads and writes have independently tunable thresholds. */
	unsigned threshold = bio->bi_rw & REQ_WRITE
		? c->congested_write_threshold_us
		: c->congested_read_threshold_us;

	if (threshold) {
		unsigned t = local_clock_us();

		/* Signed: the clock sample may race with submit_time_us. */
		int us = t - b->submit_time_us;
		int congested = atomic_read(&c->congested);

		if (us > (int) threshold) {
			/* Slow IO: push c->congested down (more congested). */
			int ms = us / 1024;	/* cheap approx of msecs */
			c->congested_last_us = t;

			ms = min(ms, CONGESTED_MAX + congested);
			atomic_sub(ms, &c->congested);
		} else if (congested < 0)
			/* Fast IO: decay congestion back toward zero. */
			atomic_inc(&c->congested);
	}

	bch_count_io_errors(ca, error, m);
}
381 | |||
382 | void bch_bbio_endio(struct cache_set *c, struct bio *bio, | ||
383 | int error, const char *m) | ||
384 | { | ||
385 | struct closure *cl = bio->bi_private; | ||
386 | |||
387 | bch_bbio_count_io_errors(c, bio, error, m); | ||
388 | bio_put(bio); | ||
389 | closure_put(cl); | ||
390 | } | ||