diff options
Diffstat (limited to 'drivers/md/bcache/io.c')
-rw-r--r-- | drivers/md/bcache/io.c | 397 |
1 files changed, 397 insertions, 0 deletions
diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c new file mode 100644 index 000000000000..48efd4dea645 --- /dev/null +++ b/drivers/md/bcache/io.c | |||
@@ -0,0 +1,397 @@ | |||
1 | /* | ||
2 | * Some low level IO code, and hacks for various block layer limitations | ||
3 | * | ||
4 | * Copyright 2010, 2011 Kent Overstreet <kent.overstreet@gmail.com> | ||
5 | * Copyright 2012 Google, Inc. | ||
6 | */ | ||
7 | |||
8 | #include "bcache.h" | ||
9 | #include "bset.h" | ||
10 | #include "debug.h" | ||
11 | |||
12 | static void bch_bi_idx_hack_endio(struct bio *bio, int error) | ||
13 | { | ||
14 | struct bio *p = bio->bi_private; | ||
15 | |||
16 | bio_endio(p, error); | ||
17 | bio_put(bio); | ||
18 | } | ||
19 | |||
20 | static void bch_generic_make_request_hack(struct bio *bio) | ||
21 | { | ||
22 | if (bio->bi_idx) { | ||
23 | struct bio *clone = bio_alloc(GFP_NOIO, bio_segments(bio)); | ||
24 | |||
25 | memcpy(clone->bi_io_vec, | ||
26 | bio_iovec(bio), | ||
27 | bio_segments(bio) * sizeof(struct bio_vec)); | ||
28 | |||
29 | clone->bi_sector = bio->bi_sector; | ||
30 | clone->bi_bdev = bio->bi_bdev; | ||
31 | clone->bi_rw = bio->bi_rw; | ||
32 | clone->bi_vcnt = bio_segments(bio); | ||
33 | clone->bi_size = bio->bi_size; | ||
34 | |||
35 | clone->bi_private = bio; | ||
36 | clone->bi_end_io = bch_bi_idx_hack_endio; | ||
37 | |||
38 | bio = clone; | ||
39 | } | ||
40 | |||
41 | /* | ||
42 | * Hack, since drivers that clone bios clone up to bi_max_vecs, but our | ||
43 | * bios might have had more than that (before we split them per device | ||
44 | * limitations). | ||
45 | * | ||
46 | * To be taken out once immutable bvec stuff is in. | ||
47 | */ | ||
48 | bio->bi_max_vecs = bio->bi_vcnt; | ||
49 | |||
50 | generic_make_request(bio); | ||
51 | } | ||
52 | |||
53 | /** | ||
54 | * bch_bio_split - split a bio | ||
55 | * @bio: bio to split | ||
56 | * @sectors: number of sectors to split from the front of @bio | ||
57 | * @gfp: gfp mask | ||
58 | * @bs: bio set to allocate from | ||
59 | * | ||
60 | * Allocates and returns a new bio which represents @sectors from the start of | ||
61 | * @bio, and updates @bio to represent the remaining sectors. | ||
62 | * | ||
63 | * If bio_sectors(@bio) was less than or equal to @sectors, returns @bio | ||
64 | * unchanged. | ||
65 | * | ||
66 | * The newly allocated bio will point to @bio's bi_io_vec, if the split was on a | ||
67 | * bvec boundry; it is the caller's responsibility to ensure that @bio is not | ||
68 | * freed before the split. | ||
69 | * | ||
70 | * If bch_bio_split() is running under generic_make_request(), it's not safe to | ||
71 | * allocate more than one bio from the same bio set. Therefore, if it is running | ||
72 | * under generic_make_request() it masks out __GFP_WAIT when doing the | ||
73 | * allocation. The caller must check for failure if there's any possibility of | ||
74 | * it being called from under generic_make_request(); it is then the caller's | ||
75 | * responsibility to retry from a safe context (by e.g. punting to workqueue). | ||
76 | */ | ||
77 | struct bio *bch_bio_split(struct bio *bio, int sectors, | ||
78 | gfp_t gfp, struct bio_set *bs) | ||
79 | { | ||
80 | unsigned idx = bio->bi_idx, vcnt = 0, nbytes = sectors << 9; | ||
81 | struct bio_vec *bv; | ||
82 | struct bio *ret = NULL; | ||
83 | |||
84 | BUG_ON(sectors <= 0); | ||
85 | |||
86 | /* | ||
87 | * If we're being called from underneath generic_make_request() and we | ||
88 | * already allocated any bios from this bio set, we risk deadlock if we | ||
89 | * use the mempool. So instead, we possibly fail and let the caller punt | ||
90 | * to workqueue or somesuch and retry in a safe context. | ||
91 | */ | ||
92 | if (current->bio_list) | ||
93 | gfp &= ~__GFP_WAIT; | ||
94 | |||
95 | if (sectors >= bio_sectors(bio)) | ||
96 | return bio; | ||
97 | |||
98 | if (bio->bi_rw & REQ_DISCARD) { | ||
99 | ret = bio_alloc_bioset(gfp, 1, bs); | ||
100 | idx = 0; | ||
101 | goto out; | ||
102 | } | ||
103 | |||
104 | bio_for_each_segment(bv, bio, idx) { | ||
105 | vcnt = idx - bio->bi_idx; | ||
106 | |||
107 | if (!nbytes) { | ||
108 | ret = bio_alloc_bioset(gfp, vcnt, bs); | ||
109 | if (!ret) | ||
110 | return NULL; | ||
111 | |||
112 | memcpy(ret->bi_io_vec, bio_iovec(bio), | ||
113 | sizeof(struct bio_vec) * vcnt); | ||
114 | |||
115 | break; | ||
116 | } else if (nbytes < bv->bv_len) { | ||
117 | ret = bio_alloc_bioset(gfp, ++vcnt, bs); | ||
118 | if (!ret) | ||
119 | return NULL; | ||
120 | |||
121 | memcpy(ret->bi_io_vec, bio_iovec(bio), | ||
122 | sizeof(struct bio_vec) * vcnt); | ||
123 | |||
124 | ret->bi_io_vec[vcnt - 1].bv_len = nbytes; | ||
125 | bv->bv_offset += nbytes; | ||
126 | bv->bv_len -= nbytes; | ||
127 | break; | ||
128 | } | ||
129 | |||
130 | nbytes -= bv->bv_len; | ||
131 | } | ||
132 | out: | ||
133 | ret->bi_bdev = bio->bi_bdev; | ||
134 | ret->bi_sector = bio->bi_sector; | ||
135 | ret->bi_size = sectors << 9; | ||
136 | ret->bi_rw = bio->bi_rw; | ||
137 | ret->bi_vcnt = vcnt; | ||
138 | ret->bi_max_vecs = vcnt; | ||
139 | |||
140 | bio->bi_sector += sectors; | ||
141 | bio->bi_size -= sectors << 9; | ||
142 | bio->bi_idx = idx; | ||
143 | |||
144 | if (bio_integrity(bio)) { | ||
145 | if (bio_integrity_clone(ret, bio, gfp)) { | ||
146 | bio_put(ret); | ||
147 | return NULL; | ||
148 | } | ||
149 | |||
150 | bio_integrity_trim(ret, 0, bio_sectors(ret)); | ||
151 | bio_integrity_trim(bio, bio_sectors(ret), bio_sectors(bio)); | ||
152 | } | ||
153 | |||
154 | return ret; | ||
155 | } | ||
156 | |||
157 | static unsigned bch_bio_max_sectors(struct bio *bio) | ||
158 | { | ||
159 | unsigned ret = bio_sectors(bio); | ||
160 | struct request_queue *q = bdev_get_queue(bio->bi_bdev); | ||
161 | unsigned max_segments = min_t(unsigned, BIO_MAX_PAGES, | ||
162 | queue_max_segments(q)); | ||
163 | struct bio_vec *bv, *end = bio_iovec(bio) + | ||
164 | min_t(int, bio_segments(bio), max_segments); | ||
165 | |||
166 | if (bio->bi_rw & REQ_DISCARD) | ||
167 | return min(ret, q->limits.max_discard_sectors); | ||
168 | |||
169 | if (bio_segments(bio) > max_segments || | ||
170 | q->merge_bvec_fn) { | ||
171 | ret = 0; | ||
172 | |||
173 | for (bv = bio_iovec(bio); bv < end; bv++) { | ||
174 | struct bvec_merge_data bvm = { | ||
175 | .bi_bdev = bio->bi_bdev, | ||
176 | .bi_sector = bio->bi_sector, | ||
177 | .bi_size = ret << 9, | ||
178 | .bi_rw = bio->bi_rw, | ||
179 | }; | ||
180 | |||
181 | if (q->merge_bvec_fn && | ||
182 | q->merge_bvec_fn(q, &bvm, bv) < (int) bv->bv_len) | ||
183 | break; | ||
184 | |||
185 | ret += bv->bv_len >> 9; | ||
186 | } | ||
187 | } | ||
188 | |||
189 | ret = min(ret, queue_max_sectors(q)); | ||
190 | |||
191 | WARN_ON(!ret); | ||
192 | ret = max_t(int, ret, bio_iovec(bio)->bv_len >> 9); | ||
193 | |||
194 | return ret; | ||
195 | } | ||
196 | |||
197 | static void bch_bio_submit_split_done(struct closure *cl) | ||
198 | { | ||
199 | struct bio_split_hook *s = container_of(cl, struct bio_split_hook, cl); | ||
200 | |||
201 | s->bio->bi_end_io = s->bi_end_io; | ||
202 | s->bio->bi_private = s->bi_private; | ||
203 | bio_endio(s->bio, 0); | ||
204 | |||
205 | closure_debug_destroy(&s->cl); | ||
206 | mempool_free(s, s->p->bio_split_hook); | ||
207 | } | ||
208 | |||
209 | static void bch_bio_submit_split_endio(struct bio *bio, int error) | ||
210 | { | ||
211 | struct closure *cl = bio->bi_private; | ||
212 | struct bio_split_hook *s = container_of(cl, struct bio_split_hook, cl); | ||
213 | |||
214 | if (error) | ||
215 | clear_bit(BIO_UPTODATE, &s->bio->bi_flags); | ||
216 | |||
217 | bio_put(bio); | ||
218 | closure_put(cl); | ||
219 | } | ||
220 | |||
221 | static void __bch_bio_submit_split(struct closure *cl) | ||
222 | { | ||
223 | struct bio_split_hook *s = container_of(cl, struct bio_split_hook, cl); | ||
224 | struct bio *bio = s->bio, *n; | ||
225 | |||
226 | do { | ||
227 | n = bch_bio_split(bio, bch_bio_max_sectors(bio), | ||
228 | GFP_NOIO, s->p->bio_split); | ||
229 | if (!n) | ||
230 | continue_at(cl, __bch_bio_submit_split, system_wq); | ||
231 | |||
232 | n->bi_end_io = bch_bio_submit_split_endio; | ||
233 | n->bi_private = cl; | ||
234 | |||
235 | closure_get(cl); | ||
236 | bch_generic_make_request_hack(n); | ||
237 | } while (n != bio); | ||
238 | |||
239 | continue_at(cl, bch_bio_submit_split_done, NULL); | ||
240 | } | ||
241 | |||
242 | void bch_generic_make_request(struct bio *bio, struct bio_split_pool *p) | ||
243 | { | ||
244 | struct bio_split_hook *s; | ||
245 | |||
246 | if (!bio_has_data(bio) && !(bio->bi_rw & REQ_DISCARD)) | ||
247 | goto submit; | ||
248 | |||
249 | if (bio_sectors(bio) <= bch_bio_max_sectors(bio)) | ||
250 | goto submit; | ||
251 | |||
252 | s = mempool_alloc(p->bio_split_hook, GFP_NOIO); | ||
253 | |||
254 | s->bio = bio; | ||
255 | s->p = p; | ||
256 | s->bi_end_io = bio->bi_end_io; | ||
257 | s->bi_private = bio->bi_private; | ||
258 | bio_get(bio); | ||
259 | |||
260 | closure_call(&s->cl, __bch_bio_submit_split, NULL, NULL); | ||
261 | return; | ||
262 | submit: | ||
263 | bch_generic_make_request_hack(bio); | ||
264 | } | ||
265 | |||
266 | /* Bios with headers */ | ||
267 | |||
268 | void bch_bbio_free(struct bio *bio, struct cache_set *c) | ||
269 | { | ||
270 | struct bbio *b = container_of(bio, struct bbio, bio); | ||
271 | mempool_free(b, c->bio_meta); | ||
272 | } | ||
273 | |||
274 | struct bio *bch_bbio_alloc(struct cache_set *c) | ||
275 | { | ||
276 | struct bbio *b = mempool_alloc(c->bio_meta, GFP_NOIO); | ||
277 | struct bio *bio = &b->bio; | ||
278 | |||
279 | bio_init(bio); | ||
280 | bio->bi_flags |= BIO_POOL_NONE << BIO_POOL_OFFSET; | ||
281 | bio->bi_max_vecs = bucket_pages(c); | ||
282 | bio->bi_io_vec = bio->bi_inline_vecs; | ||
283 | |||
284 | return bio; | ||
285 | } | ||
286 | |||
287 | void __bch_submit_bbio(struct bio *bio, struct cache_set *c) | ||
288 | { | ||
289 | struct bbio *b = container_of(bio, struct bbio, bio); | ||
290 | |||
291 | bio->bi_sector = PTR_OFFSET(&b->key, 0); | ||
292 | bio->bi_bdev = PTR_CACHE(c, &b->key, 0)->bdev; | ||
293 | |||
294 | b->submit_time_us = local_clock_us(); | ||
295 | closure_bio_submit(bio, bio->bi_private, PTR_CACHE(c, &b->key, 0)); | ||
296 | } | ||
297 | |||
298 | void bch_submit_bbio(struct bio *bio, struct cache_set *c, | ||
299 | struct bkey *k, unsigned ptr) | ||
300 | { | ||
301 | struct bbio *b = container_of(bio, struct bbio, bio); | ||
302 | bch_bkey_copy_single_ptr(&b->key, k, ptr); | ||
303 | __bch_submit_bbio(bio, c); | ||
304 | } | ||
305 | |||
306 | /* IO errors */ | ||
307 | |||
308 | void bch_count_io_errors(struct cache *ca, int error, const char *m) | ||
309 | { | ||
310 | /* | ||
311 | * The halflife of an error is: | ||
312 | * log2(1/2)/log2(127/128) * refresh ~= 88 * refresh | ||
313 | */ | ||
314 | |||
315 | if (ca->set->error_decay) { | ||
316 | unsigned count = atomic_inc_return(&ca->io_count); | ||
317 | |||
318 | while (count > ca->set->error_decay) { | ||
319 | unsigned errors; | ||
320 | unsigned old = count; | ||
321 | unsigned new = count - ca->set->error_decay; | ||
322 | |||
323 | /* | ||
324 | * First we subtract refresh from count; each time we | ||
325 | * succesfully do so, we rescale the errors once: | ||
326 | */ | ||
327 | |||
328 | count = atomic_cmpxchg(&ca->io_count, old, new); | ||
329 | |||
330 | if (count == old) { | ||
331 | count = new; | ||
332 | |||
333 | errors = atomic_read(&ca->io_errors); | ||
334 | do { | ||
335 | old = errors; | ||
336 | new = ((uint64_t) errors * 127) / 128; | ||
337 | errors = atomic_cmpxchg(&ca->io_errors, | ||
338 | old, new); | ||
339 | } while (old != errors); | ||
340 | } | ||
341 | } | ||
342 | } | ||
343 | |||
344 | if (error) { | ||
345 | char buf[BDEVNAME_SIZE]; | ||
346 | unsigned errors = atomic_add_return(1 << IO_ERROR_SHIFT, | ||
347 | &ca->io_errors); | ||
348 | errors >>= IO_ERROR_SHIFT; | ||
349 | |||
350 | if (errors < ca->set->error_limit) | ||
351 | pr_err("%s: IO error on %s, recovering", | ||
352 | bdevname(ca->bdev, buf), m); | ||
353 | else | ||
354 | bch_cache_set_error(ca->set, | ||
355 | "%s: too many IO errors %s", | ||
356 | bdevname(ca->bdev, buf), m); | ||
357 | } | ||
358 | } | ||
359 | |||
360 | void bch_bbio_count_io_errors(struct cache_set *c, struct bio *bio, | ||
361 | int error, const char *m) | ||
362 | { | ||
363 | struct bbio *b = container_of(bio, struct bbio, bio); | ||
364 | struct cache *ca = PTR_CACHE(c, &b->key, 0); | ||
365 | |||
366 | unsigned threshold = bio->bi_rw & REQ_WRITE | ||
367 | ? c->congested_write_threshold_us | ||
368 | : c->congested_read_threshold_us; | ||
369 | |||
370 | if (threshold) { | ||
371 | unsigned t = local_clock_us(); | ||
372 | |||
373 | int us = t - b->submit_time_us; | ||
374 | int congested = atomic_read(&c->congested); | ||
375 | |||
376 | if (us > (int) threshold) { | ||
377 | int ms = us / 1024; | ||
378 | c->congested_last_us = t; | ||
379 | |||
380 | ms = min(ms, CONGESTED_MAX + congested); | ||
381 | atomic_sub(ms, &c->congested); | ||
382 | } else if (congested < 0) | ||
383 | atomic_inc(&c->congested); | ||
384 | } | ||
385 | |||
386 | bch_count_io_errors(ca, error, m); | ||
387 | } | ||
388 | |||
389 | void bch_bbio_endio(struct cache_set *c, struct bio *bio, | ||
390 | int error, const char *m) | ||
391 | { | ||
392 | struct closure *cl = bio->bi_private; | ||
393 | |||
394 | bch_bbio_count_io_errors(c, bio, error, m); | ||
395 | bio_put(bio); | ||
396 | closure_put(cl); | ||
397 | } | ||